diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 472d18e73da78d..cba8afbb54f0f0 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -33,18 +33,6 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number }} cancel-in-progress: true - -env: - # LLVM POST-BRANCH bump version - # LLVM POST-BRANCH add compiler test for ToT - 1, e.g. "Clang 17" - # LLVM RELEASE bump remove compiler ToT - 3, e.g. "Clang 15" - LLVM_HEAD_VERSION: "19" # Used compiler, update POST-BRANCH. - LLVM_PREVIOUS_VERSION: "18" - LLVM_OLDEST_VERSION: "17" - GCC_STABLE_VERSION: "13" - LLVM_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer-19" - CLANG_CRASH_DIAGNOSTICS_DIR: "crash_diagnostics" - jobs: stage1: if: github.repository_owner == 'llvm' diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 2880bfd03be789..320623cfa15af1 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -170,6 +170,9 @@ class DataAggregator : public DataReader { std::string BuildIDBinaryName; /// Memory map info for a single file as recorded in perf.data + /// When a binary has multiple text segments, the Size is computed as the + /// difference of the last address of these segments from the BaseAddress. + /// The base addresses of all text segments must be the same. struct MMapInfo { uint64_t BaseAddress{0}; /// Base address of the mapped binary. uint64_t MMapAddress{0}; /// Address of the executable segment. @@ -493,6 +496,11 @@ class DataAggregator : public DataReader { /// and return a file name matching a given \p FileBuildID. std::optional getFileNameForBuildID(StringRef FileBuildID); + /// Get a constant reference to the parsed binary mmap entries. 
+ const std::unordered_map &getBinaryMMapInfo() { + return BinaryMMapInfo; + } + friend class YAMLProfileWriter; }; } // namespace bolt diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 697cac9fbcaa08..2b02086e3e0c99 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -95,6 +95,12 @@ cl::opt ReadPreAggregated( "pa", cl::desc("skip perf and read data from a pre-aggregated file format"), cl::cat(AggregatorCategory)); +cl::opt + ReadPerfEvents("perf-script-events", + cl::desc("skip perf event collection by supplying a " + "perf-script output in a textual format"), + cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory)); + static cl::opt TimeAggregator("time-aggr", cl::desc("time BOLT aggregator"), @@ -167,8 +173,9 @@ void DataAggregator::findPerfExecutable() { void DataAggregator::start() { outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; - // Don't launch perf for pre-aggregated files - if (opts::ReadPreAggregated) + // Don't launch perf for pre-aggregated files or when perf input is specified + // by the user. 
+ if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty()) return; findPerfExecutable(); @@ -464,6 +471,13 @@ void DataAggregator::filterBinaryMMapInfo() { int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, PerfProcessErrorCallbackTy Callback) { + if (!opts::ReadPerfEvents.empty()) { + outs() << "PERF2BOLT: using pre-processed perf events for '" << Name + << "' (perf-script-events)\n"; + ParsingBuf = opts::ReadPerfEvents; + return 0; + } + std::string Error; outs() << "PERF2BOLT: waiting for perf " << Name << " collection to finish...\n"; @@ -2056,15 +2070,6 @@ std::error_code DataAggregator::parseMMapEvents() { if (FileMMapInfo.first == "(deleted)") continue; - // Consider only the first mapping of the file for any given PID - auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first); - bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) { - return MI.second.PID == FileMMapInfo.second.PID; - }); - - if (PIDExists) - continue; - GlobalMMapInfo.insert(FileMMapInfo); } @@ -2116,12 +2121,22 @@ std::error_code DataAggregator::parseMMapEvents() { << " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset) << ". Ignoring profile data for this mapping\n"; continue; - } else { - MMapInfo.BaseAddress = *BaseAddress; } + MMapInfo.BaseAddress = *BaseAddress; } - BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)); + // Try to add MMapInfo to the map and update its size. Large binaries may + // span to multiple text segments, so the mapping is inserted only on the + // first occurrence. + if (!BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)).second) + assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress && + "Base address on multiple segment mappings should match"); + + // Update mapping size. 
+ const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size; + const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress; + if (Size > BinaryMMapInfo[MMapInfo.PID].Size) + BinaryMMapInfo[MMapInfo.PID].Size = Size; } if (BinaryMMapInfo.empty()) { diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt index bad7108dad0b7b..208cf6ced73585 100644 --- a/bolt/unittests/Core/CMakeLists.txt +++ b/bolt/unittests/Core/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS add_bolt_unittest(CoreTests BinaryContext.cpp MCPlusBuilder.cpp + MemoryMaps.cpp DynoStats.cpp DISABLE_LLVM_LINK_LLVM_DYLIB @@ -17,6 +18,8 @@ target_link_libraries(CoreTests PRIVATE LLVMBOLTCore LLVMBOLTRewrite + LLVMBOLTProfile + LLVMTestingSupport ) foreach (tgt ${BOLT_TARGETS_TO_BUILD}) diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp index cd6f24c4570a79..c66c2d0c0fb16e 100644 --- a/bolt/unittests/Core/MCPlusBuilder.cpp +++ b/bolt/unittests/Core/MCPlusBuilder.cpp @@ -90,15 +90,14 @@ INSTANTIATE_TEST_SUITE_P(AArch64, MCPlusBuilderTester, ::testing::Values(Triple::aarch64)); TEST_P(MCPlusBuilderTester, AliasX0) { - uint64_t AliasesX0[] = {AArch64::W0, AArch64::W0_HI, - AArch64::X0, AArch64::W0_W1, + uint64_t AliasesX0[] = {AArch64::W0, AArch64::X0, AArch64::W0_W1, AArch64::X0_X1, AArch64::X0_X1_X2_X3_X4_X5_X6_X7}; size_t AliasesX0Count = sizeof(AliasesX0) / sizeof(*AliasesX0); testRegAliases(Triple::aarch64, AArch64::X0, AliasesX0, AliasesX0Count); } TEST_P(MCPlusBuilderTester, AliasSmallerX0) { - uint64_t AliasesX0[] = {AArch64::W0, AArch64::W0_HI, AArch64::X0}; + uint64_t AliasesX0[] = {AArch64::W0, AArch64::X0}; size_t AliasesX0Count = sizeof(AliasesX0) / sizeof(*AliasesX0); testRegAliases(Triple::aarch64, AArch64::X0, AliasesX0, AliasesX0Count, true); } diff --git a/bolt/unittests/Core/MemoryMaps.cpp b/bolt/unittests/Core/MemoryMaps.cpp new file mode 100644 index 00000000000000..9b5769d051cb6f --- 
/dev/null +++ b/bolt/unittests/Core/MemoryMaps.cpp @@ -0,0 +1,142 @@ +//===- bolt/unittest/Core/MemoryMaps.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/BinaryContext.h" +#include "bolt/Profile/DataAggregator.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; +using namespace bolt; + +namespace opts { +extern cl::opt ReadPerfEvents; +} // namespace opts + +namespace { + +/// Perform checks on memory map events normally captured in perf. Tests use +/// the 'opts::ReadPerfEvents' flag to emulate these events, passing a custom +/// 'perf script' output to DataAggregator. +struct MemoryMapsTester : public testing::TestWithParam { + void SetUp() override { + initalizeLLVM(); + prepareElf(); + initializeBOLT(); + } + +protected: + void initalizeLLVM() { + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllDisassemblers(); + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + } + + void prepareElf() { + memcpy(ElfBuf, "\177ELF", 4); + ELF64LE::Ehdr *EHdr = reinterpret_cast(ElfBuf); + EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64; + EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB; + EHdr->e_machine = GetParam() == Triple::aarch64 ? 
EM_AARCH64 : EM_X86_64; + MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF"); + ObjFile = cantFail(ObjectFile::createObjectFile(Source)); + } + + void initializeBOLT() { + Relocation::Arch = ObjFile->makeTriple().getArch(); + BC = cantFail(BinaryContext::createBinaryContext( + ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true, + DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()})); + ASSERT_FALSE(!BC); + } + + char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {}; + std::unique_ptr ObjFile; + std::unique_ptr BC; +}; +} // namespace + +#ifdef X86_AVAILABLE + +INSTANTIATE_TEST_SUITE_P(X86, MemoryMapsTester, + ::testing::Values(Triple::x86_64)); + +#endif + +#ifdef AARCH64_AVAILABLE + +INSTANTIATE_TEST_SUITE_P(AArch64, MemoryMapsTester, + ::testing::Values(Triple::aarch64)); + +#endif + +/// Check that the correct mmap size is computed when we have multiple text +/// segment mappings. +TEST_P(MemoryMapsTester, ParseMultipleSegments) { + const int Pid = 1234; + StringRef Filename = "BINARY"; + opts::ReadPerfEvents = formatv( + "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " + "[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n" + "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " + "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n", + Pid, Filename); + + BC->SegmentMapInfo[0x11da000] = + SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true}; + BC->SegmentMapInfo[0x31d0000] = + SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000, true}; + + DataAggregator DA(""); + BC->setFilename(Filename); + Error Err = DA.preprocessProfile(*BC); + + // Ignore errors from perf2bolt when parsing memory events later on. + ASSERT_THAT_ERROR(std::move(Err), Succeeded()); + + auto &BinaryMMapInfo = DA.getBinaryMMapInfo(); + auto El = BinaryMMapInfo.find(Pid); + // Check that memory mapping is present and has the expected size. 
+ ASSERT_NE(El, BinaryMMapInfo.end()); + ASSERT_EQ(El->second.Size, static_cast(0xb1d0000)); +} + +/// Check that DataAggregator aborts when pre-processing an input binary +/// with multiple text segments that have different base addresses. +TEST_P(MemoryMapsTester, MultipleSegmentsMismatchedBaseAddress) { + const int Pid = 1234; + StringRef Filename = "BINARY"; + opts::ReadPerfEvents = formatv( + "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " + "[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n" + "name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: " + "[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n", + Pid, Filename); + + BC->SegmentMapInfo[0x11da000] = + SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true}; + // Using '0x31d0fff' FileOffset which triggers a different base address + // for this second text segment. + BC->SegmentMapInfo[0x31d0000] = + SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0fff, 0x3000000, 0x200000, true}; + + DataAggregator DA(""); + BC->setFilename(Filename); + ASSERT_DEATH( + { Error Err = DA.preprocessProfile(*BC); }, + "Base address on multiple segment mappings should match"); +} diff --git a/bolt/utils/bughunter.sh b/bolt/utils/bughunter.sh index 49831cddfdbdd1..c5dddc41fb41fe 100755 --- a/bolt/utils/bughunter.sh +++ b/bolt/utils/bughunter.sh @@ -131,7 +131,7 @@ if [[ $FAIL -eq "0" ]]; then fi else echo "Did it pass? Type the return code [0 = pass, 1 = fail]" - read -n1 PASS + read -n1 FAIL fi if [[ $FAIL -eq "0" ]] ; then echo " Warning: optimized binary passes." @@ -205,7 +205,7 @@ while [[ "$CONTINUE" -ne "0" ]] ; do echo " OPTIMIZED_BINARY failure=$FAIL" else echo "Did it pass? 
Type the return code [0 = pass, 1 = fail]" - read -n1 PASS + read -n1 FAIL fi else FAIL=1 diff --git a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp index e329588290cd4b..2b2d80ea9346bd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.cpp @@ -303,7 +303,7 @@ void InfiniteLoopCheck::check(const MatchFinder::MatchResult &Result) { } } - if (ExprMutationAnalyzer::isUnevaluated(LoopStmt, *LoopStmt, *Result.Context)) + if (ExprMutationAnalyzer::isUnevaluated(LoopStmt, *Result.Context)) return; if (isAtLeastOneCondVarChanged(Func, LoopStmt, Cond, Result.Context)) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp index 6a6e620a4387b0..f615976c7edb62 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidConstOrRefDataMembersCheck.cpp @@ -13,79 +13,88 @@ using namespace clang::ast_matchers; namespace clang::tidy::cppcoreguidelines { -namespace { -AST_MATCHER(FieldDecl, isMemberOfLambda) { - return Node.getParent()->isLambda(); +static bool isCopyConstructible(CXXRecordDecl const &Node) { + if (Node.needsOverloadResolutionForCopyConstructor() && + Node.needsImplicitCopyConstructor()) { + // unresolved + for (CXXBaseSpecifier const &BS : Node.bases()) { + CXXRecordDecl const *BRD = BS.getType()->getAsCXXRecordDecl(); + if (BRD != nullptr && !isCopyConstructible(*BRD)) + return false; + } + } + if (Node.hasSimpleCopyConstructor()) + return true; + for (CXXConstructorDecl const *Ctor : Node.ctors()) + if (Ctor->isCopyConstructor()) + return !Ctor->isDeleted(); + return false; } -struct MemberFunctionInfo { - bool Declared{}; - bool Deleted{}; -}; - -struct MemberFunctionPairInfo { - 
MemberFunctionInfo Copy{}; - MemberFunctionInfo Move{}; -}; - -MemberFunctionPairInfo getConstructorsInfo(CXXRecordDecl const &Node) { - MemberFunctionPairInfo Constructors{}; - - for (CXXConstructorDecl const *Ctor : Node.ctors()) { - if (Ctor->isCopyConstructor()) { - Constructors.Copy.Declared = true; - if (Ctor->isDeleted()) - Constructors.Copy.Deleted = true; - } - if (Ctor->isMoveConstructor()) { - Constructors.Move.Declared = true; - if (Ctor->isDeleted()) - Constructors.Move.Deleted = true; +static bool isMoveConstructible(CXXRecordDecl const &Node) { + if (Node.needsOverloadResolutionForMoveConstructor() && + Node.needsImplicitMoveConstructor()) { + // unresolved + for (CXXBaseSpecifier const &BS : Node.bases()) { + CXXRecordDecl const *BRD = BS.getType()->getAsCXXRecordDecl(); + if (BRD != nullptr && !isMoveConstructible(*BRD)) + return false; } } - - return Constructors; + if (Node.hasSimpleMoveConstructor()) + return true; + for (CXXConstructorDecl const *Ctor : Node.ctors()) + if (Ctor->isMoveConstructor()) + return !Ctor->isDeleted(); + return false; } -MemberFunctionPairInfo getAssignmentsInfo(CXXRecordDecl const &Node) { - MemberFunctionPairInfo Assignments{}; - - for (CXXMethodDecl const *Method : Node.methods()) { - if (Method->isCopyAssignmentOperator()) { - Assignments.Copy.Declared = true; - if (Method->isDeleted()) - Assignments.Copy.Deleted = true; +static bool isCopyAssignable(CXXRecordDecl const &Node) { + if (Node.needsOverloadResolutionForCopyAssignment() && + Node.needsImplicitCopyAssignment()) { + // unresolved + for (CXXBaseSpecifier const &BS : Node.bases()) { + CXXRecordDecl const *BRD = BS.getType()->getAsCXXRecordDecl(); + if (BRD != nullptr && !isCopyAssignable(*BRD)) + return false; } + } + if (Node.hasSimpleCopyAssignment()) + return true; + for (CXXMethodDecl const *Method : Node.methods()) + if (Method->isCopyAssignmentOperator()) + return !Method->isDeleted(); + return false; +} - if (Method->isMoveAssignmentOperator()) { - 
Assignments.Move.Declared = true; - if (Method->isDeleted()) - Assignments.Move.Deleted = true; +static bool isMoveAssignable(CXXRecordDecl const &Node) { + if (Node.needsOverloadResolutionForMoveAssignment() && + Node.needsImplicitMoveAssignment()) { + // unresolved + for (CXXBaseSpecifier const &BS : Node.bases()) { + CXXRecordDecl const *BRD = BS.getType()->getAsCXXRecordDecl(); + if (BRD != nullptr && !isMoveAssignable(*BRD)) + return false; } } - - return Assignments; + if (Node.hasSimpleMoveAssignment()) + return true; + for (CXXMethodDecl const *Method : Node.methods()) + if (Method->isMoveAssignmentOperator()) + return !Method->isDeleted(); + return false; } -AST_MATCHER(CXXRecordDecl, isCopyableOrMovable) { - MemberFunctionPairInfo Constructors = getConstructorsInfo(Node); - MemberFunctionPairInfo Assignments = getAssignmentsInfo(Node); +namespace { - if (Node.hasSimpleCopyConstructor() || - (Constructors.Copy.Declared && !Constructors.Copy.Deleted)) - return true; - if (Node.hasSimpleMoveConstructor() || - (Constructors.Move.Declared && !Constructors.Move.Deleted)) - return true; - if (Node.hasSimpleCopyAssignment() || - (Assignments.Copy.Declared && !Assignments.Copy.Deleted)) - return true; - if (Node.hasSimpleMoveAssignment() || - (Assignments.Move.Declared && !Assignments.Move.Deleted)) - return true; +AST_MATCHER(FieldDecl, isMemberOfLambda) { + return Node.getParent()->isLambda(); +} - return false; +AST_MATCHER(CXXRecordDecl, isCopyableOrMovable) { + return isCopyConstructible(Node) || isMoveConstructible(Node) || + isCopyAssignable(Node) || isMoveAssignable(Node); } } // namespace diff --git a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp index d900978f65a944..71eb2d94cd4f26 100644 --- a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp @@ -8,14 +8,12 @@ #include "UseInternalLinkageCheck.h" 
#include "../utils/FileExtensionsUtils.h" -#include "../utils/LexerUtils.h" #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/ASTMatchers/ASTMatchersMacros.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" -#include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" #include "llvm/ADT/STLExtras.h" @@ -47,6 +45,8 @@ namespace { AST_MATCHER(Decl, isFirstDecl) { return Node.isFirstDecl(); } +AST_MATCHER(FunctionDecl, hasBody) { return Node.hasBody(); } + static bool isInMainFile(SourceLocation L, SourceManager &SM, const FileExtensionsSet &HeaderFileExtensions) { for (;;) { @@ -103,7 +103,7 @@ void UseInternalLinkageCheck::registerMatchers(MatchFinder *Finder) { // 4. friend hasAncestor(friendDecl())))); Finder->addMatcher( - functionDecl(Common, unless(cxxMethodDecl()), unless(isMain())) + functionDecl(Common, hasBody(), unless(cxxMethodDecl()), unless(isMain())) .bind("fn"), this); Finder->addMatcher(varDecl(Common, hasGlobalStorage()).bind("var"), this); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index dcfe68e020fc93..fec2c20206bc4d 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -207,6 +207,10 @@ Changes in existing checks fix false positive that floating point variable is only used in increment expression. +- Improved :doc:`cppcoreguidelines-avoid-const-or-ref-data-members + ` check to + avoid false positives when detecting a templated class with inheritance. + - Improved :doc:`cppcoreguidelines-init-variables ` check by fixing the insertion location for function pointers. @@ -228,6 +232,11 @@ Changes in existing checks ` check to avoid false positive for C++23 deducing this. 
+- Improved :doc:`misc-use-internal-linkage + ` check to insert ``static`` + keyword before type qualifiers such as ``const`` and ``volatile`` and fix + false positives for function declaration without body. + - Improved :doc:`modernize-avoid-c-arrays ` check to suggest using ``std::span`` as a replacement for parameters of incomplete C array type in @@ -237,10 +246,6 @@ Changes in existing checks ` check to fix false positive when using loop variable in initializer of lambda capture. -- Improved :doc:`misc-use-internal-linkage - ` check to insert ``static`` keyword - before type qualifiers such as ``const`` and ``volatile``. - - Improved :doc:`modernize-min-max-use-initializer-list ` check by fixing a false positive when only an implicit conversion happened inside an diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/use-internal-linkage.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/use-internal-linkage.rst index 7147af9a7919bc..b8bbcc62706101 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/use-internal-linkage.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/use-internal-linkage.rst @@ -16,7 +16,7 @@ Example: int v1; // can be marked as static - void fn1(); // can be marked as static + void fn1() {} // can be marked as static namespace { // already in anonymous namespace @@ -26,6 +26,9 @@ Example: // already declared as extern extern int v2; + void fn3(); // without function body in all declaration, maybe external linkage + void fn3(); + Options ------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/avoid-const-or-ref-data-members.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/avoid-const-or-ref-data-members.cpp index e3864be134da3c..19da88300aec46 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/avoid-const-or-ref-data-members.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/avoid-const-or-ref-data-members.cpp @@ -285,6 +285,28 @@ struct 
InheritBothFromNonCopyableAndNonMovable : NonCopyable, NonMovable int& x; // OK, non copyable nor movable }; +template struct TemplateInheritFromNonCopyable : NonCopyable +{ + int& x; + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: member 'x' of type 'int &' is a reference +}; + +template struct TemplateInheritFromNonMovable : NonMovable +{ + int& x; + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: member 'x' of type 'int &' is a reference +}; + +template struct TemplateInheritFromNonCopyableNonMovable : NonCopyableNonMovable +{ + int& x; // OK, non copyable nor movable +}; + +template struct TemplateInheritBothFromNonCopyableAndNonMovable : NonCopyable, NonMovable +{ + int& x; // OK, non copyable nor movable +}; + // Test composition struct ContainsNonCopyable { diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp index 8dc739da3a2734..bf0d2c2513e562 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp @@ -13,59 +13,59 @@ void func_template() {} // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_template' // CHECK-FIXES: static void func_template() {} -void func_cpp_inc(); +void func_cpp_inc() {} // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc' -// CHECK-FIXES: static void func_cpp_inc(); +// CHECK-FIXES: static void func_cpp_inc() {} -int* func_cpp_inc_return_ptr(); +int* func_cpp_inc_return_ptr() {} // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc_return_ptr' -// CHECK-FIXES: static int* func_cpp_inc_return_ptr(); +// CHECK-FIXES: static int* func_cpp_inc_return_ptr() {} -const int* func_cpp_inc_return_const_ptr(); +const int* func_cpp_inc_return_const_ptr() {} // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_const_ptr' -// CHECK-FIXES: static const int* 
func_cpp_inc_return_const_ptr(); +// CHECK-FIXES: static const int* func_cpp_inc_return_const_ptr() {} -int const* func_cpp_inc_return_ptr_const(); +int const* func_cpp_inc_return_ptr_const() {} // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_ptr_const' -// CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const(); +// CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const() {} -int * const func_cpp_inc_return_const(); +int * const func_cpp_inc_return_const() {} // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: function 'func_cpp_inc_return_const' -// CHECK-FIXES: static int * const func_cpp_inc_return_const(); +// CHECK-FIXES: static int * const func_cpp_inc_return_const() {} -volatile const int* func_cpp_inc_return_volatile_const_ptr(); +volatile const int* func_cpp_inc_return_volatile_const_ptr() {} // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: function 'func_cpp_inc_return_volatile_const_ptr' -// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr(); +// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr() {} -[[nodiscard]] void func_nodiscard(); +[[nodiscard]] void func_nodiscard() {} // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: function 'func_nodiscard' -// CHECK-FIXES: {{\[\[nodiscard\]\]}} static void func_nodiscard(); +// CHECK-FIXES: {{\[\[nodiscard\]\]}} static void func_nodiscard() {} #define NDS [[nodiscard]] #define NNDS -NDS void func_nds(); +NDS void func_nds() {} // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: function 'func_nds' -// CHECK-FIXES: NDS static void func_nds(); +// CHECK-FIXES: NDS static void func_nds() {} -NNDS void func_nnds(); +NNDS void func_nnds() {} // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: function 'func_nnds' -// CHECK-FIXES: NNDS static void func_nnds(); +// CHECK-FIXES: NNDS static void func_nnds() {} #include "func_cpp.inc" -void func_h_inc(); +void func_h_inc() {} struct S { void method(); }; void S::method() {} -void func_header(); 
-extern void func_extern(); -static void func_static(); +void func_header() {} +extern void func_extern() {} +static void func_static() {} namespace { -void func_anonymous_ns(); +void func_anonymous_ns() {} } // namespace int main(int argc, const char*argv[]) {} @@ -75,3 +75,13 @@ void func_extern_c_1() {} } extern "C" void func_extern_c_2() {} + +namespace gh117488 { +void func_with_body(); +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_with_body' +// CHECK-FIXES: static void func_with_body(); +void func_with_body() {} + +void func_without_body(); +void func_without_body(); +} diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index b601f4da0b3a93..7396211715a80a 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -176,6 +176,15 @@ Thread Safety Analysis | aaron.puchert\@sap.com (email), aaronpuchert (GitHub), aaronpuchert (Discourse) +Function Effect Analysis +~~~~~~~~~~~~~~~~~~~~~~~~ +| Doug Wyatt +| dwyatt\@apple.com (email), dougsonos (GitHub), dougsonos (Discourse) + +| Sirraide +| aeternalmail\@gmail.com (email), Sirraide (GitHub), Ætérnal (Discord), Sirraide (Discourse) + + Tools ----- These maintainers are responsible for user-facing tools under the Clang diff --git a/clang/docs/ClangFormat.rst b/clang/docs/ClangFormat.rst index 7afad5b15b2d54..e17d741b0a00eb 100644 --- a/clang/docs/ClangFormat.rst +++ b/clang/docs/ClangFormat.rst @@ -33,7 +33,7 @@ to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code. Clang-format options: --Werror - If set, changes formatting warnings to errors - --Wno-error= - If set don't error out on the specified warning type. + --Wno-error= - If set, don't error out on the specified warning type. =unknown - If set, unknown format options are only warned about. This can be used to enable formatting, even if the configuration contains unknown (newer) options. 
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 3f996ceaff156c..481362dba3f51d 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -290,7 +290,7 @@ implementation. +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | memory management | changes to omp_alloctrait_key enum | :none:`unclaimed` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| memory model | seq_cst clause on flush construct | :none:`unclaimed` | | +| memory model | seq_cst clause on flush construct | :good:`done` | https://github.com/llvm/llvm-project/pull/114072 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | misc | 'omp_all_memory' keyword and use in 'depend' clause | :good:`done` | D125828, D126321 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8bd06fadfdc984..b9986434d09d24 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -471,6 +471,8 @@ Attribute Changes in Clang - Clang now supports ``[[clang::lifetime_capture_by(X)]]``. Similar to lifetimebound, this can be used to specify when a reference to a function parameter is captured by another capturing entity ``X``. +- The ``target_version`` attribute is now only supported for AArch64 and RISC-V architectures. 
+ Improvements to Clang's diagnostics ----------------------------------- @@ -714,6 +716,7 @@ Bug Fixes to C++ Support assumption if they also occur inside of a dependent lambda. (#GH114787) - Clang now uses valid deduced type locations when diagnosing functions with trailing return type missing placeholder return type. (#GH78694) +- Fixed a bug where bounds of partially expanded pack indexing expressions were checked too early. (#GH116105) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 696a574833dad2..1a24b8857674ca 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -4390,17 +4390,17 @@ class PackIndexingExpr final unsigned TransformedExpressions : 31; LLVM_PREFERRED_TYPE(bool) - unsigned ExpandedToEmptyPack : 1; + unsigned FullySubstituted : 1; PackIndexingExpr(QualType Type, SourceLocation EllipsisLoc, SourceLocation RSquareLoc, Expr *PackIdExpr, Expr *IndexExpr, ArrayRef SubstitutedExprs = {}, - bool ExpandedToEmptyPack = false) + bool FullySubstituted = false) : Expr(PackIndexingExprClass, Type, VK_LValue, OK_Ordinary), EllipsisLoc(EllipsisLoc), RSquareLoc(RSquareLoc), SubExprs{PackIdExpr, IndexExpr}, TransformedExpressions(SubstitutedExprs.size()), - ExpandedToEmptyPack(ExpandedToEmptyPack) { + FullySubstituted(FullySubstituted) { auto *Exprs = getTrailingObjects(); std::uninitialized_copy(SubstitutedExprs.begin(), SubstitutedExprs.end(), @@ -4424,12 +4424,16 @@ class PackIndexingExpr final SourceLocation RSquareLoc, Expr *PackIdExpr, Expr *IndexExpr, std::optional Index, ArrayRef SubstitutedExprs = {}, - bool ExpandedToEmptyPack = false); + bool FullySubstituted = false); static PackIndexingExpr *CreateDeserialized(ASTContext &Context, unsigned NumTransformedExprs); + bool isFullySubstituted() const { return FullySubstituted; } + /// Determine if the expression was expanded to empty. 
- bool expandsToEmptyPack() const { return ExpandedToEmptyPack; } + bool expandsToEmptyPack() const { + return isFullySubstituted() && TransformedExpressions == 0; + } /// Determine the location of the 'sizeof' keyword. SourceLocation getEllipsisLoc() const { return EllipsisLoc; } diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 00c87e71bde31b..d2f5267e4da5ea 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2670,8 +2670,8 @@ class OMPCompareClause final : public OMPClause { } }; -/// This represents 'seq_cst' clause in the '#pragma omp atomic' -/// directive. +/// This represents 'seq_cst' clause in the '#pragma omp atomic|flush' +/// directives. /// /// \code /// #pragma omp atomic seq_cst diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 1ed5c22361ca68..90a52b1dcbf624 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -5922,12 +5922,12 @@ class PackIndexingType final unsigned Size : 31; LLVM_PREFERRED_TYPE(bool) - unsigned ExpandsToEmptyPack : 1; + unsigned FullySubstituted : 1; protected: friend class ASTContext; // ASTContext creates these. 
PackIndexingType(const ASTContext &Context, QualType Canonical, - QualType Pattern, Expr *IndexExpr, bool ExpandsToEmptyPack, + QualType Pattern, Expr *IndexExpr, bool FullySubstituted, ArrayRef Expansions = {}); public: @@ -5951,7 +5951,9 @@ class PackIndexingType final bool hasSelectedType() const { return getSelectedIndex() != std::nullopt; } - bool expandsToEmptyPack() const { return ExpandsToEmptyPack; } + bool isFullySubstituted() const { return FullySubstituted; } + + bool expandsToEmptyPack() const { return isFullySubstituted() && Size == 0; } ArrayRef getExpansions() const { return {getExpansionsPtr(), Size}; @@ -5965,10 +5967,10 @@ class PackIndexingType final if (hasSelectedType()) getSelectedType().Profile(ID); else - Profile(ID, Context, getPattern(), getIndexExpr(), expandsToEmptyPack()); + Profile(ID, Context, getPattern(), getIndexExpr(), isFullySubstituted()); } static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, - QualType Pattern, Expr *E, bool ExpandsToEmptyPack); + QualType Pattern, Expr *E, bool FullySubstituted); private: const QualType *getExpansionsPtr() const { diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index a8b9c920b617c0..6f1a76bd18fb50 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -473,12 +473,12 @@ let Class = PackIndexingType in { def : Property<"indexExpression", ExprRef> { let Read = [{ node->getIndexExpr() }]; } - def : Property<"expandsToEmptyPack", Bool> { - let Read = [{ node->expandsToEmptyPack() }]; + def : Property<"isFullySubstituted", Bool> { + let Read = [{ node->isFullySubstituted() }]; } def : Creator<[{ - return ctx.getPackIndexingType(pattern, indexExpression, expandsToEmptyPack); + return ctx.getPackIndexingType(pattern, indexExpression, isFullySubstituted); }]>; } diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h 
b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h index c7a5b016c949d0..7442f4aad531b7 100644 --- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h +++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h @@ -47,8 +47,6 @@ class ExprMutationAnalyzer { const Stmt *findPointeeMutation(const Expr *Exp); const Stmt *findPointeeMutation(const Decl *Dec); - static bool isUnevaluated(const Stmt *Smt, const Stmt &Stm, - ASTContext &Context); private: using MutationFinder = const Stmt *(Analyzer::*)(const Expr *); @@ -58,8 +56,6 @@ class ExprMutationAnalyzer { Memoized::ResultMap &MemoizedResults); const Stmt *tryEachDeclRef(const Decl *Dec, MutationFinder Finder); - bool isUnevaluated(const Expr *Exp); - const Stmt *findExprMutation(ArrayRef Matches); const Stmt *findDeclMutation(ArrayRef Matches); const Stmt * @@ -83,6 +79,10 @@ class ExprMutationAnalyzer { ExprMutationAnalyzer(const Stmt &Stm, ASTContext &Context) : Memorized(), A(Stm, Context, Memorized) {} + /// check whether stmt is unevaluated. mutation analyzer will ignore the + /// content in unevaluated stmt. 
+ static bool isUnevaluated(const Stmt *Stm, ASTContext &Context); + bool isMutated(const Expr *Exp) { return findMutation(Exp) != nullptr; } bool isMutated(const Decl *Dec) { return findMutation(Dec) != nullptr; } const Stmt *findMutation(const Expr *Exp) { return A.findMutation(Exp); } @@ -101,11 +101,6 @@ class ExprMutationAnalyzer { return A.findPointeeMutation(Dec); } - static bool isUnevaluated(const Stmt *Smt, const Stmt &Stm, - ASTContext &Context) { - return Analyzer::isUnevaluated(Smt, Stm, Context); - } - private: Memoized Memorized; Analyzer A; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 634253d0032560..14009826f2c550 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3297,7 +3297,7 @@ def Target : InheritableAttr { }]; } -def TargetVersion : InheritableAttr { +def TargetVersion : InheritableAttr, TargetSpecificAttr> { let Spellings = [GCC<"target_version">]; let Args = [StringArgument<"NamesStr">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 290feb58754adb..83c90b3d6e681b 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4750,6 +4750,12 @@ def HLSLAny : LangBuiltin<"HLSL_LANG"> { let Prototype = "bool(...)"; } +def HLSLAsDouble : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_asdouble"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLWaveActiveAnyTrue : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_wave_active_any_true"]; let Attributes = [NoThrow, Const]; diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index c3d8adcd966a92..548bcc8ad55f48 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -454,6 +454,13 @@ 
TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_bf8_bf8, "V4fV4iV8iV4fiIiIi TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_bf8_fp8, "V4fV4iV8iV4fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_bf8, "V4fV4iV8iV4fiIiIi", "nc", "gfx950-insts") TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8, "V4fV4iV8iV4fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") +TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8, "V16fV4iV8iV16fiIiIi", "nc", "gfx950-insts") + +TARGET_BUILTIN(__builtin_amdgcn_permlane16_swap, "V2UiUiUiIbIb", "nc", "permlane16-swap") +TARGET_BUILTIN(__builtin_amdgcn_permlane32_swap, "V2UiUiUiIbIb", "nc", "permlane32-swap") //===----------------------------------------------------------------------===// // GFX12+ only builtins. 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index eb05a6a77978af..6ff24c2bc8faad 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11396,7 +11396,7 @@ def err_omp_atomic_weak_no_equality : Error<"expected '==' operator for 'weak' c def err_omp_atomic_several_clauses : Error< "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause">; def err_omp_several_mem_order_clauses : Error< - "directive '#pragma omp %0' cannot contain more than one %select{'seq_cst', 'relaxed', |}1'acq_rel', 'acquire' or 'release' clause">; + "directive '#pragma omp %0' cannot contain more than one 'seq_cst',%select{ 'relaxed',|}1 'acq_rel', 'acquire' or 'release' clause">; def err_omp_atomic_incompatible_mem_order_clause : Error< "directive '#pragma omp atomic%select{ %0|}1' cannot be used with '%2' clause">; def note_omp_previous_mem_order_clause : Note< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e5f2fec88706d8..40fd48761928b3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5888,12 +5888,24 @@ def target : Joined<["--"], "target=">, Flags<[NoXarchOption]>, def darwin_target_variant : Separate<["-"], "darwin-target-variant">, Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption]>, HelpText<"Generate code for an additional runtime variant of the deployment target">; + +//===----------------------------------------------------------------------===// +// Print CPU info options (clang, clang-cl, flang) +//===----------------------------------------------------------------------===// + +let Visibility = [ClangOption, CC1Option, CLOption, FlangOption, FC1Option] in { + def print_supported_cpus : Flag<["-", "--"], "print-supported-cpus">, Group, - Visibility<[ClangOption, CC1Option, 
CLOption]>, - HelpText<"Print supported cpu models for the given target (if target is not specified," - " it will print the supported cpus for the default target)">, + HelpText<"Print supported cpu models for the given target (if target is not " + "specified,it will print the supported cpus for the default target)">, MarshallingInfoFlag>; + +def : Flag<["-"], "mcpu=help">, Alias; +def : Flag<["-"], "mtune=help">, Alias; + +} // let Visibility = [ClangOption, CC1Option, CLOption, FlangOption, FC1Option] + def print_supported_extensions : Flag<["-", "--"], "print-supported-extensions">, Visibility<[ClangOption, CC1Option, CLOption]>, HelpText<"Print supported -march extensions (RISC-V, AArch64 and ARM only)">, @@ -5903,8 +5915,6 @@ def print_enabled_extensions : Flag<["-", "--"], "print-enabled-extensions">, HelpText<"Print the extensions enabled by the given target and -march/-mcpu options." " (AArch64 and RISC-V only)">, MarshallingInfoFlag>; -def : Flag<["-"], "mcpu=help">, Alias; -def : Flag<["-"], "mtune=help">, Alias; def time : Flag<["-"], "time">, HelpText<"Time individual commands">; def traditional_cpp : Flag<["-", "--"], "traditional-cpp">, diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index ea6b414618c1d0..056fad2cc0ff8c 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -4970,12 +4970,11 @@ struct FormatStyle { /// \version 12 std::vector StatementAttributeLikeMacros; - /// A vector of macros that should be interpreted as complete - /// statements. + /// A vector of macros that should be interpreted as complete statements. /// - /// Typical macros are expressions, and require a semi-colon to be - /// added; sometimes this is not the case, and this allows to make - /// clang-format aware of such cases. + /// Typical macros are expressions and require a semicolon to be added. 
+ /// Sometimes this is not the case, and this allows to make clang-format aware + /// of such cases. /// /// For example: Q_UNUSED /// \version 8 diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5fe23e0d0efd3b..24abd5d95dd844 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -14257,7 +14257,7 @@ class Sema final : public SemaBase { SourceLocation EllipsisLoc, Expr *IndexExpr, SourceLocation RSquareLoc, ArrayRef ExpandedExprs = {}, - bool EmptyPack = false); + bool FullySubstituted = false); /// Handle a C++1z fold-expression: ( expr op ... op expr ). ExprResult ActOnCXXFoldExpr(Scope *S, SourceLocation LParenLoc, Expr *LHS, diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 4f5d14cbd59bbf..f9e08b70d6ab0a 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -1087,10 +1087,6 @@ void APValue::MakeArray(unsigned InitElts, unsigned Size) { Kind = Array; } -MutableArrayRef -setLValueUninit(APValue::LValueBase B, const CharUnits &O, unsigned Size, - bool OnePastTheEnd, bool IsNullPtr); - MutableArrayRef APValue::setMemberPointerUninit(const ValueDecl *Member, bool IsDerivedMember, unsigned Size) { diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp index bdc713ca3e791e..f7ee0fb3ee92da 100644 --- a/clang/lib/AST/ASTConcept.cpp +++ b/clang/lib/AST/ASTConcept.cpp @@ -22,11 +22,11 @@ static void CreateUnsatisfiedConstraintRecord(const ASTContext &C, const UnsatisfiedConstraintRecord &Detail, UnsatisfiedConstraintRecord *TrailingObject) { - if (Detail.is()) - new (TrailingObject) UnsatisfiedConstraintRecord(Detail.get()); + if (auto *E = dyn_cast(Detail)) + new (TrailingObject) UnsatisfiedConstraintRecord(E); else { auto &SubstitutionDiagnostic = - *Detail.get *>(); + *cast *>(Detail); StringRef Message = C.backupStr(SubstitutionDiagnostic.second); auto *NewSubstDiag = new (C) std::pair( SubstitutionDiagnostic.first, Message); diff --git 
a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 23df7878a3bf29..80e8c5b9df58e7 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -374,10 +374,10 @@ static const Decl &adjustDeclToTemplate(const Decl &D) { llvm::PointerUnion PU = CTSD->getSpecializedTemplateOrPartial(); - return PU.is() - ? *static_cast(PU.get()) + return isa(PU) + ? *static_cast(cast(PU)) : *static_cast( - PU.get()); + cast(PU)); } // Class is instantiated from a member definition of a class template? @@ -6223,13 +6223,11 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr, ArrayRef Expansions, int Index) const { QualType Canonical; - bool ExpandsToEmptyPack = FullySubstituted && Expansions.empty(); if (FullySubstituted && Index != -1) { Canonical = getCanonicalType(Expansions[Index]); } else { llvm::FoldingSetNodeID ID; - PackIndexingType::Profile(ID, *this, Pattern, IndexExpr, - ExpandsToEmptyPack); + PackIndexingType::Profile(ID, *this, Pattern, IndexExpr, FullySubstituted); void *InsertPos = nullptr; PackIndexingType *Canon = DependentPackIndexingTypes.FindNodeOrInsertPos(ID, InsertPos); @@ -6238,7 +6236,7 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr, PackIndexingType::totalSizeToAlloc(Expansions.size()), TypeAlignment); Canon = new (Mem) PackIndexingType(*this, QualType(), Pattern, IndexExpr, - ExpandsToEmptyPack, Expansions); + FullySubstituted, Expansions); DependentPackIndexingTypes.InsertNode(Canon, InsertPos); } Canonical = QualType(Canon, 0); @@ -6248,7 +6246,7 @@ QualType ASTContext::getPackIndexingType(QualType Pattern, Expr *IndexExpr, Allocate(PackIndexingType::totalSizeToAlloc(Expansions.size()), TypeAlignment); auto *T = new (Mem) PackIndexingType(*this, Canonical, Pattern, IndexExpr, - ExpandsToEmptyPack, Expansions); + FullySubstituted, Expansions); Types.push_back(T); return QualType(T, 0); } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 
baed1416635432..a0cd57e2e5ee0d 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -104,8 +104,8 @@ namespace clang { char ASTImportError::ID; template - SmallVector - getCanonicalForwardRedeclChain(Redeclarable* D) { + static SmallVector + getCanonicalForwardRedeclChain(Redeclarable *D) { SmallVector Redecls; for (auto *R : D->getFirstDecl()->redecls()) { if (R != D->getFirstDecl()) @@ -126,7 +126,7 @@ namespace clang { llvm_unreachable("Bad declaration kind"); } - void updateFlags(const Decl *From, Decl *To) { + static void updateFlags(const Decl *From, Decl *To) { // Check if some flags or attrs are new in 'From' and copy into 'To'. // FIXME: Other flags or attrs? if (From->isUsed(false) && !To->isUsed(false)) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 7cf2519d6a71fb..6add18ef4e1afb 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -1642,22 +1642,8 @@ bool Compiler::VisitImplicitValueInitExpr( if (QT->isIncompleteArrayType()) return true; - if (QT->isArrayType()) { - const ArrayType *AT = QT->getAsArrayTypeUnsafe(); - assert(AT); - const auto *CAT = cast(AT); - size_t NumElems = CAT->getZExtSize(); - PrimType ElemT = classifyPrim(CAT->getElementType()); - - for (size_t I = 0; I != NumElems; ++I) { - if (!this->visitZeroInitializer(ElemT, CAT->getElementType(), E)) - return false; - if (!this->emitInitElem(ElemT, I, E)) - return false; - } - - return true; - } + if (QT->isArrayType()) + return this->visitZeroArrayInitializer(QT, E); if (const auto *ComplexTy = E->getType()->getAs()) { assert(Initializing); @@ -3916,18 +3902,9 @@ bool Compiler::visitZeroRecordInitializer(const Record *R, return false; } } else if (D->isCompositeArray()) { - const Record *ElemRecord = D->ElemDesc->ElemRecord; - assert(D->ElemDesc->ElemRecord); - for (uint32_t I = 0, N = D->getNumElems(); I != N; ++I) { - if (!this->emitConstUint32(I, E)) - return false; - if 
(!this->emitArrayElemPtr(PT_Uint32, E)) - return false; - if (!this->visitZeroRecordInitializer(ElemRecord, E)) - return false; - if (!this->emitPopPtr(E)) - return false; - } + // Can't be a vector or complex field. + if (!this->visitZeroArrayInitializer(D->getType(), E)) + return false; } else if (D->isRecord()) { if (!this->visitZeroRecordInitializer(D->ElemRecord, E)) return false; @@ -3958,6 +3935,52 @@ bool Compiler::visitZeroRecordInitializer(const Record *R, return true; } +template +bool Compiler::visitZeroArrayInitializer(QualType T, const Expr *E) { + assert(T->isArrayType() || T->isAnyComplexType() || T->isVectorType()); + const ArrayType *AT = T->getAsArrayTypeUnsafe(); + QualType ElemType = AT->getElementType(); + size_t NumElems = cast(AT)->getZExtSize(); + + if (std::optional ElemT = classify(ElemType)) { + for (size_t I = 0; I != NumElems; ++I) { + if (!this->visitZeroInitializer(*ElemT, ElemType, E)) + return false; + if (!this->emitInitElem(*ElemT, I, E)) + return false; + } + return true; + } else if (ElemType->isRecordType()) { + const Record *R = getRecord(ElemType); + + for (size_t I = 0; I != NumElems; ++I) { + if (!this->emitConstUint32(I, E)) + return false; + if (!this->emitArrayElemPtr(PT_Uint32, E)) + return false; + if (!this->visitZeroRecordInitializer(R, E)) + return false; + if (!this->emitPopPtr(E)) + return false; + } + return true; + } else if (ElemType->isArrayType()) { + for (size_t I = 0; I != NumElems; ++I) { + if (!this->emitConstUint32(I, E)) + return false; + if (!this->emitArrayElemPtr(PT_Uint32, E)) + return false; + if (!this->visitZeroArrayInitializer(ElemType, E)) + return false; + if (!this->emitPopPtr(E)) + return false; + } + return true; + } + + return false; +} + template template bool Compiler::emitConst(T Value, PrimType Ty, const Expr *E) { @@ -4033,7 +4056,7 @@ unsigned Compiler::allocateLocalPrimitive(DeclTy &&Src, PrimType Ty, // (int){12} in C. Consider using Expr::isTemporaryObject() instead // or isa(). 
Descriptor *D = P.createDescriptor(Src, Ty, Descriptor::InlineDescMD, IsConst, - Src.is()); + isa(Src)); Scope::Local Local = this->createLocal(D); if (auto *VD = dyn_cast_if_present(Src.dyn_cast())) Locals.insert({VD, Local}); diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index d1b624daba6b99..2a94f5ec76b6c5 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -325,6 +325,7 @@ class Compiler : public ConstStmtVisitor, bool>, /// Emits a zero initializer. bool visitZeroInitializer(PrimType T, QualType QT, const Expr *E); bool visitZeroRecordInitializer(const Record *R, const Expr *E); + bool visitZeroArrayInitializer(QualType T, const Expr *E); /// Emits an APSInt constant. bool emitConst(const llvm::APSInt &Value, PrimType Ty, const Expr *E); diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp index 85522ffd32dcc6..496c1dcef59b51 100644 --- a/clang/lib/AST/ByteCode/Disasm.cpp +++ b/clang/lib/AST/ByteCode/Disasm.cpp @@ -33,7 +33,7 @@ using namespace clang; using namespace clang::interp; -template inline T ReadArg(Program &P, CodePtr &OpPC) { +template inline static T ReadArg(Program &P, CodePtr &OpPC) { if constexpr (std::is_pointer_v) { uint32_t ID = OpPC.read(); return reinterpret_cast(P.getNativePointer(ID)); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 144f2291651ccf..b450d8263c30bf 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -47,7 +47,7 @@ static APSInt getAPSIntParam(const InterpFrame *Frame, unsigned Index) { return R; } -PrimType getIntPrimType(const InterpState &S) { +static PrimType getIntPrimType(const InterpState &S) { const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned IntWidth = TI.getIntWidth(); @@ -58,7 +58,7 @@ PrimType getIntPrimType(const InterpState &S) { llvm_unreachable("Int isn't 16 or 32 bit?"); } -PrimType 
getLongPrimType(const InterpState &S) { +static PrimType getLongPrimType(const InterpState &S) { const TargetInfo &TI = S.getASTContext().getTargetInfo(); unsigned LongWidth = TI.getLongWidth(); diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp index 7f02464a1c0f14..20f67d9b1fd425 100644 --- a/clang/lib/AST/ByteCode/InterpFrame.cpp +++ b/clang/lib/AST/ByteCode/InterpFrame.cpp @@ -234,7 +234,12 @@ SourceInfo InterpFrame::getSource(CodePtr PC) const { if (Func && !funcHasUsableBody(Func) && Caller) return Caller->getSource(RetPC); - return S.getSource(Func, PC); + // Similarly, if the resulting source location is invalid anyway, + // point to the caller instead. + SourceInfo Result = S.getSource(Func, PC); + if (Result.getLoc().isInvalid() && Caller) + return Caller->getSource(RetPC); + return Result; } const Expr *InterpFrame::getExpr(CodePtr PC) const { diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp index 590ee19de6d2f0..c98a3506b0a90b 100644 --- a/clang/lib/AST/ByteCode/Program.cpp +++ b/clang/lib/AST/ByteCode/Program.cpp @@ -158,7 +158,7 @@ unsigned Program::getOrCreateDummy(const DeclTy &D) { if (const auto *E = D.dyn_cast()) { QT = E->getType(); } else { - const ValueDecl *VD = cast(D.get()); + const ValueDecl *VD = cast(cast(D)); IsWeak = VD->isWeak(); QT = VD->getType(); if (const auto *RT = QT->getAs()) diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index e37ebec0851951..07c4419e3cf407 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -388,9 +388,8 @@ ExprDependence clang::computeDependence(PackIndexingExpr *E) { ExprDependence::Instantiation; ArrayRef Exprs = E->getExpressions(); - if (Exprs.empty()) + if (Exprs.empty() || !E->isFullySubstituted()) D |= PatternDep | ExprDependence::Instantiation; - else if (!E->getIndexExpr()->isInstantiationDependent()) { std::optional Index = 
E->getSelectedIndex(); assert(Index && *Index < Exprs.size() && "pack index out of bound"); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index bfeb4827f79587..741e908cf9bc56 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1991,7 +1991,7 @@ void DeclaratorDecl::setQualifierInfo(NestedNameSpecifierLoc QualifierLoc) { // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. @@ -2010,7 +2010,7 @@ void DeclaratorDecl::setTrailingRequiresClause(Expr *TrailingRequiresClause) { // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. @@ -2026,7 +2026,7 @@ void DeclaratorDecl::setTemplateParameterListsInfo( // Make sure the extended decl info is allocated. if (!hasExtInfo()) { // Save (non-extended) type source info pointer. - auto *savedTInfo = DeclInfo.get(); + auto *savedTInfo = cast(DeclInfo); // Allocate external info struct. DeclInfo = new (getASTContext()) ExtInfo; // Restore savedTInfo into (extended) decl info. @@ -2534,7 +2534,7 @@ EvaluatedStmt *VarDecl::ensureEvaluatedStmt() const { // work to avoid leaking those, but we do so in VarDecl::evaluateValue // where we can detect whether there's anything to clean up or not. 
Eval = new (getASTContext()) EvaluatedStmt; - Eval->Value = Init.get(); + Eval->Value = cast(Init); Init = Eval; } return Eval; @@ -3017,7 +3017,7 @@ void ParmVarDecl::setUninstantiatedDefaultArg(Expr *arg) { Expr *ParmVarDecl::getUninstantiatedDefaultArg() { assert(hasUninstantiatedDefaultArg() && "Wrong kind of initialization expression!"); - return cast_if_present(Init.get()); + return cast_if_present(cast(Init)); } bool ParmVarDecl::hasDefaultArg() const { @@ -4010,12 +4010,12 @@ FunctionDecl::TemplatedKind FunctionDecl::getTemplatedKind() const { "No other valid types in NamedDecl"); return TK_FunctionTemplate; } - if (TemplateOrSpecialization.is()) + if (isa(TemplateOrSpecialization)) return TK_MemberSpecialization; - if (TemplateOrSpecialization.is()) + if (isa(TemplateOrSpecialization)) return TK_FunctionTemplateSpecialization; - if (TemplateOrSpecialization.is - ()) + if (isa( + TemplateOrSpecialization)) return TK_DependentFunctionTemplateSpecialization; llvm_unreachable("Did we miss a TemplateOrSpecialization type?"); @@ -4062,9 +4062,9 @@ void FunctionDecl::setDescribedFunctionTemplate( } bool FunctionDecl::isFunctionTemplateSpecialization() const { - return TemplateOrSpecialization.is() || - TemplateOrSpecialization - .is(); + return isa(TemplateOrSpecialization) || + isa( + TemplateOrSpecialization); } void FunctionDecl::setInstantiatedFromDecl(FunctionDecl *FD) { @@ -4216,7 +4216,7 @@ void FunctionDecl::setFunctionTemplateSpecialization( const TemplateArgumentListInfo *TemplateArgsAsWritten, SourceLocation PointOfInstantiation) { assert((TemplateOrSpecialization.isNull() || - TemplateOrSpecialization.is()) && + isa(TemplateOrSpecialization)) && "Member function is already a specialization"); assert(TSK != TSK_Undeclared && "Must specify the type of function template specialization"); @@ -4287,8 +4287,8 @@ TemplateSpecializationKind FunctionDecl::getTemplateSpecializationKind() const { // A dependent function template specialization is an explicit 
specialization, // except when it's a friend declaration. - if (TemplateOrSpecialization - .is() && + if (isa( + TemplateOrSpecialization) && getFriendObjectKind() == FOK_None) return TSK_ExplicitSpecialization; @@ -4331,8 +4331,8 @@ FunctionDecl::getTemplateSpecializationKindForInstantiation() const { TemplateOrSpecialization.dyn_cast()) return MSInfo->getTemplateSpecializationKind(); - if (TemplateOrSpecialization - .is() && + if (isa( + TemplateOrSpecialization) && getFriendObjectKind() == FOK_None) return TSK_ExplicitSpecialization; diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 96638b85c452b4..fb701f76231bcd 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1500,7 +1500,8 @@ DeclContext *DeclContext::getPrimaryContext() { } template -void collectAllContextsImpl(T *Self, SmallVectorImpl &Contexts) { +static void collectAllContextsImpl(T *Self, + SmallVectorImpl &Contexts) { for (T *D = Self->getMostRecentDecl(); D; D = D->getPreviousDecl()) Contexts.push_back(D); diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 39c548e9c22539..25560faae8672b 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2733,14 +2733,14 @@ int64_t CXXCtorInitializer::getID(const ASTContext &Context) const { TypeLoc CXXCtorInitializer::getBaseClassLoc() const { if (isBaseInitializer()) - return Initializee.get()->getTypeLoc(); + return cast(Initializee)->getTypeLoc(); else return {}; } const Type *CXXCtorInitializer::getBaseClass() const { if (isBaseInitializer()) - return Initializee.get()->getType().getTypePtr(); + return cast(Initializee)->getType().getTypePtr(); else return nullptr; } @@ -2752,7 +2752,7 @@ SourceLocation CXXCtorInitializer::getSourceLocation() const { if (isAnyMemberInitializer()) return getMemberLocation(); - if (const auto *TSInfo = Initializee.get()) + if (const auto *TSInfo = cast(Initializee)) return TSInfo->getTypeLoc().getBeginLoc(); return {}; diff --git 
a/clang/lib/AST/DeclFriend.cpp b/clang/lib/AST/DeclFriend.cpp index d003842bfb7c74..6bfc2eb62b2843 100644 --- a/clang/lib/AST/DeclFriend.cpp +++ b/clang/lib/AST/DeclFriend.cpp @@ -36,8 +36,7 @@ FriendDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, SourceLocation EllipsisLoc, ArrayRef FriendTypeTPLists) { #ifndef NDEBUG - if (Friend.is()) { - const auto *D = Friend.get(); + if (const auto *D = dyn_cast(Friend)) { assert(isa(D) || isa(D) || isa(D) || diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index f487032a37ab73..1da3f26bf23cd5 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -992,7 +992,7 @@ ClassTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } SourceRange @@ -1008,7 +1008,7 @@ ClassTemplateSpecializationDecl::getSourceRange() const { if (const auto *CTPSD = Pattern.dyn_cast()) return CTPSD->getSourceRange(); - return Pattern.get()->getSourceRange(); + return cast(Pattern)->getSourceRange(); } case TSK_ExplicitSpecialization: { SourceRange Range = CXXRecordDecl::getSourceRange(); @@ -1404,7 +1404,7 @@ VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get(); + return cast(SpecializedTemplate); } SourceRange VarTemplateSpecializationDecl::getSourceRange() const { @@ -1419,7 +1419,7 @@ SourceRange VarTemplateSpecializationDecl::getSourceRange() const { if (const auto *VTPSD = Pattern.dyn_cast()) return VTPSD->getSourceRange(); - VarTemplateDecl *VTD = Pattern.get(); + VarTemplateDecl *VTD = cast(Pattern); if (hasInit()) { if (VarTemplateDecl *Definition = VTD->getDefinition()) return 
Definition->getSourceRange(); diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 0ce129de85f03f..fc09d24fc30cb4 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -162,7 +162,7 @@ QualType CXXTypeidExpr::getTypeOperand(const ASTContext &Context) const { assert(isTypeOperand() && "Cannot call getTypeOperand for typeid(expr)"); Qualifiers Quals; return Context.getUnqualifiedArrayType( - Operand.get()->getType().getNonReferenceType(), Quals); + cast(Operand)->getType().getNonReferenceType(), Quals); } static bool isGLValueFromPointerDeref(const Expr *E) { @@ -216,7 +216,7 @@ QualType CXXUuidofExpr::getTypeOperand(ASTContext &Context) const { assert(isTypeOperand() && "Cannot call getTypeOperand for __uuidof(expr)"); Qualifiers Quals; return Context.getUnqualifiedArrayType( - Operand.get()->getType().getNonReferenceType(), Quals); + cast(Operand)->getType().getNonReferenceType(), Quals); } // CXXScalarValueInitExpr @@ -1717,9 +1717,9 @@ NonTypeTemplateParmDecl *SubstNonTypeTemplateParmExpr::getParameter() const { PackIndexingExpr *PackIndexingExpr::Create( ASTContext &Context, SourceLocation EllipsisLoc, SourceLocation RSquareLoc, Expr *PackIdExpr, Expr *IndexExpr, std::optional Index, - ArrayRef SubstitutedExprs, bool ExpandedToEmptyPack) { + ArrayRef SubstitutedExprs, bool FullySubstituted) { QualType Type; - if (Index && !SubstitutedExprs.empty()) + if (Index && FullySubstituted && !SubstitutedExprs.empty()) Type = SubstitutedExprs[*Index]->getType(); else Type = Context.DependentTy; @@ -1728,7 +1728,7 @@ PackIndexingExpr *PackIndexingExpr::Create( Context.Allocate(totalSizeToAlloc(SubstitutedExprs.size())); return new (Storage) PackIndexingExpr(Type, EllipsisLoc, RSquareLoc, PackIdExpr, IndexExpr, - SubstitutedExprs, ExpandedToEmptyPack); + SubstitutedExprs, FullySubstituted); } NamedDecl *PackIndexingExpr::getPackDecl() const { @@ -1829,11 +1829,11 @@ void MaterializeTemporaryExpr::setExtendingDecl(ValueDecl 
*ExtendedBy, // We may need to allocate extra storage for the mangling number and the // extended-by ValueDecl. - if (!State.is()) + if (!isa(State)) State = LifetimeExtendedTemporaryDecl::Create( - cast(State.get()), ExtendedBy, ManglingNumber); + cast(cast(State)), ExtendedBy, ManglingNumber); - auto ES = State.get(); + auto ES = cast(State); ES->ExtendingDecl = ExtendedBy; ES->ManglingNumber = ManglingNumber; } diff --git a/clang/lib/AST/ExprConcepts.cpp b/clang/lib/AST/ExprConcepts.cpp index 6efe73ea085a79..e6afcdd5dc3e86 100644 --- a/clang/lib/AST/ExprConcepts.cpp +++ b/clang/lib/AST/ExprConcepts.cpp @@ -94,8 +94,7 @@ ConceptSpecializationExpr::Create(const ASTContext &C, ConceptReference *Loc, const TypeConstraint * concepts::ExprRequirement::ReturnTypeRequirement::getTypeConstraint() const { assert(isTypeConstraint()); - auto TPL = - TypeConstraintInfo.getPointer().get(); + auto TPL = cast(TypeConstraintInfo.getPointer()); return cast(TPL->getParam(0)) ->getTypeConstraint(); } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index c6a210459240a8..c6d003073966f3 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3824,8 +3824,8 @@ static QualType getSubobjectType(QualType ObjType, QualType SubobjType, } /// Find the designated sub-object of an rvalue. -template -typename SubobjectHandler::result_type +template +static typename SubobjectHandler::result_type findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const SubobjectDesignator &Sub, SubobjectHandler &handler) { if (Sub.Invalid) @@ -7106,7 +7106,7 @@ static std::optional CheckDeleteKind(EvalInfo &Info, const Expr *E, } // Perform a call to 'operator delete' or '__builtin_operator_delete'. 
-bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) { +static bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) { if (Info.checkingPotentialConstantExpression() || Info.SpeculativeEvaluationDepth) return false; diff --git a/clang/lib/AST/ItaniumCXXABI.cpp b/clang/lib/AST/ItaniumCXXABI.cpp index bf152ca35431cd..a1b2551419f5e6 100644 --- a/clang/lib/AST/ItaniumCXXABI.cpp +++ b/clang/lib/AST/ItaniumCXXABI.cpp @@ -75,17 +75,17 @@ struct DecompositionDeclName { } namespace llvm { -template bool isDenseMapKeyEmpty(T V) { +template static bool isDenseMapKeyEmpty(T V) { return llvm::DenseMapInfo::isEqual( V, llvm::DenseMapInfo::getEmptyKey()); } -template bool isDenseMapKeyTombstone(T V) { +template static bool isDenseMapKeyTombstone(T V) { return llvm::DenseMapInfo::isEqual( V, llvm::DenseMapInfo::getTombstoneKey()); } template -std::optional areDenseMapKeysEqualSpecialValues(T LHS, T RHS) { +static std::optional areDenseMapKeysEqualSpecialValues(T LHS, T RHS) { bool LHSEmpty = isDenseMapKeyEmpty(LHS); bool RHSEmpty = isDenseMapKeyEmpty(RHS); if (LHSEmpty || RHSEmpty) diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp index 9723c0cfa83bbe..af7d9fcdc638be 100644 --- a/clang/lib/AST/ParentMapContext.cpp +++ b/clang/lib/AST/ParentMapContext.cpp @@ -50,7 +50,7 @@ DynTypedNode ParentMapContext::traverseIgnored(const DynTypedNode &N) const { } template -std::tuple +static std::tuple matchParents(const DynTypedNodeList &NodeList, ParentMapContext::ParentMap *ParentMap); @@ -107,7 +107,7 @@ class ParentMapContext::ParentMap { return DynTypedNode::create(*D); if (const auto *S = U.dyn_cast()) return DynTypedNode::create(*S); - return *U.get(); + return *cast(U); } template @@ -127,17 +127,17 @@ class ParentMapContext::ParentMap { ParentMap(ASTContext &Ctx); ~ParentMap() { for (const auto &Entry : PointerParents) { - if (Entry.second.is()) { - delete Entry.second.get(); - } else if (Entry.second.is()) { - delete 
Entry.second.get(); + if (auto *DTN = dyn_cast(Entry.second)) { + delete DTN; + } else if (auto *PV = dyn_cast(Entry.second)) { + delete PV; } } for (const auto &Entry : OtherParents) { - if (Entry.second.is()) { - delete Entry.second.get(); - } else if (Entry.second.is()) { - delete Entry.second.get(); + if (auto *DTN = dyn_cast(Entry.second)) { + delete DTN; + } else if (auto *PV = dyn_cast(Entry.second)) { + delete PV; } } } @@ -392,14 +392,14 @@ class ParentMapContext::ParentMap::ASTVisitor else NodeOrVector = new DynTypedNode(ParentStack.back()); } else { - if (!NodeOrVector.template is()) { + if (!isa(NodeOrVector)) { auto *Vector = new ParentVector( 1, getSingleDynTypedNodeFromParentMap(NodeOrVector)); delete NodeOrVector.template dyn_cast(); NodeOrVector = Vector; } - auto *Vector = NodeOrVector.template get(); + auto *Vector = cast(NodeOrVector); // Skip duplicates for types that have memoization data. // We must check that the type has memoization data before calling // llvm::is_contained() because DynTypedNode::operator== can't compare all diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp index c500507fecdf59..7d6275caedc4f5 100644 --- a/clang/lib/AST/TemplateName.cpp +++ b/clang/lib/AST/TemplateName.cpp @@ -151,13 +151,13 @@ TemplateName::NameKind TemplateName::getKind() const { return Template; } - if (Storage.is()) + if (isa(Storage)) return DependentTemplate; - if (Storage.is()) + if (isa(Storage)) return QualifiedTemplate; - UncommonTemplateNameStorage *uncommon - = Storage.get(); + UncommonTemplateNameStorage *uncommon = + cast(Storage); if (uncommon->getAsOverloadedStorage()) return OverloadedTemplate; if (uncommon->getAsAssumedTemplateName()) diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index b70f86ef31442d..edf20944f0b3ed 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -4031,12 +4031,12 @@ void DependentDecltypeType::Profile(llvm::FoldingSetNodeID &ID, 
PackIndexingType::PackIndexingType(const ASTContext &Context, QualType Canonical, QualType Pattern, - Expr *IndexExpr, bool ExpandsToEmptyPack, + Expr *IndexExpr, bool FullySubstituted, ArrayRef Expansions) : Type(PackIndexing, Canonical, computeDependence(Pattern, IndexExpr, Expansions)), Context(Context), Pattern(Pattern), IndexExpr(IndexExpr), - Size(Expansions.size()), ExpandsToEmptyPack(ExpandsToEmptyPack) { + Size(Expansions.size()), FullySubstituted(FullySubstituted) { std::uninitialized_copy(Expansions.begin(), Expansions.end(), getTrailingObjects()); @@ -4081,10 +4081,10 @@ PackIndexingType::computeDependence(QualType Pattern, Expr *IndexExpr, void PackIndexingType::Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, QualType Pattern, - Expr *E, bool ExpandsToEmptyPack) { + Expr *E, bool FullySubstituted) { Pattern.Profile(ID); E->Profile(ID, Context, true); - ID.AddBoolean(ExpandsToEmptyPack); + ID.AddBoolean(FullySubstituted); } UnaryTransformType::UnaryTransformType(QualType BaseType, diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index 5a95ef36d05024..be0e8aa5743dd9 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -231,12 +231,12 @@ ExprMutationAnalyzer::Analyzer::findPointeeMutation(const Decl *Dec) { const Stmt *ExprMutationAnalyzer::Analyzer::findMutationMemoized( const Expr *Exp, llvm::ArrayRef Finders, Memoized::ResultMap &MemoizedResults) { + // Assume Exp is not mutated before analyzing Exp. auto [Memoized, Inserted] = MemoizedResults.try_emplace(Exp); if (!Inserted) return Memoized->second; - // Assume Exp is not mutated before analyzing Exp. 
- if (isUnevaluated(Exp)) + if (ExprMutationAnalyzer::isUnevaluated(Exp, Context)) return nullptr; for (const auto &Finder : Finders) { @@ -268,41 +268,29 @@ ExprMutationAnalyzer::Analyzer::tryEachDeclRef(const Decl *Dec, return nullptr; } -bool ExprMutationAnalyzer::Analyzer::isUnevaluated(const Stmt *Exp, - const Stmt &Stm, - ASTContext &Context) { - return selectFirst( - NodeID::value, - match( - findFirst( - stmt(canResolveToExpr(Exp), - anyOf( - // `Exp` is part of the underlying expression of - // decltype/typeof if it has an ancestor of - // typeLoc. - hasAncestor(typeLoc(unless( - hasAncestor(unaryExprOrTypeTraitExpr())))), - hasAncestor(expr(anyOf( - // `UnaryExprOrTypeTraitExpr` is unevaluated - // unless it's sizeof on VLA. - unaryExprOrTypeTraitExpr(unless(sizeOfExpr( - hasArgumentOfType(variableArrayType())))), - // `CXXTypeidExpr` is unevaluated unless it's - // applied to an expression of glvalue of - // polymorphic class type. - cxxTypeidExpr( - unless(isPotentiallyEvaluated())), - // The controlling expression of - // `GenericSelectionExpr` is unevaluated. - genericSelectionExpr(hasControllingExpr( - hasDescendant(equalsNode(Exp)))), - cxxNoexceptExpr()))))) - .bind(NodeID::value)), - Stm, Context)) != nullptr; -} - -bool ExprMutationAnalyzer::Analyzer::isUnevaluated(const Expr *Exp) { - return isUnevaluated(Exp, Stm, Context); +bool ExprMutationAnalyzer::isUnevaluated(const Stmt *Stm, ASTContext &Context) { + return !match(stmt(anyOf( + // `Exp` is part of the underlying expression of + // decltype/typeof if it has an ancestor of + // typeLoc. + hasAncestor(typeLoc( + unless(hasAncestor(unaryExprOrTypeTraitExpr())))), + hasAncestor(expr(anyOf( + // `UnaryExprOrTypeTraitExpr` is unevaluated + // unless it's sizeof on VLA. + unaryExprOrTypeTraitExpr(unless(sizeOfExpr( + hasArgumentOfType(variableArrayType())))), + // `CXXTypeidExpr` is unevaluated unless it's + // applied to an expression of glvalue of + // polymorphic class type. 
+ cxxTypeidExpr(unless(isPotentiallyEvaluated())), + // The controlling expression of + // `GenericSelectionExpr` is unevaluated. + genericSelectionExpr( + hasControllingExpr(hasDescendant(equalsNode(Stm)))), + cxxNoexceptExpr()))))), + *Stm, Context) + .empty(); } const Stmt * diff --git a/clang/lib/Basic/Attributes.cpp b/clang/lib/Basic/Attributes.cpp index 6904bce3ac51ec..fa26cc584b724a 100644 --- a/clang/lib/Basic/Attributes.cpp +++ b/clang/lib/Basic/Attributes.cpp @@ -156,7 +156,7 @@ std::string AttributeCommonInfo::getNormalizedFullName() const { normalizeName(getAttrName(), getScopeName(), getSyntax())); } -AttributeCommonInfo::Scope +static AttributeCommonInfo::Scope getScopeFromNormalizedScopeName(StringRef ScopeName) { return llvm::StringSwitch(ScopeName) .Case("", AttributeCommonInfo::Scope::NONE) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4b96bdb709c777..3db439c87fa326 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -209,6 +209,41 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { return LastInst; } +static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) { + assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() && + E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) && + "asdouble operands types mismatch"); + Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0)); + Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1)); + + llvm::Type *ResultType = CGF.DoubleTy; + int N = 1; + if (auto *VTy = E->getArg(0)->getType()->getAs()) { + N = VTy->getNumElements(); + ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N); + } + + if (CGF.CGM.getTarget().getTriple().isDXIL()) + return CGF.Builder.CreateIntrinsic( + /*ReturnType=*/ResultType, Intrinsic::dx_asdouble, + ArrayRef{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble"); + + if (!E->getArg(0)->getType()->isVectorType()) { + OpLowBits = 
CGF.Builder.CreateVectorSplat(1, OpLowBits); + OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits); + } + + llvm::SmallVector Mask; + for (int i = 0; i < N; i++) { + Mask.push_back(i); + Mask.push_back(i + N); + } + + Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask); + + return CGF.Builder.CreateBitCast(BitVec, ResultType); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -18993,7 +19028,7 @@ static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) { return RT.getUDotIntrinsic(); } -Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) { +static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) { if (QT->hasSignedIntegerRepresentation()) { return RT.getFirstBitSHighIntrinsic(); } @@ -19023,6 +19058,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.any"); } + case Builtin::BI__builtin_hlsl_asdouble: + return handleAsDoubleBuiltin(*this, E); case Builtin::BI__builtin_hlsl_elementwise_clamp: { Value *OpX = EmitScalarExpr(E->getArg(0)); Value *OpMin = EmitScalarExpr(E->getArg(1)); @@ -20163,6 +20200,32 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType}); return Builder.CreateCall(F, {Arg}); } + case AMDGPU::BI__builtin_amdgcn_permlane16_swap: + case AMDGPU::BI__builtin_amdgcn_permlane32_swap: { + // Because builtin types are limited, and the intrinsic uses a struct/pair + // output, marshal the pair-of-i32 to <2 x i32>. 
+ Value *VDstOld = EmitScalarExpr(E->getArg(0)); + Value *VSrcOld = EmitScalarExpr(E->getArg(1)); + Value *FI = EmitScalarExpr(E->getArg(2)); + Value *BoundCtrl = EmitScalarExpr(E->getArg(3)); + Function *F = + CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap + ? Intrinsic::amdgcn_permlane16_swap + : Intrinsic::amdgcn_permlane32_swap); + llvm::CallInst *Call = + Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl}); + + llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0); + llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1); + + llvm::Type *ResultType = ConvertType(E->getType()); + + llvm::Value *Insert0 = Builder.CreateInsertElement( + llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0)); + llvm::Value *AsVector = + Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1)); + return AsVector; + } case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: return emitBuiltinWithOneOverloadedType<4>( *this, E, Intrinsic::amdgcn_make_buffer_rsrc); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index a0f4329e36136b..ad14b5c9b6dc80 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4417,7 +4417,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Use the -mcpu=? flag as the dummy input to cc1. Actions.clear(); - Action *InputAc = C.MakeAction(*A, types::TY_C); + Action *InputAc = C.MakeAction( + *A, IsFlangMode() ? types::TY_Fortran : types::TY_C); Actions.push_back( C.MakeAction(InputAc, types::TY_Nothing)); for (auto &I : Inputs) @@ -6621,8 +6622,8 @@ bool Driver::ShouldUseFlangCompiler(const JobAction &JA) const { return false; // And say "no" if this is not a kind of action flang understands. 
- if (!isa(JA) && !isa(JA) && - !isa(JA)) + if (!isa(JA) && !isa(JA) && + !isa(JA) && !isa(JA)) return false; return true; diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index ddd5ea248ca0cc..102794829795da 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -856,8 +856,9 @@ void CudaToolChain::addClangTargetOptions( DeviceOffloadingKind == Action::OFK_Cuda) && "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs."); - CC1Args.append( - {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"}); + CC1Args.append({"-fcuda-is-device", "-mllvm", + "-enable-memcpyopt-without-libcalls", + "-fno-threadsafe-statics"}); // Unsized function arguments used for variadics were introduced in CUDA-9.0 // We still do not support generating code that actually uses variadic diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 11070c23c75f4a..a57e1524a0b85d 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -747,6 +747,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, } } else if (isa(JA)) { CmdArgs.push_back("-emit-obj"); + } else if (isa(JA)) { + // The precompile job action is only needed for options such as -mcpu=help. + // Those will already have been handled by the fc1 driver. } else { assert(false && "Unexpected action class for Flang tool."); } @@ -911,8 +914,6 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Output.getFilename()); } - assert(Input.isFilename() && "Invalid input."); - if (Args.getLastArg(options::OPT_save_temps_EQ)) Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ); @@ -932,7 +933,18 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, } } - CmdArgs.push_back(Input.getFilename()); + // The input could be Ty_Nothing when "querying" options such as -mcpu=help + // are used. 
+ ArrayRef FrontendInputs = Input; + if (Input.isNothing()) + FrontendInputs = {}; + + for (const InputInfo &Input : FrontendInputs) { + if (Input.isFilename()) + CmdArgs.push_back(Input.getFilename()); + else + Input.getInputArg().renderAsInput(Args, CmdArgs); + } const char *Exec = Args.MakeArgString(D.GetProgramPath("flang", TC)); C.addCommand(std::make_unique(JA, *this, diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 4eb8c4f58923fd..42c48f6c9b7743 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -238,7 +238,7 @@ void HIPAMDToolChain::addClangTargetOptions( assert(DeviceOffloadingKind == Action::OFK_HIP && "Only HIP offloading kinds are supported for GPUs."); - CC1Args.push_back("-fcuda-is-device"); + CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"}); if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index fd53969e4b3b33..aed86c1fb99551 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -693,17 +693,14 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, bool DisallowLineBreaksOnThisLine = Style.LambdaBodyIndentation == FormatStyle::LBI_Signature && - Style.isCpp() && [&Current] { - // Deal with lambda arguments in C++. The aim here is to ensure that we - // don't over-indent lambda function bodies when lambdas are passed as - // arguments to function calls. We do this by ensuring that either all - // arguments (including any lambdas) go on the same line as the function - // call, or we break before the first argument. - const auto *Prev = Current.Previous; - if (!Prev) - return false; + // Deal with lambda arguments in C++. 
The aim here is to ensure that we + // don't over-indent lambda function bodies when lambdas are passed as + // arguments to function calls. We do this by ensuring that either all + // arguments (including any lambdas) go on the same line as the function + // call, or we break before the first argument. + Style.isCpp() && [&] { // For example, `/*Newline=*/false`. - if (Prev->is(TT_BlockComment) && Current.SpacesRequiredBefore == 0) + if (Previous.is(TT_BlockComment) && Current.SpacesRequiredBefore == 0) return false; const auto *PrevNonComment = Current.getPreviousNonComment(); if (!PrevNonComment || PrevNonComment->isNot(tok::l_paren)) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index a484d04155d6b2..a3e0b5c65a6f52 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -361,6 +361,24 @@ bool any(double3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_any) bool any(double4); +//===----------------------------------------------------------------------===// +// asdouble builtins +//===----------------------------------------------------------------------===// + +/// \fn double asdouble(uint LowBits, uint HighBits) +/// \brief Reinterprets a cast value (two 32-bit values) into a double. +/// \param LowBits The low 32-bit pattern of the input value. +/// \param HighBits The high 32-bit pattern of the input value. 
+ +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) +double asdouble(uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) +double2 asdouble(uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) +double3 asdouble(uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_asdouble) +double4 asdouble(uint4, uint4); + //===----------------------------------------------------------------------===// // asfloat builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index f6d787a0c88319..cd4504630f8719 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -799,7 +799,7 @@ StmtResult Parser::ParseLabeledStatement(ParsedAttributes &Attrs, } // If we've not parsed a statement yet, parse one now. - if (!SubStmt.isInvalid() && !SubStmt.isUsable()) + if (SubStmt.isUnset()) SubStmt = ParseStatement(nullptr, StmtCtx); // Broken substmt shouldn't prevent the label from being added to the AST. 
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 822202fd81dc89..0f37738b217c6d 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -571,8 +571,8 @@ static BuiltinTypeDeclBuilder setupBufferType(CXXRecordDecl *Decl, Sema &S, .addDefaultHandleConstructor(S); } -Expr *constructTypedBufferConstraintExpr(Sema &S, SourceLocation NameLoc, - TemplateTypeParmDecl *T) { +static Expr *constructTypedBufferConstraintExpr(Sema &S, SourceLocation NameLoc, + TemplateTypeParmDecl *T) { ASTContext &Context = S.getASTContext(); // Obtain the QualType for 'unsigned long' @@ -592,7 +592,8 @@ Expr *constructTypedBufferConstraintExpr(Sema &S, SourceLocation NameLoc, return TypedResExpr; } -ConceptDecl *constructTypedBufferConceptDecl(Sema &S, NamespaceDecl *NSD) { +static ConceptDecl *constructTypedBufferConceptDecl(Sema &S, + NamespaceDecl *NSD) { ASTContext &Context = S.getASTContext(); DeclContext *DC = NSD->getDeclContext(); SourceLocation DeclLoc = SourceLocation(); diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 0c70d4e5cff25c..3ac069270a352d 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1696,7 +1696,7 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { return true; } -bool CheckArgTypeIsCorrect( +static bool CheckArgTypeIsCorrect( Sema *S, Expr *Arg, QualType ExpectedType, llvm::function_ref Check) { QualType PassedType = Arg->getType(); @@ -1711,7 +1711,7 @@ bool CheckArgTypeIsCorrect( return false; } -bool CheckAllArgTypesAreCorrect( +static bool CheckAllArgTypesAreCorrect( Sema *S, CallExpr *TheCall, QualType ExpectedType, llvm::function_ref Check) { for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) { @@ -1888,6 +1888,15 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_asdouble: { + if 
(SemaRef.checkArgCount(TheCall, 2)) + return true; + if (CheckUnsignedIntRepresentation(&SemaRef, TheCall)) + return true; + + SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().DoubleTy); + break; + } case Builtin::BI__builtin_hlsl_elementwise_clamp: { if (SemaRef.checkArgCount(TheCall, 3)) return true; diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5d6b835e6da82e..976d48e1249136 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -11102,7 +11102,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFlushDirective(ArrayRef Clauses, for (const OMPClause *C : Clauses) { if (C->getClauseKind() == OMPC_acq_rel || C->getClauseKind() == OMPC_acquire || - C->getClauseKind() == OMPC_release) { + C->getClauseKind() == OMPC_release || + C->getClauseKind() == OMPC_seq_cst /*OpenMP 5.1*/) { if (MemOrderKind != OMPC_unknown) { Diag(C->getBeginLoc(), diag::err_omp_several_mem_order_clauses) << getOpenMPDirectiveName(OMPD_flush) << 1 diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 2ea2a368dd24cf..86d15f6324f4f5 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -1157,10 +1157,12 @@ ExprResult Sema::ActOnPackIndexingExpr(Scope *S, Expr *PackExpression, return Res; } -ExprResult -Sema::BuildPackIndexingExpr(Expr *PackExpression, SourceLocation EllipsisLoc, - Expr *IndexExpr, SourceLocation RSquareLoc, - ArrayRef ExpandedExprs, bool EmptyPack) { +ExprResult Sema::BuildPackIndexingExpr(Expr *PackExpression, + SourceLocation EllipsisLoc, + Expr *IndexExpr, + SourceLocation RSquareLoc, + ArrayRef ExpandedExprs, + bool FullySubstituted) { std::optional Index; if (!IndexExpr->isInstantiationDependent()) { @@ -1174,8 +1176,8 @@ Sema::BuildPackIndexingExpr(Expr *PackExpression, SourceLocation EllipsisLoc, IndexExpr = Res.get(); } - if (Index && (!ExpandedExprs.empty() || EmptyPack)) { - if (*Index < 0 || EmptyPack || *Index >= 
int64_t(ExpandedExprs.size())) { + if (Index && FullySubstituted) { + if (*Index < 0 || *Index >= int64_t(ExpandedExprs.size())) { Diag(PackExpression->getBeginLoc(), diag::err_pack_index_out_of_bound) << *Index << PackExpression << ExpandedExprs.size(); return ExprError(); @@ -1184,7 +1186,7 @@ Sema::BuildPackIndexingExpr(Expr *PackExpression, SourceLocation EllipsisLoc, return PackIndexingExpr::Create(getASTContext(), EllipsisLoc, RSquareLoc, PackExpression, IndexExpr, Index, - ExpandedExprs, EmptyPack); + ExpandedExprs, FullySubstituted); } TemplateArgumentLoc Sema::getTemplateArgumentPackExpansionPattern( diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 1465bba87724b9..9cf1b2d073a90f 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3670,10 +3670,10 @@ class TreeTransform { SourceLocation RSquareLoc, Expr *PackIdExpression, Expr *IndexExpr, ArrayRef ExpandedExprs, - bool EmptyPack = false) { + bool FullySubstituted = false) { return getSema().BuildPackIndexingExpr(PackIdExpression, EllipsisLoc, IndexExpr, RSquareLoc, ExpandedExprs, - EmptyPack); + FullySubstituted); } /// Build a new expression representing a call to a source location @@ -6769,6 +6769,7 @@ TreeTransform::TransformPackIndexingType(TypeLocBuilder &TLB, if (Out.isNull()) return QualType(); SubtitutedTypes.push_back(Out); + FullySubstituted &= !Out->containsUnexpandedParameterPack(); } // If we're supposed to retain a pack expansion, do so by temporarily // forgetting the partially-substituted parameter pack. 
@@ -15581,6 +15582,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { } SmallVector ExpandedExprs; + bool FullySubstituted = true; if (!E->expandsToEmptyPack() && E->getExpressions().empty()) { Expr *Pattern = E->getPackIdExpression(); SmallVector Unexpanded; @@ -15605,7 +15607,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { return ExprError(); return getDerived().RebuildPackIndexingExpr( E->getEllipsisLoc(), E->getRSquareLoc(), Pack.get(), IndexExpr.get(), - {}); + {}, /*FullySubstituted=*/false); } for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I); @@ -15617,6 +15619,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { OrigNumExpansions); if (Out.isInvalid()) return true; + FullySubstituted = false; } ExpandedExprs.push_back(Out.get()); } @@ -15633,6 +15636,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { OrigNumExpansions); if (Out.isInvalid()) return true; + FullySubstituted = false; ExpandedExprs.push_back(Out.get()); } } else if (!E->expandsToEmptyPack()) { @@ -15644,8 +15648,7 @@ TreeTransform::TransformPackIndexingExpr(PackIndexingExpr *E) { return getDerived().RebuildPackIndexingExpr( E->getEllipsisLoc(), E->getRSquareLoc(), E->getPackIdExpression(), - IndexExpr.get(), ExpandedExprs, - /*EmptyPack=*/ExpandedExprs.size() == 0); + IndexExpr.get(), ExpandedExprs, FullySubstituted); } template diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index c39a1950a6cf24..731ad0b64dc850 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2191,7 +2191,7 @@ void ASTStmtReader::VisitSizeOfPackExpr(SizeOfPackExpr *E) { void ASTStmtReader::VisitPackIndexingExpr(PackIndexingExpr *E) { VisitExpr(E); E->TransformedExpressions = Record.readInt(); - E->ExpandedToEmptyPack = Record.readInt(); + E->FullySubstituted = Record.readInt(); 
E->EllipsisLoc = readSourceLocation(); E->RSquareLoc = readSourceLocation(); E->SubExprs[0] = Record.readStmt(); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index e7f567ff59a8ad..4994047d9fe10f 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2191,7 +2191,7 @@ void ASTStmtWriter::VisitSizeOfPackExpr(SizeOfPackExpr *E) { void ASTStmtWriter::VisitPackIndexingExpr(PackIndexingExpr *E) { VisitExpr(E); Record.push_back(E->TransformedExpressions); - Record.push_back(E->ExpandedToEmptyPack); + Record.push_back(E->FullySubstituted); Record.AddSourceLocation(E->getEllipsisLoc()); Record.AddSourceLocation(E->getRSquareLoc()); Record.AddStmt(E->getPackIdExpression()); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp index 9312bf0af16dbf..599c2179db0f0e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp @@ -114,7 +114,7 @@ class UncountedLambdaCapturesChecker if (!DRE) return; auto *MD = dyn_cast_or_null(DRE->getDecl()); - if (!MD || CE->getNumArgs() != 1) + if (!MD || CE->getNumArgs() < 1) return; auto *Arg = CE->getArg(0)->IgnoreParenCasts(); auto *ArgRef = dyn_cast(Arg); diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp index 56f54ff168f3e8..7a4fc89a27daca 100644 --- a/clang/test/AST/ByteCode/placement-new.cpp +++ b/clang/test/AST/ByteCode/placement-new.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++2c -fcxx-exceptions -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -std=c++2c -fcxx-exceptions -fexperimental-new-constant-interpreter -verify=expected,both %s -DBYTECODE // RUN: %clang_cc1 -std=c++2c -fcxx-exceptions -verify=ref,both %s namespace 
std { @@ -338,3 +338,17 @@ namespace PR48606 { } static_assert(f()); } + +#ifdef BYTECODE +constexpr int N = [] // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{assignment to dereferenced one-past-the-end pointer is not allowed in a constant expression}} \ + // expected-note {{in call to}} +{ + struct S { + int a[1]; + }; + S s; + ::new (s.a) int[1][2][3][4](); + return s.a[0]; +}(); +#endif diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp index b63ffed8809fef..65eee9d49106df 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp @@ -125,7 +125,7 @@ void noescape_lambda() { } void lambda_capture_param(RefCountable* obj) { - auto someLambda = [&] { + auto someLambda = [&]() { obj->method(); }; someLambda(); @@ -178,3 +178,10 @@ void trivial_lambda() { }; trivial_lambda(); } + +void lambda_with_args(RefCountable* obj) { + auto trivial_lambda = [&](int v) { + obj->method(); + }; + trivial_lambda(1); +} diff --git a/clang/test/CodeGenHLSL/builtins/asdouble.hlsl b/clang/test/CodeGenHLSL/builtins/asdouble.hlsl new file mode 100644 index 00000000000000..f1c31107cdcad6 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/asdouble.hlsl @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPV + +// Test lowering of asdouble expansion to shuffle/bitcast and splat when required + +// CHECK-LABEL: test_uint +double test_uint(uint low, uint high) { + // CHECK-SPV: %[[LOW_INSERT:.*]] = insertelement <1 x i32> + // 
CHECK-SPV: %[[LOW_SHUFFLE:.*]] = shufflevector <1 x i32> %[[LOW_INSERT]], {{.*}} zeroinitializer + // CHECK-SPV: %[[HIGH_INSERT:.*]] = insertelement <1 x i32> + // CHECK-SPV: %[[HIGH_SHUFFLE:.*]] = shufflevector <1 x i32> %[[HIGH_INSERT]], {{.*}} zeroinitializer + + // CHECK-SPV: %[[SHUFFLE0:.*]] = shufflevector <1 x i32> %[[LOW_SHUFFLE]], <1 x i32> %[[HIGH_SHUFFLE]], + // CHECK-SPV-SAME: {{.*}} + // CHECK-SPV: bitcast <2 x i32> %[[SHUFFLE0]] to double + + // CHECK-DXIL: call double @llvm.dx.asdouble.i32 + return asdouble(low, high); +} + +// CHECK-DXIL: declare double @llvm.dx.asdouble.i32 + +// CHECK-LABEL: test_vuint +double3 test_vuint(uint3 low, uint3 high) { + // CHECK-SPV: %[[SHUFFLE1:.*]] = shufflevector + // CHECK-SPV-SAME: {{.*}} + // CHECK-SPV: bitcast <6 x i32> %[[SHUFFLE1]] to <3 x double> + + // CHECK-DXIL: call <3 x double> @llvm.dx.asdouble.v3i32 + return asdouble(low, high); +} + +// CHECK-DXIL: declare <3 x double> @llvm.dx.asdouble.v3i32 diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 61cbf5e65d0d21..f9e07fbc6b0480 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -89,7 +89,7 @@ // GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX942: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl index 86f4f73c81c0fc..5b75ee417e545b 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl @@ -11,6 +11,10 @@ // REQUIRES: amdgpu-registered-target typedef unsigned int uint; -void test_prng_b32(global uint* out, uint a) { +typedef unsigned int uint2 __attribute__((ext_vector_type(2))); + +void test(global uint* out, global uint2* out_v2u32, uint a, uint b) { *out = __builtin_amdgcn_prng_b32(a); // expected-error{{'__builtin_amdgcn_prng_b32' needs target feature prng-inst}} + *out_v2u32 = __builtin_amdgcn_permlane16_swap(a, b, false, false); // expected-error{{'__builtin_amdgcn_permlane16_swap' needs target feature permlane16-swap}} + *out_v2u32 = __builtin_amdgcn_permlane32_swap(a, b, false, false); // expected-error{{'__builtin_amdgcn_permlane32_swap' needs target feature permlane32-swap}} } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl index f31ba85a52a7ad..49f85982faf5a5 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl @@ -3,6 +3,7 @@ // REQUIRES: amdgpu-registered-target typedef unsigned int uint; +typedef unsigned int 
__attribute__((ext_vector_type(2))) uint2; // CHECK-LABEL: @test_prng_b32( // CHECK-NEXT: entry: @@ -19,3 +20,89 @@ typedef unsigned int uint; void test_prng_b32(global uint* out, uint a) { *out = __builtin_amdgcn_prng_b32(a); } + +// CHECK-LABEL: @test_permlane16_swap( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[OLD:%.*]], ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: store i32 [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP0]], i32 [[TMP1]], i1 false, i1 false) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0 +// CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP8]], i32 [[TMP9]], i1 true, i1 false) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { i32, i32 } [[TMP10]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { i32, i32 } [[TMP10]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 
x i32> poison, i32 [[TMP11]], i64 0 +// CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1 +// CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP14]], ptr addrspace(1) [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP16]], i32 [[TMP17]], i1 false, i1 true) +// CHECK-NEXT: [[TMP19:%.*]] = extractvalue { i32, i32 } [[TMP18]], 0 +// CHECK-NEXT: [[TMP20:%.*]] = extractvalue { i32, i32 } [[TMP18]], 1 +// CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i64 0 +// CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP20]], i64 1 +// CHECK-NEXT: [[TMP23:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP22]], ptr addrspace(1) [[TMP23]], align 8 +// CHECK-NEXT: ret void +// +void test_permlane16_swap(global uint2* out, uint old, uint src) { + *out = __builtin_amdgcn_permlane16_swap(old, src, false, false); + *out = __builtin_amdgcn_permlane16_swap(old, src, true, false); + *out = __builtin_amdgcn_permlane16_swap(old, src, false, true); +} + +// CHECK-LABEL: @test_permlane32_swap( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[OLD:%.*]], ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: store i32 [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// 
CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 [[TMP0]], i32 [[TMP1]], i1 false, i1 false) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0 +// CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 [[TMP8]], i32 [[TMP9]], i1 true, i1 false) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { i32, i32 } [[TMP10]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { i32, i32 } [[TMP10]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i64 0 +// CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1 +// CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP14]], ptr addrspace(1) [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(5) [[OLD_ADDR]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(5) [[SRC_ADDR]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 [[TMP16]], i32 [[TMP17]], i1 false, i1 true) +// CHECK-NEXT: [[TMP19:%.*]] = extractvalue { i32, i32 } [[TMP18]], 0 +// CHECK-NEXT: [[TMP20:%.*]] = extractvalue { i32, i32 } [[TMP18]], 1 +// CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i64 0 +// CHECK-NEXT: [[TMP22:%.*]] 
= insertelement <2 x i32> [[TMP21]], i32 [[TMP20]], i64 1 +// CHECK-NEXT: [[TMP23:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP22]], ptr addrspace(1) [[TMP23]], align 8 +// CHECK-NEXT: ret void +// +void test_permlane32_swap(global uint2* out, uint old, uint src) { + *out = __builtin_amdgcn_permlane32_swap(old, src, false, false); + *out = __builtin_amdgcn_permlane32_swap(old, src, true, false); + *out = __builtin_amdgcn_permlane32_swap(old, src, false, true); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl index d778df15599c53..00346baa6ff84d 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl @@ -538,4 +538,32 @@ void test_smfmac_f32_16x16x128_fp8_fp8(global v4f* out, v4i a, v8i b, v4f c, int *out = __builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8(a, b, c, idx, 0, 0); } +// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_bf8_bf8 +// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_f32_32x32x64_bf8_bf8(global v16f* out, v4i a, v8i b, v16f c, int idx) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a, b, c, idx, 0, 0); +} + +// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_bf8_fp8 +// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_f32_32x32x64_bf8_fp8(global v16f* out, v4i a, v8i b, v16f c, int idx) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a, b, c, idx, 0, 0); +} + +// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_bf8 +// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_f32_32x32x64_fp8_bf8(global v16f* out, v4i a, v8i b, 
v16f c, int idx) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, 0); +} + +// CHECK-GFX950-LABEL: @test_smfmac_f32_32x32x64_fp8_fp8 +// CHECK-GFX950: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %a, <8 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0) +void test_smfmac_f32_32x32x64_fp8_fp8(global v16f* out, v4i a, v8i b, v16f c, int idx) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, 0); +} + #endif diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 3bc6107b7fd40d..c22a43146a8c89 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1,6 +1,6 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK-AMDGCN,CHECK %s -// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefix=CHECK %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK,CHECK-SPIRV %s #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -866,7 +866,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu // CHECK-LABEL test_wavefrontsize( unsigned test_wavefrontsize() { - // CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wavefrontsize() + // CHECK-AMDGCN: ret i32 {{[0-9]+}} + // CHECK-SPIRV: {{.*}}call{{.*}} i32 @llvm.amdgcn.wavefrontsize() return __builtin_amdgcn_wavefrontsize(); } diff --git a/clang/test/Driver/cuda-no-threadsafe-statics.cu b/clang/test/Driver/cuda-no-threadsafe-statics.cu new file mode 100644 index 00000000000000..eb15312f8f7d14 --- /dev/null +++ b/clang/test/Driver/cuda-no-threadsafe-statics.cu @@ -0,0 +1,13 @@ +// Check that -fno-thread-safe-statics get passed down to device-side +// 
compilation only. +// +// RUN: %clang -### -x cuda --target=x86_64-linux-gnu -c --cuda-gpu-arch=sm_20 %s \ +// RUN: -nocudainc -nocudalib 2>&1 | FileCheck %s + +// RUN: %clang -### -x hip --target=x86_64-linux-gnu -c --cuda-gpu-arch=gfx1010 %s \ +// RUN: -nocudainc -nocudalib 2>&1 | FileCheck %s +// +// CHECK: "-fcuda-is-device" +// CHECK-SAME: "-fno-threadsafe-statics" +// CHECK: "-triple" "x86_64-unknown-linux-gnu" +// CHECK-NOT: "-fno-threadsafe-statics" diff --git a/clang/test/Driver/hip-rdc-device-only.hip b/clang/test/Driver/hip-rdc-device-only.hip index 72933c9611d89e..cbb2433f2a6a22 100644 --- a/clang/test/Driver/hip-rdc-device-only.hip +++ b/clang/test/Driver/hip-rdc-device-only.hip @@ -66,7 +66,7 @@ // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" // COMMON-SAME: {{.*}} "-main-file-name" "a.cu" -// COMMON-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" +// COMMON-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // COMMON-SAME: "-fapply-global-visibility-to-externs" // COMMON-SAME: "-target-cpu" "gfx803" // COMMON-SAME: "-fgpu-rdc" @@ -79,7 +79,7 @@ // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" // COMMON-SAME: {{.*}} "-main-file-name" "a.cu" -// COMMON-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" +// COMMON-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // COMMON-SAME: "-fapply-global-visibility-to-externs" // COMMON-SAME: "-target-cpu" "gfx900" // COMMON-SAME: "-fgpu-rdc" @@ -96,7 +96,7 @@ // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" // COMMON-SAME: {{.*}} "-main-file-name" "b.hip" -// COMMON-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" +// COMMON-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // COMMON-SAME: 
"-fapply-global-visibility-to-externs" // COMMON-SAME: "-target-cpu" "gfx803" // COMMON-SAME: "-fgpu-rdc" @@ -109,7 +109,7 @@ // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" // COMMON-SAME: {{.*}} "-main-file-name" "b.hip" -// COMMON-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" +// COMMON-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // COMMON-SAME: "-fapply-global-visibility-to-externs" // COMMON-SAME: "-target-cpu" "gfx900" // COMMON-SAME: "-fgpu-rdc" diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip index 4a91c9dbe7570e..054db261d8e57e 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -49,7 +49,7 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" -// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" @@ -72,7 +72,7 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" -// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" @@ -112,7 +112,7 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" -// CHECK-SAME: "-fcuda-is-device" "-mllvm" 
"-amdgpu-internalize-symbols" +// CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" @@ -135,7 +135,7 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" -// CHECK-SAME: "-fcuda-is-device" "-mllvm" "-amdgpu-internalize-symbols" +// CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip index 0ce5ea5174e1b1..80f325c5d73733 100644 --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -13,7 +13,7 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" -// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" +// CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" @@ -48,7 +48,7 @@ // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" -// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" +// CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: 
"{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index 6d3f46f8a94674..96da423144c1c0 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -93,7 +93,7 @@ // CHECK-SAME: "-aux-triple" [[HOST:"x86_64-[^"]+"]] // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" -// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" +// CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" @@ -105,7 +105,7 @@ // CHECK-SAME: "-aux-triple" [[HOST]] // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" -// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" +// CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-fcuda-allow-variadic-functions" "-fvisibility=hidden" // CHECK-SAME: "-fapply-global-visibility-to-externs" // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc" // CHECK-SAME: "-target-cpu" "gfx803" diff --git a/clang/test/OpenMP/flush_ast_print.cpp b/clang/test/OpenMP/flush_ast_print.cpp index 9578ada020227a..768282422032fd 100644 --- a/clang/test/OpenMP/flush_ast_print.cpp +++ b/clang/test/OpenMP/flush_ast_print.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++11 -include-pch %t 
-verify %s -ast-print | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // expected-no-diagnostics #ifndef HEADER @@ -19,6 +19,7 @@ T tmain(T argc) { #pragma omp flush acq_rel #pragma omp flush acquire #pragma omp flush release +#pragma omp flush seq_cst #pragma omp flush(a) return a + argc; } @@ -27,18 +28,21 @@ T tmain(T argc) { // CHECK-NEXT: #pragma omp flush acq_rel{{$}} // CHECK-NEXT: #pragma omp flush acquire{{$}} // CHECK-NEXT: #pragma omp flush release{{$}} +// CHECK-NEXT: #pragma omp flush seq_cst{{$}} // CHECK-NEXT: #pragma omp flush (a) // CHECK: static int a; // CHECK-NEXT: #pragma omp flush // CHECK-NEXT: #pragma omp flush acq_rel{{$}} // CHECK-NEXT: #pragma omp flush acquire{{$}} // CHECK-NEXT: #pragma omp flush release{{$}} +// CHECK-NEXT: #pragma omp flush seq_cst{{$}} // CHECK-NEXT: #pragma omp flush (a) // CHECK: static char a; // CHECK-NEXT: #pragma omp flush // CHECK-NEXT: #pragma omp flush acq_rel{{$}} // CHECK-NEXT: #pragma omp flush acquire{{$}} // CHECK-NEXT: #pragma omp flush release{{$}} +// CHECK-NEXT: #pragma omp flush seq_cst{{$}} // CHECK-NEXT: #pragma omp flush (a) int main(int argc, char **argv) { @@ -48,11 +52,13 @@ int main(int argc, char **argv) { #pragma omp flush acq_rel #pragma omp flush acquire #pragma omp flush release +#pragma omp flush seq_cst #pragma omp flush(a) // CHECK-NEXT: #pragma omp flush // CHECK-NEXT: #pragma omp flush acq_rel // CHECK-NEXT: #pragma omp flush acquire{{$}} // CHECK-NEXT: #pragma omp flush release +// 
CHECK-NEXT: #pragma omp flush seq_cst // CHECK-NEXT: #pragma omp flush (a) return tmain(argc) + tmain(argv[0][0]) + a; } diff --git a/clang/test/OpenMP/flush_codegen.cpp b/clang/test/OpenMP/flush_codegen.cpp index c7dd88ef9ac313..fa2586d9fe258d 100644 --- a/clang/test/OpenMP/flush_codegen.cpp +++ b/clang/test/OpenMP/flush_codegen.cpp @@ -1,13 +1,13 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder 
-x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER @@ -17,6 +17,7 @@ template T tmain(T argc) { static T a; #pragma omp flush +#pragma omp flush seq_cst #pragma omp flush acq_rel #pragma omp flush acquire #pragma omp flush release @@ -28,6 +29,7 @@ T tmain(T argc) { int main() { static int a; #pragma omp flush +#pragma omp flush seq_cst #pragma omp flush acq_rel #pragma omp flush acquire #pragma omp flush release @@ -37,6 +39,7 @@ int main() { // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: call {{.*}}void 
@__kmpc_flush(ptr {{(@|%).+}}) + // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) return tmain(a); // CHECK: call {{.*}} [[TMAIN:@.+]]( // CHECK: ret @@ -48,6 +51,7 @@ int main() { // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) +// CHECK: call {{.*}}void @__kmpc_flush(ptr {{(@|%).+}}) // CHECK: ret // CHECK-NOT: line: 0, diff --git a/clang/test/OpenMP/flush_messages.cpp b/clang/test/OpenMP/flush_messages.cpp index ad4830b5bf94f9..e78949bc924e15 100644 --- a/clang/test/OpenMP/flush_messages.cpp +++ b/clang/test/OpenMP/flush_messages.cpp @@ -134,14 +134,12 @@ label1 : { #pragma omp flush(argc) flush(argc) // expected-warning {{extra tokens at the end of '#pragma omp flush' are ignored}} #pragma omp parallel flush(argc) // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} ; -#pragma omp flush seq_cst // expected-error {{unexpected OpenMP clause 'seq_cst' in directive '#pragma omp flush'}} #pragma omp flush acq_rel // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} #pragma omp flush acquire // omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} #pragma omp flush release // omp45-error {{unexpected OpenMP clause 'release' in directive '#pragma omp flush'}} #pragma omp flush relaxed // expected-error {{unexpected OpenMP clause 'relaxed' in directive '#pragma omp flush'}} -#pragma omp flush seq_cst // expected-error {{unexpected OpenMP clause 'seq_cst' in directive '#pragma omp flush'}} -#pragma omp flush acq_rel acquire // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp51-error {{directive '#pragma omp flush' cannot contain more than one 'acq_rel', 'acquire' or 'release' clause}} omp51-note {{'acq_rel' clause used here}} 
-#pragma omp flush release acquire // omp45-error {{unexpected OpenMP clause 'release' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp51-error {{directive '#pragma omp flush' cannot contain more than one 'acq_rel', 'acquire' or 'release' clause}} omp51-note {{'release' clause used here}} +#pragma omp flush acq_rel acquire // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp51-error {{directive '#pragma omp flush' cannot contain more than one 'seq_cst', 'acq_rel', 'acquire' or 'release' clause}} omp51-note {{'acq_rel' clause used here}} +#pragma omp flush release acquire // omp45-error {{unexpected OpenMP clause 'release' in directive '#pragma omp flush'}} omp45-error {{unexpected OpenMP clause 'acquire' in directive '#pragma omp flush'}} omp51-error {{directive '#pragma omp flush' cannot contain more than one 'seq_cst', 'acq_rel', 'acquire' or 'release' clause}} omp51-note {{'release' clause used here}} #pragma omp flush acq_rel (argc) // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} expected-warning {{extra tokens at the end of '#pragma omp flush' are ignored}} #pragma omp flush(argc) acq_rel // omp45-error {{unexpected OpenMP clause 'acq_rel' in directive '#pragma omp flush'}} omp51-error {{'flush' directive with memory order clause 'acq_rel' cannot have the list}} omp51-note {{memory order clause 'acq_rel' is specified here}} return tmain(argc); diff --git a/clang/test/Sema/attr-target-version-unsupported.c b/clang/test/Sema/attr-target-version-unsupported.c new file mode 100644 index 00000000000000..7cf8172f5272e6 --- /dev/null +++ b/clang/test/Sema/attr-target-version-unsupported.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s + +//expected-warning@+1 {{unknown attribute 
'target_version' ignored}} +int __attribute__((target_version("aes"))) foo(void) { return 3; } diff --git a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp index 962dbb8137f289..cb679a6c3ad879 100644 --- a/clang/test/SemaCXX/cxx2c-pack-indexing.cpp +++ b/clang/test/SemaCXX/cxx2c-pack-indexing.cpp @@ -271,3 +271,37 @@ void f() { } } // namespace GH105903 + +namespace GH116105 { + +template using pack_type = Ts...[Np]; + +template using pack_expr = decltype(Ts...[Np]); + +template struct types; + +template struct indices; + +template struct repack; + +template struct repack> { + template + using pack_type_alias = types...>; + + template + using pack_expr_alias = types...>; +}; + +template struct mdispatch_ { + using Idx = __make_integer_seq; + + static_assert(__is_same( + typename repack::template pack_type_alias, types)); + + static_assert(__is_same( + typename repack::template pack_expr_alias, types)); +}; + +mdispatch_ d; + +} // namespace GH116105 diff --git a/clang/test/SemaHLSL/BuiltIns/asdouble-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/asdouble-errors.hlsl new file mode 100644 index 00000000000000..c6b57c76a1e2b3 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/asdouble-errors.hlsl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify + +double test_too_few_arg() { + return __builtin_hlsl_asdouble(); + // expected-error@-1 {{too few arguments to function call, expected 2, have 0}} +} + +double test_too_few_arg_1(uint p0) { + return __builtin_hlsl_asdouble(p0); + // expected-error@-1 {{too few arguments to function call, expected 2, have 1}} +} + +double test_too_many_arg(uint p0) { + return __builtin_hlsl_asdouble(p0, p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl index 38ba68dc21c8f2..2f1d312da7786c 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl @@ -124,3 +124,37 @@ void test_smfmac_f32_16x16x128_fp8_fp8(global float4* out, int4 a, int8 b, float *out = __builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8' must be a constant integer}} *out = __builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8' must be a constant integer}} } + +void test_smfmac_f32_32x32x64_bf8_bf8(global float16* out, int4 a, int8 b, float16 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' must be a constant integer}} +} + +void test_smfmac_f32_32x32x64_bf8_fp8(global float16* out, int4 a, int8 b, float16 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' must be a constant integer}} +} + +void test_smfmac_f32_32x32x64_fp8_bf8(global float16* out, int4 a, int8 b, float16 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} + *out = 
__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' must be a constant integer}} +} + +void test_smfmac_f32_32x32x64_fp8_fp8(global float16* out, int4 a, int8 b, float16 c, int idx, int d) +{ + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, d, 0); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a, b, c, idx, 0, d); // expected-error{{argument to '__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' must be a constant integer}} +} + +void test_permlane16_swap(__global int* out, int old, int src, bool X) { + *out = __builtin_amdgcn_permlane16_swap(old, src, X, false); // expected-error{{argument to '__builtin_amdgcn_permlane16_swap' must be a constant integer}} + *out = __builtin_amdgcn_permlane16_swap(old, src, false, X); // expected-error{{argument to '__builtin_amdgcn_permlane16_swap' must be a constant integer}} +} + +void test_permlane32_swap(__global int* out, int old, int src, bool X) { + *out = __builtin_amdgcn_permlane32_swap(old, src, X, false); // expected-error{{argument to '__builtin_amdgcn_permlane32_swap' must be a constant integer}} + *out = __builtin_amdgcn_permlane32_swap(old, src, false, X); // expected-error{{argument to '__builtin_amdgcn_permlane32_swap' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl index c01ec623c19119..e0cde1d3ad87bb 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl @@ -27,7 +27,8 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, __global float4* out12, int4 a12, int8 b12, float4 c12, __global float16* out13, int4 a13, int8 b13, float16 c13, __global float4* out14, int8 a14, int8 b14, float4 c14, int d14, int e14, - 
__global float16* out15, int8 a15, int8 b15, float16 c15, int d15, int e15) { + __global float16* out15, int8 a15, int8 b15, float16 c15, int d15, int e15, + __global uint2* out16, int a16, int b16) { *out0 = __builtin_amdgcn_mfma_f32_16x16x32_f16(a0, b0, c0, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_16x16x32_f16' needs target feature gfx950-insts}} *out1 = __builtin_amdgcn_mfma_f32_32x32x16_f16(a1, b1, c1, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_32x32x16_f16' needs target feature gfx950-insts}} *out2 = __builtin_amdgcn_mfma_f32_32x32x16_bf16(a2, b2, c2, 0, 0, 0); // expected-error{{'__builtin_amdgcn_mfma_f32_32x32x16_bf16' needs target feature gfx950-insts}} @@ -44,6 +45,12 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0, *out12 = __builtin_amdgcn_smfmac_f32_16x16x128_bf8_fp8(a12, b12, c12, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_16x16x128_bf8_fp8' needs target feature gfx950-insts}} *out12 = __builtin_amdgcn_smfmac_f32_16x16x128_fp8_bf8(a12, b12, c12, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_16x16x128_fp8_bf8' needs target feature gfx950-insts}} *out12 = __builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8(a12, b12, c12, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8' needs target feature gfx950-insts}} + *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8' needs target feature gfx950-insts}} + *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs target feature gfx950-insts}} + *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs target feature gfx950-insts}} + *out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a13, b13, c13, 0, 0, 0); // 
expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' needs target feature gfx950-insts}} *out14 = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a14, b14, c14, 0, 0, 0, d14, 0, e14); // expected-error{{'__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' needs target feature gfx950-insts}} *out15 = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a15, b15, c15, 0, 0, 0, d15, 0, e15); // expected-error{{'__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' needs target feature gfx950-insts}} + *out16 = __builtin_amdgcn_permlane16_swap(a16, b16, false, false); // expected-error{{'__builtin_amdgcn_permlane16_swap' needs target feature permlane16-swap}} + *out16 = __builtin_amdgcn_permlane32_swap(a16, b16, false, false); // expected-error{{'__builtin_amdgcn_permlane32_swap' needs target feature permlane32-swap}} } diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index cc735e48725921..5481bb6b87503c 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -178,7 +178,7 @@ enum class WNoError { Unknown }; static cl::bits WNoErrorList( "Wno-error", - cl::desc("If set don't error out on the specified warning type."), + cl::desc("If set, don't error out on the specified warning type."), cl::values( clEnumValN(WNoError::Unknown, "unknown", "If set, unknown format options are only warned about.\n" diff --git a/clang/tools/clang-shlib/CMakeLists.txt b/clang/tools/clang-shlib/CMakeLists.txt index 2d97347ea7f828..31484ec49c7739 100644 --- a/clang/tools/clang-shlib/CMakeLists.txt +++ b/clang/tools/clang-shlib/CMakeLists.txt @@ -48,13 +48,11 @@ add_clang_library(clang-cpp ${_OBJECTS} LINK_LIBS ${_DEPS}) -# AIX linker does not support version script -if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") - configure_file(simple_version_script.map.in simple_version_script.map) - if (CMAKE_SYSTEM_NAME STREQUAL "Linux") - target_link_options(clang-cpp PRIVATE 
LINKER:--version-script,${CMAKE_CURRENT_BINARY_DIR}/simple_version_script.map) - endif() +configure_file(simple_version_script.map.in simple_version_script.map) + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_options(clang-cpp PRIVATE LINKER:--version-script,${CMAKE_CURRENT_BINARY_DIR}/simple_version_script.map) endif() # Optimize function calls for default visibility definitions to avoid PLT and diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index e1ae1770e8ebe8..9db3187ac44e70 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2600,16 +2600,20 @@ TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) { EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown); // Module headers. - Tokens = Annotate("module x();\nendmodule"); + Tokens = Annotate("module x();\n" + "endmodule"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_VerilogMultiLineListLParen); - Tokens = Annotate("function automatic `x x();\nendmodule"); + Tokens = Annotate("function automatic `x x();\n" + "endmodule"); ASSERT_EQ(Tokens.size(), 10u) << Tokens; EXPECT_TOKEN(Tokens[5], tok::l_paren, TT_VerilogMultiLineListLParen); - Tokens = Annotate("function automatic x``x x();\nendmodule"); + Tokens = Annotate("function automatic x``x x();\n" + "endmodule"); ASSERT_EQ(Tokens.size(), 11u) << Tokens; EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_VerilogMultiLineListLParen); - Tokens = Annotate("function automatic x::x x();\nendmodule"); + Tokens = Annotate("function automatic x::x x();\n" + "endmodule"); ASSERT_EQ(Tokens.size(), 11u) << Tokens; EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_VerilogMultiLineListLParen); } diff --git a/clang/utils/TableGen/ClangASTNodesEmitter.cpp b/clang/utils/TableGen/ClangASTNodesEmitter.cpp index 16749d11836246..5971b0012305d9 100644 --- a/clang/utils/TableGen/ClangASTNodesEmitter.cpp +++ 
b/clang/utils/TableGen/ClangASTNodesEmitter.cpp @@ -207,8 +207,9 @@ void clang::EmitClangASTNodes(const RecordKeeper &RK, raw_ostream &OS, ClangASTNodesEmitter(RK, N, S, PriorizeIfSubclassOf).run(OS); } -void printDeclContext(const std::multimap &Tree, - const Record *DeclContext, raw_ostream &OS) { +static void +printDeclContext(const std::multimap &Tree, + const Record *DeclContext, raw_ostream &OS) { if (!DeclContext->getValueAsBit(AbstractFieldName)) OS << "DECL_CONTEXT(" << DeclContext->getName() << ")\n"; auto [II, E] = Tree.equal_range(DeclContext); diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 4aa7594ffa6eb7..534bf2d01d7957 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -1821,9 +1821,9 @@ CreateSemanticSpellings(const std::vector &Spellings, return Ret; } -void WriteSemanticSpellingSwitch(StringRef VarName, - const SemanticSpellingMap &Map, - raw_ostream &OS) { +static void WriteSemanticSpellingSwitch(StringRef VarName, + const SemanticSpellingMap &Map, + raw_ostream &OS) { OS << " switch (" << VarName << ") {\n default: " << "llvm_unreachable(\"Unknown spelling list index\");\n"; for (const auto &I : Map) @@ -2367,12 +2367,12 @@ template static void forEachSpelling(const Record &Attr, Fn &&F) { } } -std::map> NameToAttrsMap; +static std::map> NameToAttrsMap; /// Build a map from the attribute name to the Attrs that use that name. If more /// than one Attr use a name, the arguments could be different so a more complex /// check is needed in the generated switch. 
-void generateNameToAttrsMap(const RecordKeeper &Records) { +static void generateNameToAttrsMap(const RecordKeeper &Records) { for (const auto *A : Records.getAllDerivedDefinitions("Attr")) { for (const FlattenedSpelling &S : GetFlattenedSpellings(*A)) { auto [It, Inserted] = NameToAttrsMap.try_emplace(S.name()); @@ -3965,9 +3965,9 @@ void EmitClangAttrASTVisitor(const RecordKeeper &Records, raw_ostream &OS) { OS << "#endif // ATTR_VISITOR_DECLS_ONLY\n"; } -void EmitClangAttrTemplateInstantiateHelper(ArrayRef Attrs, - raw_ostream &OS, - bool AppliesToDecl) { +static void +EmitClangAttrTemplateInstantiateHelper(ArrayRef Attrs, + raw_ostream &OS, bool AppliesToDecl) { OS << " switch (At->getKind()) {\n"; for (const auto *Attr : Attrs) { @@ -4622,7 +4622,7 @@ static bool isParamExpr(const Record *Arg) { .Default(false); } -void GenerateIsParamExpr(const Record &Attr, raw_ostream &OS) { +static void GenerateIsParamExpr(const Record &Attr, raw_ostream &OS) { OS << "bool isParamExpr(size_t N) const override {\n"; OS << " return "; auto Args = Attr.getValueAsListOfDefs("Args"); @@ -4633,8 +4633,8 @@ void GenerateIsParamExpr(const Record &Attr, raw_ostream &OS) { OS << "}\n\n"; } -void GenerateHandleAttrWithDelayedArgs(const RecordKeeper &Records, - raw_ostream &OS) { +static void GenerateHandleAttrWithDelayedArgs(const RecordKeeper &Records, + raw_ostream &OS) { OS << "static void handleAttrWithDelayedArgs(Sema &S, Decl *D, "; OS << "const ParsedAttr &Attr) {\n"; OS << " SmallVector ArgExprs;\n"; diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index d59cbbbbec1b5b..da01cf6ceab59b 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -225,6 +225,32 @@

C++2c implementation status

P0963R3 No + + + constexpr structured bindings + P2686R5 + No + + + Allowing exception throwing in constant-evaluation + P3068R6 + No + + + Remove Deprecated Array Comparisons from C++26 + P2865R6 + No + + + Structured Bindings can introduce a Pack + P1061R10 + No + + + The Oxford variadic comma + P3176R1 + No + diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index ac81beee11a39c..8b8ce1abe906f6 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -816,6 +816,10 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { // mov rax, QWORD PTR [rip + XXXXXXXX] case 0x058d48: // 48 8d 05 XX XX XX XX : // lea rax, QWORD PTR [rip + XXXXXXXX] + case 0x0d8948: // 48 89 0d XX XX XX XX : + // mov QWORD PTR [rip + XXXXXXXX], rcx + case 0x158948: // 48 89 15 XX XX XX XX : + // mov QWORD PTR [rip + XXXXXXXX], rdx case 0x25ff48: // 48 ff 25 XX XX XX XX : // rex.W jmp QWORD PTR [rip + XXXXXXXX] case 0x158D4C: // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX] diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h b/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h index 0749f633b4bcf5..1664b92b213692 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h @@ -120,7 +120,7 @@ class DeadlockDetectorTLS { u32 lock; u32 stk; }; - LockWithContext all_locks_with_contexts_[64]; + LockWithContext all_locks_with_contexts_[128]; uptr n_all_locks_; }; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index 5a2d39cd30607f..c83efec8eaca2c 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -673,7 +673,8 @@ void CheckUnwind() { thr->ignore_reads_and_writes++; atomic_store_relaxed(&thr->in_signal_handler, 0); #endif - 
PrintCurrentStackSlow(StackTrace::GetCurrentPc()); + PrintCurrentStack(StackTrace::GetCurrentPc(), + common_flags()->fast_unwind_on_fatal); } bool is_initialized; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index f48be8e0a4fe08..49bee9c67d3030 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -514,7 +514,7 @@ bool IsExpectedReport(uptr addr, uptr size); StackID CurrentStackId(ThreadState *thr, uptr pc); ReportStack *SymbolizeStackId(StackID stack_id); void PrintCurrentStack(ThreadState *thr, uptr pc); -void PrintCurrentStackSlow(uptr pc); // uses libunwind +void PrintCurrentStack(uptr pc, bool fast); // may uses libunwind MBlock *JavaHeapBlock(uptr addr, uptr *start); void Initialize(ThreadState *thr); diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp index 0311df553fdd0a..51a98e2f2d5e75 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp @@ -828,18 +828,18 @@ void PrintCurrentStack(ThreadState *thr, uptr pc) { PrintStack(SymbolizeStack(trace)); } -// Always inlining PrintCurrentStackSlow, because LocatePcInTrace assumes +// Always inlining PrintCurrentStack, because LocatePcInTrace assumes // __sanitizer_print_stack_trace exists in the actual unwinded stack, but -// tail-call to PrintCurrentStackSlow breaks this assumption because +// tail-call to PrintCurrentStack breaks this assumption because // __sanitizer_print_stack_trace disappears after tail-call. // However, this solution is not reliable enough, please see dvyukov's comment // http://reviews.llvm.org/D19148#406208 // Also see PR27280 comment 2 and 3 for breaking examples and analysis. 
-ALWAYS_INLINE USED void PrintCurrentStackSlow(uptr pc) { +ALWAYS_INLINE USED void PrintCurrentStack(uptr pc, bool fast) { #if !SANITIZER_GO uptr bp = GET_CURRENT_FRAME(); auto *ptrace = New(); - ptrace->Unwind(pc, bp, nullptr, false); + ptrace->Unwind(pc, bp, nullptr, fast); for (uptr i = 0; i < ptrace->size / 2; i++) { uptr tmp = ptrace->trace_buffer[i]; @@ -857,6 +857,6 @@ using namespace __tsan; extern "C" { SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_print_stack_trace() { - PrintCurrentStackSlow(StackTrace::GetCurrentPc()); + PrintCurrentStack(StackTrace::GetCurrentPc(), false); } } // extern "C" diff --git a/compiler-rt/test/tsan/many_held_mutex.cpp b/compiler-rt/test/tsan/many_held_mutex.cpp new file mode 100644 index 00000000000000..76e072b35a2336 --- /dev/null +++ b/compiler-rt/test/tsan/many_held_mutex.cpp @@ -0,0 +1,21 @@ +// RUN: %clangxx_tsan -O1 %s %link_libcxx_tsan -fsanitize=thread -o %t +// RUN: %run %t 128 + +#include +#include +#include + +int main(int argc, char *argv[]) { + int num_of_mtx = std::atoi(argv[1]); + + std::vector mutexes(num_of_mtx); + + for (auto &mu : mutexes) { + mu.lock(); + } + for (auto &mu : mutexes) { + mu.unlock(); + } + + return 0; +} diff --git a/flang/include/flang/Frontend/FrontendOptions.h b/flang/include/flang/Frontend/FrontendOptions.h index 82ca99672ec610..a4cb021e309d4a 100644 --- a/flang/include/flang/Frontend/FrontendOptions.h +++ b/flang/include/flang/Frontend/FrontendOptions.h @@ -236,7 +236,8 @@ class FrontendInputFile { struct FrontendOptions { FrontendOptions() : showHelp(false), showVersion(false), instrumentedParse(false), - showColors(false), needProvenanceRangeToCharBlockMappings(false) {} + showColors(false), printSupportedCPUs(false), + needProvenanceRangeToCharBlockMappings(false) {} /// Show the -help text. unsigned showHelp : 1; @@ -250,6 +251,9 @@ struct FrontendOptions { /// Enable color diagnostics. 
unsigned showColors : 1; + /// Print the supported cpus for the current target + unsigned printSupportedCPUs : 1; + /// Enable Provenance to character-stream mapping. Allows e.g. IDEs to find /// symbols based on source-code location. This is not needed in regular /// compilation. diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h index e986ad910a3f3a..bbfcd2cafcdb21 100644 --- a/flang/include/flang/Runtime/CUDA/allocatable.h +++ b/flang/include/flang/Runtime/CUDA/allocatable.h @@ -22,6 +22,18 @@ int RTDECL(CUFAllocatableAllocate)(Descriptor &, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); +/// Perform allocation of the descriptor without synchronization. Assign data +/// from source. +int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, + const Descriptor &source, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine); + +/// Perform allocation of the descriptor with synchronization of it when +/// necessary. Assign data from source. +int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, + const Descriptor &source, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine); + /// Perform deallocation of the descriptor with synchronization of it when /// necessary. int RTDECL(CUFAllocatableDeallocate)(Descriptor &, bool hasStat = false, diff --git a/flang/include/flang/Runtime/CUDA/memmove-function.h b/flang/include/flang/Runtime/CUDA/memmove-function.h new file mode 100644 index 00000000000000..74d6a05eff4c9a --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/memmove-function.h @@ -0,0 +1,23 @@ +//===-- include/flang/Runtime/CUDA/memmove-function.h -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#ifndef FORTRAN_RUNTIME_CUDA_MEMMOVE_FUNCTION_H_ +#define FORTRAN_RUNTIME_CUDA_MEMMOVE_FUNCTION_H_ + +namespace Fortran::runtime::cuda { + +void *MemmoveHostToDevice(void *dst, const void *src, std::size_t count); + +void *MemmoveDeviceToHost(void *dst, const void *src, std::size_t count); + +void *MemmoveDeviceToDevice(void *dst, const void *src, std::size_t count); + +} // namespace Fortran::runtime::cuda +#endif // FORTRAN_RUNTIME_CUDA_MEMMOVE_FUNCTION_H_ diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 1214a2ea6bf1f3..0b79c95eade0d3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -634,6 +634,8 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, opts.outputFile = args.getLastArgValue(clang::driver::options::OPT_o); opts.showHelp = args.hasArg(clang::driver::options::OPT_help); opts.showVersion = args.hasArg(clang::driver::options::OPT_version); + opts.printSupportedCPUs = + args.hasArg(clang::driver::options::OPT_print_supported_cpus); // Get the input kind (from the value passed via `-x`) InputKind dashX(Language::Unknown); diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index f1ebd08967b9a1..3983336516db9e 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -155,8 +155,12 @@ static mlir::LogicalResult convertOpToCall(OpTy op, auto fTy = func.getFunctionType(); mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); + mlir::Value sourceLine; + if constexpr (std::is_same_v) + sourceLine = 
fir::factory::locationToLineNo( + builder, loc, op.getSource() ? fTy.getInput(5) : fTy.getInput(4)); + else + sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); mlir::Value hasStat = op.getHasStat() ? builder.createBool(loc, true) : builder.createBool(loc, false); @@ -168,8 +172,21 @@ static mlir::LogicalResult convertOpToCall(OpTy op, mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType()); errmsg = builder.create(loc, boxNoneTy).getResult(); } - llvm::SmallVector args{fir::runtime::createArguments( - builder, loc, fTy, op.getBox(), hasStat, errmsg, sourceFile, sourceLine)}; + llvm::SmallVector args; + if constexpr (std::is_same_v) { + if (op.getSource()) + args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), + op.getSource(), hasStat, errmsg, + sourceFile, sourceLine); + else + args = + fir::runtime::createArguments(builder, loc, fTy, op.getBox(), hasStat, + errmsg, sourceFile, sourceLine); + } else { + args = + fir::runtime::createArguments(builder, loc, fTy, op.getBox(), hasStat, + errmsg, sourceFile, sourceLine); + } auto callOp = builder.create(loc, func, args); rewriter.replaceOp(op, callOp); return mlir::success(); @@ -182,10 +199,6 @@ struct CUFAllocateOpConversion mlir::LogicalResult matchAndRewrite(cuf::AllocateOp op, mlir::PatternRewriter &rewriter) const override { - // TODO: Allocation with source will need a new entry point in the runtime. - if (op.getSource()) - return mlir::failure(); - // TODO: Allocation using different stream. if (op.getStream()) return mlir::failure(); @@ -202,18 +215,28 @@ struct CUFAllocateOpConversion if (hasDoubleDescriptors(op)) { // Allocation for module variable are done with custom runtime entry point // so the descriptors can be synchronized. 
- mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc( - loc, builder); - return convertOpToCall(op, rewriter, func); + mlir::func::FuncOp func; + if (op.getSource()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else + func = fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); } - // Allocation for local descriptor falls back on the standard runtime - // AllocatableAllocate as the dedicated allocator is set in the descriptor - // before the call. - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc(loc, - builder); + mlir::func::FuncOp func; + if (op.getSource()) + func = + fir::runtime::getRuntimeFunc( + loc, builder); + else + // Allocation for local descriptor falls back on the standard runtime + // AllocatableAllocate as the dedicated allocator is set in the descriptor + // before the call. + func = fir::runtime::getRuntimeFunc( + loc, builder); + return convertOpToCall(op, rewriter, func); } }; @@ -236,7 +259,7 @@ struct CUFDeallocateOpConversion mlir::func::FuncOp func = fir::runtime::getRuntimeFunc( loc, builder); - return convertOpToCall(op, rewriter, func); + return convertOpToCall(op, rewriter, func); } // Deallocation for local descriptor falls back on the standard runtime diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 832c9f2c7174df..6ef4e851f03ab3 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2875,45 +2875,41 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Reduction &x) { bool OmpStructureChecker::CheckReductionOperators( const parser::OmpClause::Reduction &x) { + bool ok = false; auto &modifiers{OmpGetModifiers(x.v)}; - const auto *definedOp{ - OmpGetUniqueModifier(modifiers)}; - if (!definedOp) { - return false; + if (const auto *ident{ + OmpGetUniqueModifier(modifiers)}) { + + auto visitOperator{[&](const parser::DefinedOperator &dOpr) { + if (const 
auto *intrinsicOp{ + std::get_if( + &dOpr.u)}) { + ok = CheckIntrinsicOperator(*intrinsicOp); + } else { + context_.Say(GetContext().clauseSource, + "Invalid reduction operator in REDUCTION clause."_err_en_US, + ContextDirectiveAsFortran()); + } + }}; + + auto visitDesignator{[&](const parser::ProcedureDesignator &procD) { + const parser::Name *name{std::get_if(&procD.u)}; + if (name && name->symbol) { + const SourceName &realName{name->symbol->GetUltimate().name()}; + if (realName == "max" || realName == "min" || realName == "iand" || + realName == "ior" || realName == "ieor") { + ok = true; + } + } + if (!ok) { + context_.Say(GetContext().clauseSource, + "Invalid reduction identifier in REDUCTION " + "clause."_err_en_US, + ContextDirectiveAsFortran()); + } + }}; + common::visit(common::visitors{visitOperator, visitDesignator}, ident->u); } - bool ok = false; - common::visit( - common::visitors{ - [&](const parser::DefinedOperator &dOpr) { - if (const auto *intrinsicOp{ - std::get_if( - &dOpr.u)}) { - ok = CheckIntrinsicOperator(*intrinsicOp); - } else { - context_.Say(GetContext().clauseSource, - "Invalid reduction operator in REDUCTION clause."_err_en_US, - ContextDirectiveAsFortran()); - } - }, - [&](const parser::ProcedureDesignator &procD) { - const parser::Name *name{std::get_if(&procD.u)}; - if (name && name->symbol) { - const SourceName &realName{name->symbol->GetUltimate().name()}; - if (realName == "max" || realName == "min" || - realName == "iand" || realName == "ior" || - realName == "ieor") { - ok = true; - } - } - if (!ok) { - context_.Say(GetContext().clauseSource, - "Invalid reduction identifier in REDUCTION " - "clause."_err_en_US, - ContextDirectiveAsFortran()); - } - }, - }, - definedOp->u); return ok; } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index c75808a8963b3f..107bd3b09019a0 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ 
-522,49 +522,47 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const auto &objList{std::get(x.v.t)}; ResolveOmpObjectList(objList, Symbol::Flag::OmpReduction); - auto &modifiers{OmpGetModifiers(x.v)}; - if (!modifiers) { - return false; - } - - auto createDummyProcSymbol = [&](const parser::Name *name) { - // If name resolution failed, create a dummy symbol - const auto namePair{ - currScope().try_emplace(name->source, Attrs{}, ProcEntityDetails{})}; - auto &newSymbol{*namePair.first->second}; - if (context_.intrinsics().IsIntrinsic(name->ToString())) { - newSymbol.attrs().set(Attr::INTRINSIC); - } - name->symbol = &newSymbol; - }; + if (auto &modifiers{OmpGetModifiers(x.v)}) { + auto createDummyProcSymbol = [&](const parser::Name *name) { + // If name resolution failed, create a dummy symbol + const auto namePair{currScope().try_emplace( + name->source, Attrs{}, ProcEntityDetails{})}; + auto &newSymbol{*namePair.first->second}; + if (context_.intrinsics().IsIntrinsic(name->ToString())) { + newSymbol.attrs().set(Attr::INTRINSIC); + } + name->symbol = &newSymbol; + }; - for (auto &mod : *modifiers) { - if (!std::holds_alternative(mod.u)) { - continue; - } - auto &opr{std::get(mod.u)}; - if (auto *procD{parser::Unwrap(opr.u)}) { - if (auto *name{parser::Unwrap(procD->u)}) { - if (!name->symbol) { - if (!ResolveName(name)) { - createDummyProcSymbol(name); + for (auto &mod : *modifiers) { + if (!std::holds_alternative(mod.u)) { + continue; + } + auto &opr{std::get(mod.u)}; + if (auto *procD{parser::Unwrap(opr.u)}) { + if (auto *name{parser::Unwrap(procD->u)}) { + if (!name->symbol) { + if (!ResolveName(name)) { + createDummyProcSymbol(name); + } } } - } - if (auto *procRef{parser::Unwrap(procD->u)}) { - if (!procRef->v.thing.component.symbol) { - if (!ResolveName(&procRef->v.thing.component)) { - createDummyProcSymbol(&procRef->v.thing.component); + if (auto *procRef{ + parser::Unwrap(procD->u)}) { + if (!procRef->v.thing.component.symbol) { + if 
(!ResolveName(&procRef->v.thing.component)) { + createDummyProcSymbol(&procRef->v.thing.component); + } } } } } - } - using ReductionModifier = parser::OmpReductionModifier; - if (auto *maybeModifier{ - OmpGetUniqueModifier(modifiers)}) { - if (maybeModifier->v == ReductionModifier::Value::Inscan) { - ResolveOmpObjectList(objList, Symbol::Flag::OmpInScanReduction); + using ReductionModifier = parser::OmpReductionModifier; + if (auto *maybeModifier{ + OmpGetUniqueModifier(modifiers)}) { + if (maybeModifier->v == ReductionModifier::Value::Inscan) { + ResolveOmpObjectList(objList, Symbol::Flag::OmpInScanReduction); + } } } return false; diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index ce87f3efdc3632..3a88824826de31 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -18,6 +18,7 @@ add_flang_library(${CUFRT_LIBNAME} allocatable.cpp descriptor.cpp kernel.cpp + memmove-function.cpp memory.cpp registration.cpp ) diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp index 649ddb638abe6d..9fed50c859a9cf 100644 --- a/flang/runtime/CUDA/allocatable.cpp +++ b/flang/runtime/CUDA/allocatable.cpp @@ -7,10 +7,12 @@ //===----------------------------------------------------------------------===// #include "flang/Runtime/CUDA/allocatable.h" +#include "../assign-impl.h" #include "../stat.h" #include "../terminator.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" +#include "flang/Runtime/CUDA/memmove-function.h" #include "flang/Runtime/allocatable.h" #include "cuda_runtime.h" @@ -45,6 +47,32 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, bool hasStat, return stat; } +int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, + const Descriptor &source, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine) { + int stat{RTNAME(AllocatableAllocate)( + alloc, hasStat, errMsg, sourceFile, sourceLine)}; + if (stat 
== StatOk) { + Terminator terminator{sourceFile, sourceLine}; + Fortran::runtime::DoFromSourceAssign( + alloc, source, terminator, &MemmoveHostToDevice); + } + return stat; +} + +int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, + const Descriptor &source, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine) { + int stat{RTNAME(AllocatableAllocate)( + alloc, hasStat, errMsg, sourceFile, sourceLine)}; + if (stat == StatOk) { + Terminator terminator{sourceFile, sourceLine}; + Fortran::runtime::DoFromSourceAssign( + alloc, source, terminator, &MemmoveHostToDevice); + } + return stat; +} + int RTDEF(CUFAllocatableDeallocate)(Descriptor &desc, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { // Perform the standard allocation. diff --git a/flang/runtime/CUDA/memmove-function.cpp b/flang/runtime/CUDA/memmove-function.cpp new file mode 100644 index 00000000000000..3ba9fa7e0f7f73 --- /dev/null +++ b/flang/runtime/CUDA/memmove-function.cpp @@ -0,0 +1,35 @@ +//===-- runtime/CUDA/memmove-function.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/memmove-function.h" +#include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" + +#include "cuda_runtime.h" + +namespace Fortran::runtime::cuda { + +void *MemmoveHostToDevice(void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); + return dst; +} + +void *MemmoveDeviceToHost(void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. 
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost)); + return dst; +} + +void *MemmoveDeviceToDevice(void *dst, const void *src, std::size_t count) { + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice)); + return dst; +} + +} // namespace Fortran::runtime::cuda diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index 68963c4d7738ac..0bbb493d2db919 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -11,31 +11,12 @@ #include "../terminator.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" +#include "flang/Runtime/CUDA/memmove-function.h" #include "flang/Runtime/assign.h" #include "cuda_runtime.h" namespace Fortran::runtime::cuda { -static void *MemmoveHostToDevice( - void *dst, const void *src, std::size_t count) { - // TODO: Use cudaMemcpyAsync when we have support for stream. - CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); - return dst; -} - -static void *MemmoveDeviceToHost( - void *dst, const void *src, std::size_t count) { - // TODO: Use cudaMemcpyAsync when we have support for stream. - CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost)); - return dst; -} - -static void *MemmoveDeviceToDevice( - void *dst, const void *src, std::size_t count) { - // TODO: Use cudaMemcpyAsync when we have support for stream. - CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice)); - return dst; -} extern "C" { diff --git a/flang/test/Driver/print-supported-cpus.f90 b/flang/test/Driver/print-supported-cpus.f90 new file mode 100644 index 00000000000000..13688b193f9b92 --- /dev/null +++ b/flang/test/Driver/print-supported-cpus.f90 @@ -0,0 +1,46 @@ +! Test --print-supported-cpus and associated aliases, -mcpu=help and +! -mtune=help + +! RUN: %if x86-registered-target %{ \ +! 
RUN: %flang --target=x86_64-unknown-linux-gnu --print-supported-cpus 2>&1 \ +! RUN: | FileCheck %s --check-prefixes=X86,CHECK \ +! RUN: %} +! RUN: %if x86-registered-target %{ \ +! RUN: %flang --target=x86_64-unknown-linux-gnu -mcpu=help 2>&1 \ +! RUN: | FileCheck %s --check-prefixes=X86,CHECK \ +! RUN: %} +! RUN: %if x86-registered-target %{ \ +! RUN: %flang --target=x86_64-unknown-linux-gnu -mtune=help 2>&1 \ +! RUN: | FileCheck %s --check-prefixes=X86,CHECK \ +! RUN: %} + +! RUN: %if aarch64-registered-target %{ \ +! RUN: %flang --target=aarch64-unknown-linux-gnu --print-supported-cpus 2>&1 \ +! RUN: | FileCheck %s --check-prefixes=AARCH64,CHECK \ +! RUN: %} +! RUN: %if aarch64-registered-target %{ \ +! RUN: %flang --target=aarch64-unknown-linux-gnu -mcpu=help 2>&1 \ +! RUN: | FileCheck %s --check-prefixes=AARCH64,CHECK \ +! RUN: %} +! RUN: %if aarch64-registered-target %{ \ +! RUN: %flang --target=aarch64-unknown-linux-gnu -mtune=help 2>&1 \ +! RUN: | FileCheck %s --check-prefixes=AARCH64,CHECK \ +! RUN: %} + +! CHECK-NOT: warning: argument unused during compilation + +! X86: Target: x86_64-unknown-linux-gnu +! X86: corei7 + +! AARCH64: Target: aarch64-unknown-linux-gnu +! AARCH64: cortex-a73 +! AARCH64: cortex-a75 + +! The footer displayed contains a reference to clang. This should be changed to +! flang, but that requires a change in llvm/MCSubtargetInfo. When that happens, +! this test will need to be updated and this comment can be removed. + +! CHECK: Use -mcpu or -mtune to specify the target's processor. +! 
CHECK: For example, clang --target=aarch64-unknown-linux-gnu + + end program diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index d68ff894d5af5a..47d75b16b7a2d2 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -120,4 +120,49 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} { // CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: fir.call @_FortranACUFAllocatableAllocate(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 + +func.func @_QPallocate_source() { + %c0_i64 = arith.constant 0 : i64 + %c1_i32 = arith.constant 1 : i32 + %c0_i32 = arith.constant 0 : i32 + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFallocate_sourceEa"} + %4 = fir.declare %0 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFallocate_sourceEa"} : (!fir.ref>>>) -> !fir.ref>>> + %5 = cuf.alloc !fir.box>> {bindc_name = "a_d", data_attr = #cuf.cuda, uniq_name = "_QFallocate_sourceEa_d"} -> !fir.ref>>> + %7 = fir.declare %5 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFallocate_sourceEa_d"} : (!fir.ref>>>) -> !fir.ref>>> + %8 = fir.load %4 : !fir.ref>>> + %22 = cuf.allocate %7 : !fir.ref>>> source(%8 : !fir.box>>) {data_attr = #cuf.cuda} -> i32 + return +} + +// CHECK-LABEL: func.func @_QPallocate_source() +// CHECK: %[[DECL_HOST:.*]] = fir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFallocate_sourceEa"} : (!fir.ref>>>) -> !fir.ref>>> +// CHECK: %[[DECL_DEV:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFallocate_sourceEa_d"} : (!fir.ref>>>) -> !fir.ref>>> +// CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref>>> +// CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[SOURCE_CONV:.*]] = 
fir.convert %[[SOURCE]] : (!fir.box>>) -> !fir.box +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, i1, !fir.box, !fir.ref, i32) -> i32 + + +fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda} : !fir.box>> { + %c0 = arith.constant 0 : index + %0 = fir.zero_bits !fir.heap> + %1 = fir.shape %c0, %c0 : (index, index) -> !fir.shape<2> + %2 = fir.embox %0(%1) {allocator_idx = 2 : i32} : (!fir.heap>, !fir.shape<2>) -> !fir.box>> + fir.has_value %2 : !fir.box>> +} +func.func @_QMmod1Pallocate_source_global() { + %0 = fir.address_of(@_QMmod1Ea_d) : !fir.ref>>> + %1 = fir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Ea_d"} : (!fir.ref>>>) -> !fir.ref>>> + %2 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QMmod1Fallocate_source_globalEa"} + %6 = fir.declare %2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Fallocate_source_globalEa"} : (!fir.ref>>>) -> !fir.ref>>> + %7 = fir.load %6 : !fir.ref>>> + %21 = cuf.allocate %1 : !fir.ref>>> source(%7 : !fir.box>>) {data_attr = #cuf.cuda} -> i32 + return +} + +// CHECK-LABEL: func.func @_QMmod1Pallocate_source_global() +// CHECK: fir.call @_FortranACUFAllocatableAllocateSourceSync + } // end of module diff --git a/flang/test/Lower/OpenMP/Todo/flush-seq-cst.f90 b/flang/test/Lower/OpenMP/Todo/flush-seq-cst.f90 new file mode 100644 index 00000000000000..753e1cfcd7aa50 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/flush-seq-cst.f90 @@ -0,0 +1,6 @@ +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s -fopenmp-version=51 2>&1 | FileCheck %s + +! 
CHECK: not yet implemented: Unhandled clause SEQ_CST in FLUSH construct +program flush_seq_cst + !$omp flush seq_cst +end program \ No newline at end of file diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index 406d30b38948ea..8dd6d10200cd3d 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags %openmp_module_flag -fopenmp-version=50 +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags %openmp_module_flag -fopenmp-version=51 use omp_lib ! Check OpenMP clause validity for the following directives: ! @@ -507,7 +507,6 @@ !$omp flush acquire !ERROR: If memory-order-clause is RELEASE, ACQUIRE, or ACQ_REL, list items must not be specified on the FLUSH directive !$omp flush release (c) - !ERROR: SEQ_CST clause is not allowed on the FLUSH directive !$omp flush seq_cst !ERROR: RELAXED clause is not allowed on the FLUSH directive !$omp flush relaxed diff --git a/flang/test/Semantics/OpenMP/flush02.f90 b/flang/test/Semantics/OpenMP/flush02.f90 index ed0cf6602d574a..615332c6cf31cb 100644 --- a/flang/test/Semantics/OpenMP/flush02.f90 +++ b/flang/test/Semantics/OpenMP/flush02.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=50 +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=51 ! Check OpenMP 5.0 - 2.17.8 flush Construct ! Restriction - @@ -27,7 +27,6 @@ !Only memory-order-clauses. if (omp_get_thread_num() == 1) THEN ! Not allowed clauses. - !ERROR: SEQ_CST clause is not allowed on the FLUSH directive !$omp flush seq_cst !ERROR: RELAXED clause is not allowed on the FLUSH directive !$omp flush relaxed @@ -41,7 +40,6 @@ !$omp flush acquire acquire ! Mix of allowed and not allowed. 
- !ERROR: SEQ_CST clause is not allowed on the FLUSH directive !$omp flush seq_cst acquire END IF diff --git a/flang/tools/flang-driver/CMakeLists.txt b/flang/tools/flang-driver/CMakeLists.txt index 9a89a6185a3291..06b61e5951881b 100644 --- a/flang/tools/flang-driver/CMakeLists.txt +++ b/flang/tools/flang-driver/CMakeLists.txt @@ -6,6 +6,7 @@ link_directories(${LLVM_LIBRARY_DIR}) set( LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} + MC Option Support TargetParser diff --git a/flang/tools/flang-driver/fc1_main.cpp b/flang/tools/flang-driver/fc1_main.cpp index b5b062aaac2671..561a0dd5524e37 100644 --- a/flang/tools/flang-driver/fc1_main.cpp +++ b/flang/tools/flang-driver/fc1_main.cpp @@ -21,15 +21,35 @@ #include "flang/Frontend/TextDiagnosticBuffer.h" #include "flang/FrontendTool/Utils.h" #include "clang/Driver/DriverDiagnostic.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/OptTable.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" #include using namespace Fortran::frontend; +/// Print supported cpus of the given target. +static int printSupportedCPUs(llvm::StringRef triple) { + std::string error; + const llvm::Target *target = + llvm::TargetRegistry::lookupTarget(triple, error); + if (!target) { + llvm::errs() << error; + return 1; + } + + // the target machine will handle the mcpu printing + llvm::TargetOptions targetOpts; + std::unique_ptr targetMachine( + target->createTargetMachine(triple, "", "+cpuhelp", targetOpts, + std::nullopt)); + return 0; +} + int fc1_main(llvm::ArrayRef argv, const char *argv0) { // Create CompilerInstance std::unique_ptr flang(new CompilerInstance()); @@ -58,6 +78,10 @@ int fc1_main(llvm::ArrayRef argv, const char *argv0) { llvm::InitializeAllTargetMCs(); llvm::InitializeAllAsmPrinters(); + // --print-supported-cpus takes priority over the actual compilation. 
+ if (flang->getFrontendOpts().printSupportedCPUs) + return printSupportedCPUs(flang->getInvocation().getTargetOpts().triple); + diagsBuffer->flushDiagnostics(flang->getDiagnostics()); if (!success) diff --git a/libc/src/__support/RPC/rpc.h b/libc/shared/rpc.h similarity index 80% rename from libc/src/__support/RPC/rpc.h rename to libc/shared/rpc.h index 30dd2c1a8125d7..c5e4277286c399 100644 --- a/libc/src/__support/RPC/rpc.h +++ b/libc/shared/rpc.h @@ -15,16 +15,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_H -#define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_H +#ifndef LLVM_LIBC_SHARED_RPC_H +#define LLVM_LIBC_SHARED_RPC_H #include "rpc_util.h" -#include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" #include -namespace LIBC_NAMESPACE_DECL { +#ifndef RPC_INLINE +#define RPC_INLINE inline +#endif + namespace rpc { /// Use scoped atomic variants if they are available for the target. @@ -41,6 +42,13 @@ namespace rpc { #define __scoped_atomic_thread_fence(ord, scp) __atomic_thread_fence(ord) #endif +/// Generic codes that can be used when implementing the server. +enum Status { + SUCCESS = 0x0, + ERROR = 0x1000, + UNHANDLED_OPCODE = 0x1001, +}; + /// A fixed size channel used to communicate between the RPC client and server. struct Buffer { uint64_t data[8]; @@ -70,12 +78,12 @@ constexpr static uint64_t MAX_PORT_COUNT = 4096; /// - The server will always start with a 'recv' operation. /// - Every 'send' or 'recv' call is mirrored by the other process. 
template struct Process { - LIBC_INLINE Process() = default; - LIBC_INLINE Process(const Process &) = delete; - LIBC_INLINE Process &operator=(const Process &) = delete; - LIBC_INLINE Process(Process &&) = default; - LIBC_INLINE Process &operator=(Process &&) = default; - LIBC_INLINE ~Process() = default; + RPC_INLINE Process() = default; + RPC_INLINE Process(const Process &) = delete; + RPC_INLINE Process &operator=(const Process &) = delete; + RPC_INLINE Process(Process &&) = default; + RPC_INLINE Process &operator=(Process &&) = default; + RPC_INLINE ~Process() = default; uint32_t port_count = 0; uint32_t *inbox = nullptr; @@ -86,7 +94,7 @@ template struct Process { static constexpr uint64_t NUM_BITS_IN_WORD = sizeof(uint32_t) * 8; uint32_t lock[MAX_PORT_COUNT / NUM_BITS_IN_WORD] = {0}; - LIBC_INLINE Process(uint32_t port_count, void *buffer) + RPC_INLINE Process(uint32_t port_count, void *buffer) : port_count(port_count), inbox(reinterpret_cast( advance(buffer, inbox_offset(port_count)))), outbox(reinterpret_cast( @@ -105,20 +113,20 @@ template struct Process { /// Header header[port_count]; /// Buffer packet[port_count][lane_size]; /// }; - LIBC_INLINE static constexpr uint64_t allocation_size(uint32_t port_count, - uint32_t lane_size) { + RPC_INLINE static constexpr uint64_t allocation_size(uint32_t port_count, + uint32_t lane_size) { return buffer_offset(port_count) + buffer_bytes(port_count, lane_size); } /// Retrieve the inbox state from memory shared between processes. - LIBC_INLINE uint32_t load_inbox(uint64_t lane_mask, uint32_t index) const { + RPC_INLINE uint32_t load_inbox(uint64_t lane_mask, uint32_t index) const { return rpc::broadcast_value( lane_mask, __scoped_atomic_load_n(&inbox[index], __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM)); } /// Retrieve the outbox state from memory shared between processes. 
- LIBC_INLINE uint32_t load_outbox(uint64_t lane_mask, uint32_t index) const { + RPC_INLINE uint32_t load_outbox(uint64_t lane_mask, uint32_t index) const { return rpc::broadcast_value( lane_mask, __scoped_atomic_load_n(&outbox[index], __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM)); @@ -128,7 +136,7 @@ template struct Process { /// Equivalent to loading outbox followed by store of the inverted value /// The outbox is write only by this warp and tracking the value locally is /// cheaper than calling load_outbox to get the value to store. - LIBC_INLINE uint32_t invert_outbox(uint32_t index, uint32_t current_outbox) { + RPC_INLINE uint32_t invert_outbox(uint32_t index, uint32_t current_outbox) { uint32_t inverted_outbox = !current_outbox; __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM); __scoped_atomic_store_n(&outbox[index], inverted_outbox, __ATOMIC_RELAXED, @@ -138,8 +146,8 @@ template struct Process { // Given the current outbox and inbox values, wait until the inbox changes // to indicate that this thread owns the buffer element. - LIBC_INLINE void wait_for_ownership(uint64_t lane_mask, uint32_t index, - uint32_t outbox, uint32_t in) { + RPC_INLINE void wait_for_ownership(uint64_t lane_mask, uint32_t index, + uint32_t outbox, uint32_t in) { while (buffer_unavailable(in, outbox)) { sleep_briefly(); in = load_inbox(lane_mask, index); @@ -150,14 +158,14 @@ template struct Process { /// The packet is a linearly allocated array of buffers used to communicate /// with the other process. This function returns the appropriate slot in this /// array such that the process can operate on an entire warp or wavefront. - LIBC_INLINE Buffer *get_packet(uint32_t index, uint32_t lane_size) { + RPC_INLINE Buffer *get_packet(uint32_t index, uint32_t lane_size) { return &packet[index * lane_size]; } /// Determines if this process needs to wait for ownership of the buffer. 
We /// invert the condition on one of the processes to indicate that if one /// process owns the buffer then the other does not. - LIBC_INLINE static bool buffer_unavailable(uint32_t in, uint32_t out) { + RPC_INLINE static bool buffer_unavailable(uint32_t in, uint32_t out) { bool cond = in != out; return Invert ? !cond : cond; } @@ -166,7 +174,7 @@ template struct Process { /// lane_mask is a bitmap of the threads in the warp that would hold the /// single lock on success, e.g. the result of rpc::get_lane_mask() /// The lock is held when the n-th bit of the lock bitfield is set. - LIBC_INLINE bool try_lock(uint64_t lane_mask, uint32_t index) { + RPC_INLINE bool try_lock(uint64_t lane_mask, uint32_t index) { // On amdgpu, test and set to the nth lock bit and a sync_lane would suffice // On volta, need to handle differences between the threads running and // the threads that were detected in the previous call to get_lane_mask() @@ -206,7 +214,7 @@ template struct Process { /// Unlock the lock at index. We need a lane sync to keep this function /// convergent, otherwise the compiler will sink the store and deadlock. - LIBC_INLINE void unlock(uint64_t lane_mask, uint32_t index) { + RPC_INLINE void unlock(uint64_t lane_mask, uint32_t index) { // Do not move any writes past the unlock. __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_DEVICE); @@ -219,40 +227,40 @@ template struct Process { } /// Number of bytes to allocate for an inbox or outbox. - LIBC_INLINE static constexpr uint64_t mailbox_bytes(uint32_t port_count) { + RPC_INLINE static constexpr uint64_t mailbox_bytes(uint32_t port_count) { return port_count * sizeof(uint32_t); } /// Number of bytes to allocate for the buffer containing the packets. 
- LIBC_INLINE static constexpr uint64_t buffer_bytes(uint32_t port_count, - uint32_t lane_size) { + RPC_INLINE static constexpr uint64_t buffer_bytes(uint32_t port_count, + uint32_t lane_size) { return port_count * lane_size * sizeof(Buffer); } /// Offset of the inbox in memory. This is the same as the outbox if inverted. - LIBC_INLINE static constexpr uint64_t inbox_offset(uint32_t port_count) { + RPC_INLINE static constexpr uint64_t inbox_offset(uint32_t port_count) { return Invert ? mailbox_bytes(port_count) : 0; } /// Offset of the outbox in memory. This is the same as the inbox if inverted. - LIBC_INLINE static constexpr uint64_t outbox_offset(uint32_t port_count) { + RPC_INLINE static constexpr uint64_t outbox_offset(uint32_t port_count) { return Invert ? 0 : mailbox_bytes(port_count); } /// Offset of the buffer containing the packets after the inbox and outbox. - LIBC_INLINE static constexpr uint64_t header_offset(uint32_t port_count) { + RPC_INLINE static constexpr uint64_t header_offset(uint32_t port_count) { return align_up(2 * mailbox_bytes(port_count), alignof(Header)); } /// Offset of the buffer containing the packets after the inbox and outbox. - LIBC_INLINE static constexpr uint64_t buffer_offset(uint32_t port_count) { + RPC_INLINE static constexpr uint64_t buffer_offset(uint32_t port_count) { return align_up(header_offset(port_count) + port_count * sizeof(Header), alignof(Buffer)); } /// Conditionally set the n-th bit in the atomic bitfield. - LIBC_INLINE static constexpr uint32_t set_nth(uint32_t *bits, uint32_t index, - bool cond) { + RPC_INLINE static constexpr uint32_t set_nth(uint32_t *bits, uint32_t index, + bool cond) { uint32_t slot = index / NUM_BITS_IN_WORD; uint32_t bit = index % NUM_BITS_IN_WORD; return __scoped_atomic_fetch_or(&bits[slot], @@ -262,8 +270,8 @@ template struct Process { } /// Conditionally clear the n-th bit in the atomic bitfield. 
- LIBC_INLINE static constexpr uint32_t clear_nth(uint32_t *bits, - uint32_t index, bool cond) { + RPC_INLINE static constexpr uint32_t clear_nth(uint32_t *bits, uint32_t index, + bool cond) { uint32_t slot = index / NUM_BITS_IN_WORD; uint32_t bit = index % NUM_BITS_IN_WORD; return __scoped_atomic_fetch_and(&bits[slot], @@ -275,8 +283,8 @@ template struct Process { /// Invokes a function accross every active buffer across the total lane size. template -LIBC_INLINE static void invoke_rpc(F &&fn, uint32_t lane_size, - uint64_t lane_mask, Buffer *slot) { +RPC_INLINE static void invoke_rpc(F &&fn, uint32_t lane_size, + uint64_t lane_mask, Buffer *slot) { if constexpr (is_process_gpu()) { fn(&slot[rpc::get_lane_id()], rpc::get_lane_id()); } else { @@ -290,40 +298,40 @@ LIBC_INLINE static void invoke_rpc(F &&fn, uint32_t lane_size, /// processes. A port is conceptually an index into the memory provided by the /// underlying process that is guarded by a lock bit. template struct Port { - LIBC_INLINE Port(Process &process, uint64_t lane_mask, uint32_t lane_size, - uint32_t index, uint32_t out) + RPC_INLINE Port(Process &process, uint64_t lane_mask, uint32_t lane_size, + uint32_t index, uint32_t out) : process(process), lane_mask(lane_mask), lane_size(lane_size), index(index), out(out), receive(false), owns_buffer(true) {} - LIBC_INLINE ~Port() = default; + RPC_INLINE ~Port() = default; private: - LIBC_INLINE Port(const Port &) = delete; - LIBC_INLINE Port &operator=(const Port &) = delete; - LIBC_INLINE Port(Port &&) = default; - LIBC_INLINE Port &operator=(Port &&) = default; + RPC_INLINE Port(const Port &) = delete; + RPC_INLINE Port &operator=(const Port &) = delete; + RPC_INLINE Port(Port &&) = default; + RPC_INLINE Port &operator=(Port &&) = default; friend struct Client; friend struct Server; friend class rpc::optional>; public: - template LIBC_INLINE void recv(U use); - template LIBC_INLINE void send(F fill); + template RPC_INLINE void recv(U use); + template 
RPC_INLINE void send(F fill); template - LIBC_INLINE void send_and_recv(F fill, U use); - template LIBC_INLINE void recv_and_send(W work); - LIBC_INLINE void send_n(const void *const *src, uint64_t *size); - LIBC_INLINE void send_n(const void *src, uint64_t size); + RPC_INLINE void send_and_recv(F fill, U use); + template RPC_INLINE void recv_and_send(W work); + RPC_INLINE void send_n(const void *const *src, uint64_t *size); + RPC_INLINE void send_n(const void *src, uint64_t size); template - LIBC_INLINE void recv_n(void **dst, uint64_t *size, A &&alloc); + RPC_INLINE void recv_n(void **dst, uint64_t *size, A &&alloc); - LIBC_INLINE uint32_t get_opcode() const { + RPC_INLINE uint32_t get_opcode() const { return process.header[index].opcode; } - LIBC_INLINE uint32_t get_index() const { return index; } + RPC_INLINE uint32_t get_index() const { return index; } - LIBC_INLINE void close() { + RPC_INLINE void close() { // Wait for all lanes to finish using the port. rpc::sync_lane(lane_mask); @@ -346,16 +354,16 @@ template struct Port { /// The RPC client used to make requests to the server. struct Client { - LIBC_INLINE Client() = default; - LIBC_INLINE Client(const Client &) = delete; - LIBC_INLINE Client &operator=(const Client &) = delete; - LIBC_INLINE ~Client() = default; + RPC_INLINE Client() = default; + RPC_INLINE Client(const Client &) = delete; + RPC_INLINE Client &operator=(const Client &) = delete; + RPC_INLINE ~Client() = default; - LIBC_INLINE Client(uint32_t port_count, void *buffer) + RPC_INLINE Client(uint32_t port_count, void *buffer) : process(port_count, buffer) {} using Port = rpc::Port; - template LIBC_INLINE Port open(); + template RPC_INLINE Port open(); private: Process process; @@ -363,21 +371,21 @@ struct Client { /// The RPC server used to respond to the client. 
struct Server { - LIBC_INLINE Server() = default; - LIBC_INLINE Server(const Server &) = delete; - LIBC_INLINE Server &operator=(const Server &) = delete; - LIBC_INLINE ~Server() = default; + RPC_INLINE Server() = default; + RPC_INLINE Server(const Server &) = delete; + RPC_INLINE Server &operator=(const Server &) = delete; + RPC_INLINE ~Server() = default; - LIBC_INLINE Server(uint32_t port_count, void *buffer) + RPC_INLINE Server(uint32_t port_count, void *buffer) : process(port_count, buffer) {} using Port = rpc::Port; - LIBC_INLINE rpc::optional try_open(uint32_t lane_size, - uint32_t start = 0); - LIBC_INLINE Port open(uint32_t lane_size); + RPC_INLINE rpc::optional try_open(uint32_t lane_size, + uint32_t start = 0); + RPC_INLINE Port open(uint32_t lane_size); - LIBC_INLINE static uint64_t allocation_size(uint32_t lane_size, - uint32_t port_count) { + RPC_INLINE static uint64_t allocation_size(uint32_t lane_size, + uint32_t port_count) { return Process::allocation_size(port_count, lane_size); } @@ -386,7 +394,7 @@ struct Server { }; /// Applies \p fill to the shared buffer and initiates a send operation. -template template LIBC_INLINE void Port::send(F fill) { +template template RPC_INLINE void Port::send(F fill) { uint32_t in = owns_buffer ? out ^ T : process.load_inbox(lane_mask, index); // We need to wait until we own the buffer before sending. @@ -401,7 +409,7 @@ template template LIBC_INLINE void Port::send(F fill) { } /// Applies \p use to the shared buffer and acknowledges the send. -template template LIBC_INLINE void Port::recv(U use) { +template template RPC_INLINE void Port::recv(U use) { // We only exchange ownership of the buffer during a receive if we are waiting // for a previous receive to finish. if (receive) { @@ -424,7 +432,7 @@ template template LIBC_INLINE void Port::recv(U use) { /// Combines a send and receive into a single function. 
template template -LIBC_INLINE void Port::send_and_recv(F fill, U use) { +RPC_INLINE void Port::send_and_recv(F fill, U use) { send(fill); recv(use); } @@ -434,7 +442,7 @@ LIBC_INLINE void Port::send_and_recv(F fill, U use) { /// the copy back. template template -LIBC_INLINE void Port::recv_and_send(W work) { +RPC_INLINE void Port::recv_and_send(W work) { recv(work); send([](Buffer *, uint32_t) { /* no-op */ }); } @@ -442,7 +450,7 @@ LIBC_INLINE void Port::recv_and_send(W work) { /// Helper routine to simplify the interface when sending from the GPU using /// thread private pointers to the underlying value. template -LIBC_INLINE void Port::send_n(const void *src, uint64_t size) { +RPC_INLINE void Port::send_n(const void *src, uint64_t size) { const void **src_ptr = &src; uint64_t *size_ptr = &size; send_n(src_ptr, size_ptr); @@ -451,7 +459,7 @@ LIBC_INLINE void Port::send_n(const void *src, uint64_t size) { /// Sends an arbitrarily sized data buffer \p src across the shared channel in /// multiples of the packet length. template -LIBC_INLINE void Port::send_n(const void *const *src, uint64_t *size) { +RPC_INLINE void Port::send_n(const void *const *src, uint64_t *size) { uint64_t num_sends = 0; send([&](Buffer *buffer, uint32_t id) { reinterpret_cast(buffer->data)[0] = lane_value(size, id); @@ -482,7 +490,7 @@ LIBC_INLINE void Port::send_n(const void *const *src, uint64_t *size) { /// size of the data so that we can initialize the size of the \p dst buffer. template template -LIBC_INLINE void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { +RPC_INLINE void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { uint64_t num_recvs = 0; recv([&](Buffer *buffer, uint32_t id) { lane_value(size, id) = reinterpret_cast(buffer->data)[0]; @@ -516,7 +524,7 @@ LIBC_INLINE void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { /// port. Each port instance uses an associated \p opcode to tell the server /// what to do. 
The Client interface provides the appropriate lane size to the /// port using the platform's returned value. -template LIBC_INLINE Client::Port Client::open() { +template RPC_INLINE Client::Port Client::open() { // Repeatedly perform a naive linear scan for a port that can be opened to // send data. for (uint32_t index = 0;; ++index) { @@ -550,7 +558,7 @@ template LIBC_INLINE Client::Port Client::open() { /// Attempts to open a port to use as the server. The server can only open a /// port if it has a pending receive operation -LIBC_INLINE rpc::optional +RPC_INLINE rpc::optional Server::try_open(uint32_t lane_size, uint32_t start) { // Perform a naive linear scan for a port that has a pending request. for (uint32_t index = start; index < process.port_count; ++index) { @@ -580,7 +588,7 @@ Server::try_open(uint32_t lane_size, uint32_t start) { return rpc::nullopt; } -LIBC_INLINE Server::Port Server::open(uint32_t lane_size) { +RPC_INLINE Server::Port Server::open(uint32_t lane_size) { for (;;) { if (rpc::optional p = try_open(lane_size)) return rpc::move(p.value()); @@ -599,6 +607,5 @@ LIBC_INLINE Server::Port Server::open(uint32_t lane_size) { #endif } // namespace rpc -} // namespace LIBC_NAMESPACE_DECL -#endif +#endif // LLVM_LIBC_SHARED_RPC_H diff --git a/libc/src/__support/RPC/rpc_util.h b/libc/shared/rpc_util.h similarity index 61% rename from libc/src/__support/RPC/rpc_util.h rename to libc/shared/rpc_util.h index 7067dfc974eb31..502014d839ae94 100644 --- a/libc/src/__support/RPC/rpc_util.h +++ b/libc/shared/rpc_util.h @@ -6,11 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H -#define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H - -#include "src/__support/macros/attributes.h" -#include "src/__support/macros/config.h" +#ifndef LLVM_LIBC_SHARED_RPC_UTIL_H +#define LLVM_LIBC_SHARED_RPC_UTIL_H #include #include @@ -20,7 +17,10 @@ #define RPC_TARGET_IS_GPU #endif -namespace 
LIBC_NAMESPACE_DECL { +#ifndef RPC_INLINE +#define RPC_INLINE inline +#endif + namespace rpc { template struct type_identity { @@ -40,26 +40,26 @@ template struct is_const : type_constant {}; /// Freestanding implementation of std::move. template -LIBC_INLINE constexpr typename remove_reference::type &&move(T &&t) { +RPC_INLINE constexpr typename remove_reference::type &&move(T &&t) { return static_cast::type &&>(t); } /// Freestanding implementation of std::forward. template -LIBC_INLINE constexpr T &&forward(typename remove_reference::type &value) { +RPC_INLINE constexpr T &&forward(typename remove_reference::type &value) { return static_cast(value); } template -LIBC_INLINE constexpr T &&forward(typename remove_reference::type &&value) { +RPC_INLINE constexpr T &&forward(typename remove_reference::type &&value) { return static_cast(value); } struct in_place_t { - LIBC_INLINE explicit in_place_t() = default; + RPC_INLINE explicit in_place_t() = default; }; struct nullopt_t { - LIBC_INLINE constexpr explicit nullopt_t() = default; + RPC_INLINE constexpr explicit nullopt_t() = default; }; constexpr inline in_place_t in_place{}; @@ -75,15 +75,15 @@ template class optional { bool in_use = false; - LIBC_INLINE ~OptionalStorage() { reset(); } + RPC_INLINE ~OptionalStorage() { reset(); } - LIBC_INLINE constexpr OptionalStorage() : empty() {} + RPC_INLINE constexpr OptionalStorage() : empty() {} template - LIBC_INLINE constexpr explicit OptionalStorage(in_place_t, Args &&...args) + RPC_INLINE constexpr explicit OptionalStorage(in_place_t, Args &&...args) : stored_value(forward(args)...) 
{} - LIBC_INLINE constexpr void reset() { + RPC_INLINE constexpr void reset() { if (in_use) stored_value.~U(); in_use = false; @@ -93,60 +93,54 @@ template class optional { OptionalStorage storage; public: - LIBC_INLINE constexpr optional() = default; - LIBC_INLINE constexpr optional(nullopt_t) {} + RPC_INLINE constexpr optional() = default; + RPC_INLINE constexpr optional(nullopt_t) {} - LIBC_INLINE constexpr optional(const T &t) : storage(in_place, t) { + RPC_INLINE constexpr optional(const T &t) : storage(in_place, t) { storage.in_use = true; } - LIBC_INLINE constexpr optional(const optional &) = default; + RPC_INLINE constexpr optional(const optional &) = default; - LIBC_INLINE constexpr optional(T &&t) : storage(in_place, move(t)) { + RPC_INLINE constexpr optional(T &&t) : storage(in_place, move(t)) { storage.in_use = true; } - LIBC_INLINE constexpr optional(optional &&O) = default; + RPC_INLINE constexpr optional(optional &&O) = default; - LIBC_INLINE constexpr optional &operator=(T &&t) { + RPC_INLINE constexpr optional &operator=(T &&t) { storage = move(t); return *this; } - LIBC_INLINE constexpr optional &operator=(optional &&) = default; + RPC_INLINE constexpr optional &operator=(optional &&) = default; - LIBC_INLINE constexpr optional &operator=(const T &t) { + RPC_INLINE constexpr optional &operator=(const T &t) { storage = t; return *this; } - LIBC_INLINE constexpr optional &operator=(const optional &) = default; + RPC_INLINE constexpr optional &operator=(const optional &) = default; - LIBC_INLINE constexpr void reset() { storage.reset(); } + RPC_INLINE constexpr void reset() { storage.reset(); } - LIBC_INLINE constexpr const T &value() const & { - return storage.stored_value; - } + RPC_INLINE constexpr const T &value() const & { return storage.stored_value; } - LIBC_INLINE constexpr T &value() & { return storage.stored_value; } + RPC_INLINE constexpr T &value() & { return storage.stored_value; } - LIBC_INLINE constexpr explicit operator bool() const { 
- return storage.in_use; - } - LIBC_INLINE constexpr bool has_value() const { return storage.in_use; } - LIBC_INLINE constexpr const T *operator->() const { + RPC_INLINE constexpr explicit operator bool() const { return storage.in_use; } + RPC_INLINE constexpr bool has_value() const { return storage.in_use; } + RPC_INLINE constexpr const T *operator->() const { return &storage.stored_value; } - LIBC_INLINE constexpr T *operator->() { return &storage.stored_value; } - LIBC_INLINE constexpr const T &operator*() const & { + RPC_INLINE constexpr T *operator->() { return &storage.stored_value; } + RPC_INLINE constexpr const T &operator*() const & { return storage.stored_value; } - LIBC_INLINE constexpr T &operator*() & { return storage.stored_value; } + RPC_INLINE constexpr T &operator*() & { return storage.stored_value; } - LIBC_INLINE constexpr T &&value() && { return move(storage.stored_value); } - LIBC_INLINE constexpr T &&operator*() && { - return move(storage.stored_value); - } + RPC_INLINE constexpr T &&value() && { return move(storage.stored_value); } + RPC_INLINE constexpr T &&operator*() && { return move(storage.stored_value); } }; /// Suspend the thread briefly to assist the thread scheduler during busy loops. -LIBC_INLINE void sleep_briefly() { +RPC_INLINE void sleep_briefly() { #if defined(LIBC_TARGET_ARCH_IS_NVPTX) if (__nvvm_reflect("__CUDA_ARCH") >= 700) asm("nanosleep.u32 64;" ::: "memory"); @@ -164,7 +158,7 @@ LIBC_INLINE void sleep_briefly() { } /// Conditional to indicate if this process is running on the GPU. -LIBC_INLINE constexpr bool is_process_gpu() { +RPC_INLINE constexpr bool is_process_gpu() { #ifdef RPC_TARGET_IS_GPU return true; #else @@ -173,14 +167,14 @@ LIBC_INLINE constexpr bool is_process_gpu() { } /// Wait for all lanes in the group to complete. 
-LIBC_INLINE void sync_lane(uint64_t lane_mask) { +RPC_INLINE void sync_lane(uint64_t lane_mask) { #ifdef RPC_TARGET_IS_GPU return __gpu_sync_lane(lane_mask); #endif } /// Copies the value from the first active thread to the rest. -LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) { +RPC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) { #ifdef RPC_TARGET_IS_GPU return __gpu_read_first_lane_u32(lane_mask, x); #else @@ -189,7 +183,7 @@ LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) { } /// Returns the number lanes that participate in the RPC interface. -LIBC_INLINE uint32_t get_num_lanes() { +RPC_INLINE uint32_t get_num_lanes() { #ifdef RPC_TARGET_IS_GPU return __gpu_num_lanes(); #else @@ -198,7 +192,7 @@ LIBC_INLINE uint32_t get_num_lanes() { } /// Returns the id of the thread inside of an AMD wavefront executing together. -LIBC_INLINE uint64_t get_lane_mask() { +RPC_INLINE uint64_t get_lane_mask() { #ifdef RPC_TARGET_IS_GPU return __gpu_lane_mask(); #else @@ -207,7 +201,7 @@ LIBC_INLINE uint64_t get_lane_mask() { } /// Returns the id of the thread inside of an AMD wavefront executing together. -LIBC_INLINE uint32_t get_lane_id() { +RPC_INLINE uint32_t get_lane_id() { #ifdef RPC_TARGET_IS_GPU return __gpu_lane_id(); #else @@ -216,7 +210,7 @@ LIBC_INLINE uint32_t get_lane_id() { } /// Conditional that is only true for a single thread in a lane. -LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { +RPC_INLINE bool is_first_lane(uint64_t lane_mask) { #ifdef RPC_TARGET_IS_GPU return __gpu_is_first_in_lane(lane_mask); #else @@ -225,7 +219,7 @@ LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { } /// Returns a bitmask of threads in the current lane for which \p x is true. 
-LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { +RPC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { #ifdef RPC_TARGET_IS_GPU return __gpu_ballot(lane_mask, x); #else @@ -235,7 +229,7 @@ LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { /// Return \p val aligned "upwards" according to \p align. template -LIBC_INLINE constexpr V align_up(V val, A align) { +RPC_INLINE constexpr V align_up(V val, A align) { return ((val + V(align) - 1) / V(align)) * V(align); } @@ -243,14 +237,14 @@ LIBC_INLINE constexpr V align_up(V val, A align) { /// model. On the GPU stack variables are always private to a lane so we can /// simply use the variable passed in. On the CPU we need to allocate enough /// space for the whole lane and index into it. -template LIBC_INLINE V &lane_value(V *val, uint32_t id) { +template RPC_INLINE V &lane_value(V *val, uint32_t id) { if constexpr (is_process_gpu()) return *val; return val[id]; } /// Advance the \p p by \p bytes. -template LIBC_INLINE T *advance(T *ptr, U bytes) { +template RPC_INLINE T *advance(T *ptr, U bytes) { if constexpr (is_const::value) return reinterpret_cast(reinterpret_cast(ptr) + bytes); @@ -259,15 +253,14 @@ template LIBC_INLINE T *advance(T *ptr, U bytes) { } /// Wrapper around the optimal memory copy implementation for the target. -LIBC_INLINE void rpc_memcpy(void *dst, const void *src, size_t count) { +RPC_INLINE void rpc_memcpy(void *dst, const void *src, size_t count) { __builtin_memcpy(dst, src, count); } -template LIBC_INLINE constexpr const T &max(const T &a, const T &b) { +template RPC_INLINE constexpr const T &max(const T &a, const T &b) { return (a < b) ? 
b : a; } } // namespace rpc -} // namespace LIBC_NAMESPACE_DECL -#endif // LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H +#endif // LLVM_LIBC_SHARED_RPC_UTIL_H diff --git a/libc/src/__support/RPC/CMakeLists.txt b/libc/src/__support/RPC/CMakeLists.txt index 183fc6f8683e06..0a7141fb60bf03 100644 --- a/libc/src/__support/RPC/CMakeLists.txt +++ b/libc/src/__support/RPC/CMakeLists.txt @@ -2,20 +2,6 @@ if(NOT LIBC_TARGET_OS_IS_GPU) return() endif() -add_header_library( - rpc - HDRS - rpc.h - rpc_util.h - DEPENDS - libc.src.__support.common - libc.src.__support.CPP.algorithm - libc.src.__support.CPP.atomic - libc.src.__support.CPP.functional - libc.src.__support.CPP.optional - libc.src.__support.GPU.utils -) - add_object_library( rpc_client SRCS @@ -25,5 +11,4 @@ add_object_library( DEPENDS libc.include.gpu_rpc libc.src.__support.GPU.utils - .rpc ) diff --git a/libc/src/__support/RPC/rpc_client.cpp b/libc/src/__support/RPC/rpc_client.cpp index 232b20d008d1d5..c26cf9ca2ddbe6 100644 --- a/libc/src/__support/RPC/rpc_client.cpp +++ b/libc/src/__support/RPC/rpc_client.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "rpc_client.h" -#include "rpc.h" + #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/__support/RPC/rpc_client.h b/libc/src/__support/RPC/rpc_client.h index 7bd6d0b5e00b47..8923e62e0e22a0 100644 --- a/libc/src/__support/RPC/rpc_client.h +++ b/libc/src/__support/RPC/rpc_client.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_CLIENT_H #define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_CLIENT_H -#include "rpc.h" +#include "shared/rpc.h" #include "include/llvm-libc-types/rpc_opcodes_t.h" #include "src/__support/CPP/type_traits.h" @@ -18,6 +18,12 @@ namespace LIBC_NAMESPACE_DECL { namespace rpc { +using ::rpc::Buffer; +using ::rpc::Client; +using ::rpc::Port; +using ::rpc::Process; +using ::rpc::Server; + static_assert(cpp::is_trivially_copyable::value && sizeof(Process) == 
sizeof(Process), "The client is not trivially copyable from the server"); diff --git a/libc/src/__support/block.h b/libc/src/__support/block.h index e63801301ac752..9ca3f11530c4ba 100644 --- a/libc/src/__support/block.h +++ b/libc/src/__support/block.h @@ -68,13 +68,11 @@ using cpp::optional; /// The blocks store their offsets to the previous and next blocks. The latter /// is also the block's size. /// -/// The `ALIGNMENT` constant provided by the derived block is typically the -/// minimum value of `alignof(OffsetType)`. Blocks will always be aligned to a -/// `ALIGNMENT` boundary. Block sizes will always be rounded up to a multiple of -/// `ALIGNMENT`. +/// Blocks will always be aligned to a `ALIGNMENT` boundary. Block sizes will +/// always be rounded up to a multiple of `ALIGNMENT`. /// -/// As an example, the diagram below represents two contiguous -/// `Block`s. The indices indicate byte offsets: +/// As an example, the diagram below represents two contiguous `Block`s. The +/// indices indicate byte offsets: /// /// @code{.unparsed} /// Block 1: @@ -117,17 +115,6 @@ using cpp::optional; /// /// The next offset of a block matches the previous offset of its next block. /// The first block in a list is denoted by having a previous offset of `0`. -/// -/// @tparam OffsetType Unsigned integral type used to encode offsets. Larger -/// types can address more memory, but consume greater -/// overhead. -/// @tparam kAlign Sets the overall alignment for blocks. Minimum is -/// `alignof(OffsetType)`, but the default is max_align_t, -/// since the usable space will then already be -/// aligned to max_align_t if the size of OffsetType is no -/// less than half of max_align_t. Larger values cause -/// greater overhead. -template class Block { // Masks for the contents of the next_ field. 
static constexpr size_t PREV_FREE_MASK = 1 << 0; @@ -135,12 +122,8 @@ class Block { static constexpr size_t SIZE_MASK = ~(PREV_FREE_MASK | LAST_MASK); public: - using offset_type = OffsetType; - static_assert(cpp::is_unsigned_v, - "offset type must be unsigned"); - static constexpr size_t ALIGNMENT = - cpp::max(cpp::max(kAlign, alignof(offset_type)), size_t{4}); - static constexpr size_t BLOCK_OVERHEAD = align_up(sizeof(Block), ALIGNMENT); + static constexpr size_t ALIGNMENT = cpp::max(alignof(max_align_t), size_t{4}); + static const size_t BLOCK_OVERHEAD; // No copy or move. Block(const Block &other) = delete; @@ -157,26 +140,26 @@ class Block { /// /// @warning This method does not do any checking; passing a random /// pointer will return a non-null pointer. - static Block *from_usable_space(void *usable_space) { + LIBC_INLINE static Block *from_usable_space(void *usable_space) { auto *bytes = reinterpret_cast(usable_space); return reinterpret_cast(bytes - BLOCK_OVERHEAD); } - static const Block *from_usable_space(const void *usable_space) { + LIBC_INLINE static const Block *from_usable_space(const void *usable_space) { const auto *bytes = reinterpret_cast(usable_space); return reinterpret_cast(bytes - BLOCK_OVERHEAD); } /// @returns The total size of the block in bytes, including the header. - size_t outer_size() const { return next_ & SIZE_MASK; } + LIBC_INLINE size_t outer_size() const { return next_ & SIZE_MASK; } - static size_t outer_size(size_t inner_size) { + LIBC_INLINE static size_t outer_size(size_t inner_size) { // The usable region includes the prev_ field of the next block. return inner_size - sizeof(prev_) + BLOCK_OVERHEAD; } /// @returns The number of usable bytes inside the block were it to be /// allocated. 
- size_t inner_size() const { + LIBC_INLINE size_t inner_size() const { if (!next()) return 0; return inner_size(outer_size()); @@ -184,13 +167,13 @@ class Block { /// @returns The number of usable bytes inside a block with the given outer /// size were it to be allocated. - static size_t inner_size(size_t outer_size) { + LIBC_INLINE static size_t inner_size(size_t outer_size) { // The usable region includes the prev_ field of the next block. return inner_size_free(outer_size) + sizeof(prev_); } /// @returns The number of usable bytes inside the block if it remains free. - size_t inner_size_free() const { + LIBC_INLINE size_t inner_size_free() const { if (!next()) return 0; return inner_size_free(outer_size()); @@ -198,20 +181,20 @@ class Block { /// @returns The number of usable bytes inside a block with the given outer /// size if it remains free. - static size_t inner_size_free(size_t outer_size) { + LIBC_INLINE static size_t inner_size_free(size_t outer_size) { return outer_size - BLOCK_OVERHEAD; } /// @returns A pointer to the usable space inside this block. - cpp::byte *usable_space() { + LIBC_INLINE cpp::byte *usable_space() { return reinterpret_cast(this) + BLOCK_OVERHEAD; } - const cpp::byte *usable_space() const { + LIBC_INLINE const cpp::byte *usable_space() const { return reinterpret_cast(this) + BLOCK_OVERHEAD; } // @returns The region of memory the block manages, including the header. - ByteSpan region() { + LIBC_INLINE ByteSpan region() { return {reinterpret_cast(this), outer_size()}; } @@ -229,42 +212,53 @@ class Block { /// @returns The block immediately after this one, or a null pointer if this /// is the last block. - Block *next() const; + LIBC_INLINE Block *next() const { + if (next_ & LAST_MASK) + return nullptr; + return reinterpret_cast(reinterpret_cast(this) + + outer_size()); + } /// @returns The free block immediately before this one, otherwise nullptr. 
- Block *prev_free() const; + LIBC_INLINE Block *prev_free() const { + if (!(next_ & PREV_FREE_MASK)) + return nullptr; + return reinterpret_cast(reinterpret_cast(this) - prev_); + } /// @returns Whether the block is unavailable for allocation. - bool used() const { return !next() || !next()->prev_free(); } + LIBC_INLINE bool used() const { return !next() || !next()->prev_free(); } /// Marks this block as in use. - void mark_used() { + LIBC_INLINE void mark_used() { LIBC_ASSERT(next() && "last block is always considered used"); next()->next_ &= ~PREV_FREE_MASK; } /// Marks this block as free. - void mark_free() { + LIBC_INLINE void mark_free() { LIBC_ASSERT(next() && "last block is always considered used"); next()->next_ |= PREV_FREE_MASK; // The next block's prev_ field becomes alive, as it is no longer part of // this block's used space. - *new (&next()->prev_) offset_type = outer_size(); + *new (&next()->prev_) size_t = outer_size(); } /// Marks this block as the last one in the chain. Makes next() return /// nullptr. - void mark_last() { next_ |= LAST_MASK; } + LIBC_INLINE void mark_last() { next_ |= LAST_MASK; } - constexpr Block(size_t outer_size); + LIBC_INLINE constexpr Block(size_t outer_size) : next_(outer_size) { + LIBC_ASSERT(outer_size % ALIGNMENT == 0 && "block sizes must be aligned"); + } - bool is_usable_space_aligned(size_t alignment) const { + LIBC_INLINE bool is_usable_space_aligned(size_t alignment) const { return reinterpret_cast(usable_space()) % alignment == 0; } /// @returns The new inner size of this block that would give the usable /// space of the next block the given alignment. - size_t padding_for_alignment(size_t alignment) const { + LIBC_INLINE size_t padding_for_alignment(size_t alignment) const { if (is_usable_space_aligned(alignment)) return 0; @@ -322,7 +316,9 @@ class Block { private: /// Construct a block to represent a span of bytes. Overwrites only enough /// memory for the block header; the rest of the span is left alone. 
- static Block *as_block(ByteSpan bytes); + LIBC_INLINE static Block *as_block(ByteSpan bytes) { + return ::new (bytes.data()) Block(bytes.size()); + } /// Like `split`, but assumes the caller has already checked to parameters to /// ensure the split will succeed. @@ -332,11 +328,11 @@ class Block { /// block. This field is only alive when the previous block is free; /// otherwise, its memory is reused as part of the previous block's usable /// space. - offset_type prev_ = 0; + size_t prev_ = 0; /// Offset from this block to the next block. Valid even if this is the last /// block, since it equals the size of the block. - offset_type next_ = 0; + size_t next_ = 0; /// Information about the current state of the block is stored in the two low /// order bits of the next_ value. These are guaranteed free by a minimum @@ -347,9 +343,10 @@ class Block { /// previous block is free. /// * If the `last` flag is set, the block is the sentinel last block. It is /// summarily considered used and has no next block. -} __attribute__((packed, aligned(cpp::max(kAlign, size_t{4})))); +} __attribute__((packed, aligned(cpp::max(alignof(max_align_t), size_t{4})))); -// Public template method implementations. 
+inline constexpr size_t Block::BLOCK_OVERHEAD = + align_up(sizeof(Block), ALIGNMENT); LIBC_INLINE ByteSpan get_aligned_subspan(ByteSpan bytes, size_t alignment) { if (bytes.data() == nullptr) @@ -367,9 +364,8 @@ LIBC_INLINE ByteSpan get_aligned_subspan(ByteSpan bytes, size_t alignment) { aligned_end - aligned_start); } -template -optional *> -Block::init(ByteSpan region) { +LIBC_INLINE +optional Block::init(ByteSpan region) { optional result = get_aligned_subspan(region, ALIGNMENT); if (!result) return {}; @@ -379,7 +375,7 @@ Block::init(ByteSpan region) { if (region.size() < 2 * BLOCK_OVERHEAD) return {}; - if (cpp::numeric_limits::max() < region.size()) + if (cpp::numeric_limits::max() < region.size()) return {}; Block *block = as_block(region.first(region.size() - BLOCK_OVERHEAD)); @@ -389,9 +385,8 @@ Block::init(ByteSpan region) { return block; } -template -bool Block::can_allocate(size_t alignment, - size_t size) const { +LIBC_INLINE +bool Block::can_allocate(size_t alignment, size_t size) const { if (inner_size() < size) return false; if (is_usable_space_aligned(alignment)) @@ -406,10 +401,8 @@ bool Block::can_allocate(size_t alignment, return size <= aligned_inner_size; } -template -typename Block::BlockInfo -Block::allocate(Block *block, size_t alignment, - size_t size) { +LIBC_INLINE +Block::BlockInfo Block::allocate(Block *block, size_t alignment, size_t size) { LIBC_ASSERT( block->can_allocate(alignment, size) && "Calls to this function for a given alignment and size should only be " @@ -447,9 +440,8 @@ Block::allocate(Block *block, size_t alignment, return info; } -template -optional *> -Block::split(size_t new_inner_size) { +LIBC_INLINE +optional Block::split(size_t new_inner_size) { if (used()) return {}; // The prev_ field of the next block is always available, so there is a @@ -469,9 +461,8 @@ Block::split(size_t new_inner_size) { return split_impl(new_inner_size); } -template -Block * -Block::split_impl(size_t new_inner_size) { +LIBC_INLINE +Block 
*Block::split_impl(size_t new_inner_size) { size_t outer_size1 = outer_size(new_inner_size); LIBC_ASSERT(outer_size1 % ALIGNMENT == 0 && "new size must be aligned"); ByteSpan new_region = region().subspan(outer_size1); @@ -484,8 +475,8 @@ Block::split_impl(size_t new_inner_size) { return new_block; } -template -bool Block::merge_next() { +LIBC_INLINE +bool Block::merge_next() { if (used() || next()->used()) return false; size_t new_size = outer_size() + next()->outer_size(); @@ -495,34 +486,6 @@ bool Block::merge_next() { return true; } -template -Block *Block::next() const { - if (next_ & LAST_MASK) - return nullptr; - return reinterpret_cast(reinterpret_cast(this) + - outer_size()); -} - -template -Block *Block::prev_free() const { - if (!(next_ & PREV_FREE_MASK)) - return nullptr; - return reinterpret_cast(reinterpret_cast(this) - prev_); -} - -// Private template method implementations. - -template -constexpr Block::Block(size_t outer_size) - : next_(outer_size) { - LIBC_ASSERT(outer_size % ALIGNMENT == 0 && "block sizes must be aligned"); -} - -template -Block *Block::as_block(ByteSpan bytes) { - return ::new (bytes.data()) Block(bytes.size()); -} - } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC___SUPPORT_BLOCK_H diff --git a/libc/src/__support/freelist.cpp b/libc/src/__support/freelist.cpp index d3dd44895130cd..bfb90ae1c4db47 100644 --- a/libc/src/__support/freelist.cpp +++ b/libc/src/__support/freelist.cpp @@ -12,7 +12,7 @@ namespace LIBC_NAMESPACE_DECL { void FreeList::push(Node *node) { if (begin_) { - LIBC_ASSERT(Block<>::from_usable_space(node)->outer_size() == + LIBC_ASSERT(Block::from_usable_space(node)->outer_size() == begin_->block()->outer_size() && "freelist entries must have the same size"); // Since the list is circular, insert the node immediately before begin_. 
diff --git a/libc/src/__support/freelist.h b/libc/src/__support/freelist.h index eaeaeb013eeaec..c51f14fe57ae73 100644 --- a/libc/src/__support/freelist.h +++ b/libc/src/__support/freelist.h @@ -26,12 +26,12 @@ class FreeList { class Node { public: /// @returns The block containing this node. - LIBC_INLINE const Block<> *block() const { - return Block<>::from_usable_space(this); + LIBC_INLINE const Block *block() const { + return Block::from_usable_space(this); } /// @returns The block containing this node. - LIBC_INLINE Block<> *block() { return Block<>::from_usable_space(this); } + LIBC_INLINE Block *block() { return Block::from_usable_space(this); } /// @returns The inner size of blocks in the list containing this node. LIBC_INLINE size_t size() const { return block()->inner_size(); } @@ -58,11 +58,11 @@ class FreeList { LIBC_INLINE Node *begin() { return begin_; } /// @returns The first block in the list. - LIBC_INLINE Block<> *front() { return begin_->block(); } + LIBC_INLINE Block *front() { return begin_->block(); } /// Push a block to the back of the list. /// The block must be large enough to contain a node. 
- LIBC_INLINE void push(Block<> *block) { + LIBC_INLINE void push(Block *block) { LIBC_ASSERT(!block->used() && "only free blocks can be placed on free lists"); LIBC_ASSERT(block->inner_size_free() >= sizeof(FreeList) && diff --git a/libc/src/__support/freelist_heap.h b/libc/src/__support/freelist_heap.h index cfcf72fc4c9859..8fa36257cb91ae 100644 --- a/libc/src/__support/freelist_heap.h +++ b/libc/src/__support/freelist_heap.h @@ -53,7 +53,7 @@ class FreeListHeap { void *allocate_impl(size_t alignment, size_t size); - span block_to_span(Block<> *block) { + span block_to_span(Block *block) { return span(block->usable_space(), block->inner_size()); } @@ -75,8 +75,8 @@ template class FreeListHeapBuffer : public FreeListHeap { LIBC_INLINE void FreeListHeap::init() { LIBC_ASSERT(!is_initialized && "duplicate initialization"); - auto result = Block<>::init(region()); - Block<> *block = *result; + auto result = Block::init(region()); + Block *block = *result; free_store.set_range({0, cpp::bit_ceil(block->inner_size())}); free_store.insert(block); is_initialized = true; @@ -93,17 +93,17 @@ LIBC_INLINE void *FreeListHeap::allocate_impl(size_t alignment, size_t size) { // TODO: usable_space should always be aligned to max_align_t. if (alignment > alignof(max_align_t) || - (Block<>::BLOCK_OVERHEAD % alignof(max_align_t) != 0)) { + (Block::BLOCK_OVERHEAD % alignof(max_align_t) != 0)) { // TODO: This bound isn't precisely calculated yet. It assumes one extra - // Block<>::ALIGNMENT to accomodate the possibility for padding block + // Block::ALIGNMENT to accomodate the possibility for padding block // overhead. (alignment - 1) ensures that there is an aligned point // somewhere in usable_space, but this isn't tight either, since // usable_space is also already somewhat aligned. 
- if (add_overflow(size, (alignment - 1) + Block<>::ALIGNMENT, request_size)) + if (add_overflow(size, (alignment - 1) + Block::ALIGNMENT, request_size)) return nullptr; } - Block<> *block = free_store.remove_best_fit(request_size); + Block *block = free_store.remove_best_fit(request_size); if (!block) return nullptr; @@ -111,7 +111,7 @@ LIBC_INLINE void *FreeListHeap::allocate_impl(size_t alignment, size_t size) { "block should always be large enough to allocate at the correct " "alignment"); - auto block_info = Block<>::allocate(block, alignment, size); + auto block_info = Block::allocate(block, alignment, size); if (block_info.next) free_store.insert(block_info.next); if (block_info.prev) @@ -143,14 +143,14 @@ LIBC_INLINE void FreeListHeap::free(void *ptr) { LIBC_ASSERT(is_valid_ptr(bytes) && "Invalid pointer"); - Block<> *block = Block<>::from_usable_space(bytes); + Block *block = Block::from_usable_space(bytes); LIBC_ASSERT(block->next() && "sentinel last block cannot be freed"); LIBC_ASSERT(block->used() && "double free"); block->mark_free(); // Can we combine with the left or right blocks? - Block<> *prev_free = block->prev_free(); - Block<> *next = block->next(); + Block *prev_free = block->prev_free(); + Block *next = block->next(); if (prev_free != nullptr) { // Remove from free store and merge. @@ -183,7 +183,7 @@ LIBC_INLINE void *FreeListHeap::realloc(void *ptr, size_t size) { if (!is_valid_ptr(bytes)) return nullptr; - Block<> *block = Block<>::from_usable_space(bytes); + Block *block = Block::from_usable_space(bytes); if (!block->used()) return nullptr; size_t old_size = block->inner_size(); diff --git a/libc/src/__support/freestore.h b/libc/src/__support/freestore.h index f04b561f5d91dc..97197dda4b546b 100644 --- a/libc/src/__support/freestore.h +++ b/libc/src/__support/freestore.h @@ -29,40 +29,40 @@ class FreeStore { /// Insert a free block. If the block is too small to be tracked, nothing /// happens. 
- void insert(Block<> *block); + void insert(Block *block); /// Remove a free block. If the block is too small to be tracked, nothing /// happens. - void remove(Block<> *block); + void remove(Block *block); /// Remove a best-fit free block that can contain the given size when /// allocated. Returns nullptr if there is no such block. - Block<> *remove_best_fit(size_t size); + Block *remove_best_fit(size_t size); private: static constexpr size_t ALIGNMENT = alignof(max_align_t); static constexpr size_t MIN_OUTER_SIZE = - align_up(Block<>::BLOCK_OVERHEAD + sizeof(FreeList::Node), ALIGNMENT); + align_up(Block::BLOCK_OVERHEAD + sizeof(FreeList::Node), ALIGNMENT); static constexpr size_t MIN_LARGE_OUTER_SIZE = - align_up(Block<>::BLOCK_OVERHEAD + sizeof(FreeTrie::Node), ALIGNMENT); + align_up(Block::BLOCK_OVERHEAD + sizeof(FreeTrie::Node), ALIGNMENT); static constexpr size_t NUM_SMALL_SIZES = (MIN_LARGE_OUTER_SIZE - MIN_OUTER_SIZE) / ALIGNMENT; - LIBC_INLINE static bool too_small(Block<> *block) { + LIBC_INLINE static bool too_small(Block *block) { return block->outer_size() < MIN_OUTER_SIZE; } - LIBC_INLINE static bool is_small(Block<> *block) { + LIBC_INLINE static bool is_small(Block *block) { return block->outer_size() < MIN_LARGE_OUTER_SIZE; } - FreeList &small_list(Block<> *block); + FreeList &small_list(Block *block); FreeList *find_best_small_fit(size_t size); cpp::array small_lists; FreeTrie large_trie; }; -LIBC_INLINE void FreeStore::insert(Block<> *block) { +LIBC_INLINE void FreeStore::insert(Block *block) { if (too_small(block)) return; if (is_small(block)) @@ -71,7 +71,7 @@ LIBC_INLINE void FreeStore::insert(Block<> *block) { large_trie.push(block); } -LIBC_INLINE void FreeStore::remove(Block<> *block) { +LIBC_INLINE void FreeStore::remove(Block *block) { if (too_small(block)) return; if (is_small(block)) { @@ -83,21 +83,21 @@ LIBC_INLINE void FreeStore::remove(Block<> *block) { } } -LIBC_INLINE Block<> *FreeStore::remove_best_fit(size_t size) { +LIBC_INLINE 
Block *FreeStore::remove_best_fit(size_t size) { if (FreeList *list = find_best_small_fit(size)) { - Block<> *block = list->front(); + Block *block = list->front(); list->pop(); return block; } if (FreeTrie::Node *best_fit = large_trie.find_best_fit(size)) { - Block<> *block = best_fit->block(); + Block *block = best_fit->block(); large_trie.remove(best_fit); return block; } return nullptr; } -LIBC_INLINE FreeList &FreeStore::small_list(Block<> *block) { +LIBC_INLINE FreeList &FreeStore::small_list(Block *block) { LIBC_ASSERT(is_small(block) && "only legal for small blocks"); return small_lists[(block->outer_size() - MIN_OUTER_SIZE) / ALIGNMENT]; } diff --git a/libc/src/__support/freetrie.h b/libc/src/__support/freetrie.h index ff1912ee94f884..42363c2c9e2f4e 100644 --- a/libc/src/__support/freetrie.h +++ b/libc/src/__support/freetrie.h @@ -96,7 +96,7 @@ class FreeTrie { LIBC_INLINE bool empty() const { return !root; } /// Push a block to the trie. - void push(Block<> *block); + void push(Block *block); /// Remove a node from this trie node's free list. void remove(Node *node); @@ -117,7 +117,7 @@ class FreeTrie { SizeRange range; }; -LIBC_INLINE void FreeTrie::push(Block<> *block) { +LIBC_INLINE void FreeTrie::push(Block *block) { LIBC_ASSERT(block->inner_size_free() >= sizeof(Node) && "block too small to accomodate free trie node"); size_t size = block->inner_size(); diff --git a/libc/src/sys/socket/linux/recvfrom.cpp b/libc/src/sys/socket/linux/recvfrom.cpp index a0f8278cd5deb4..574e65f64a54b0 100644 --- a/libc/src/sys/socket/linux/recvfrom.cpp +++ b/libc/src/sys/socket/linux/recvfrom.cpp @@ -23,17 +23,26 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(ssize_t, recvfrom, (int sockfd, void *buf, size_t len, int flags, - struct sockaddr *__restrict dest_addr, + sockaddr *__restrict src_addr, socklen_t *__restrict addrlen)) { + // addrlen is a value-result argument. If it's not null, it passes the max + // size of the buffer src_addr to the syscall. 
After the syscall, it's updated + // to the actual size of the source address. This may be larger than the + // buffer, in which case the buffer contains a truncated result. + size_t srcaddr_sz; + if (src_addr) + srcaddr_sz = *addrlen; + (void)srcaddr_sz; // prevent "set but not used" warning + #ifdef SYS_recvfrom ssize_t ret = LIBC_NAMESPACE::syscall_impl( - SYS_recvfrom, sockfd, buf, len, flags, dest_addr, addrlen); + SYS_recvfrom, sockfd, buf, len, flags, src_addr, addrlen); #elif defined(SYS_socketcall) unsigned long sockcall_args[6] = {static_cast(sockfd), reinterpret_cast(buf), static_cast(len), static_cast(flags), - reinterpret_cast(dest_addr), + reinterpret_cast(src_addr), static_cast(addrlen)}; ssize_t ret = LIBC_NAMESPACE::syscall_impl( SYS_socketcall, SYS_RECVFROM, sockcall_args); @@ -46,8 +55,13 @@ LLVM_LIBC_FUNCTION(ssize_t, recvfrom, } MSAN_UNPOISON(buf, ret); - MSAN_UNPOISON(addrlen, sizeof(socklen_t)); + if (src_addr) { + size_t min_src_addr_size = (*addrlen < srcaddr_sz) ? 
*addrlen : srcaddr_sz; + (void)min_src_addr_size; // prevent "set but not used" warning + + MSAN_UNPOISON(src_addr, min_src_addr_size); + } return ret; } diff --git a/libc/src/sys/socket/recvfrom.h b/libc/src/sys/socket/recvfrom.h index 14869802e72563..5c12410dd4ea43 100644 --- a/libc/src/sys/socket/recvfrom.h +++ b/libc/src/sys/socket/recvfrom.h @@ -18,8 +18,7 @@ namespace LIBC_NAMESPACE_DECL { ssize_t recvfrom(int sockfd, void *buf, size_t len, int flags, - struct sockaddr *__restrict address, - socklen_t *__restrict addrlen); + sockaddr *__restrict src_addr, socklen_t *__restrict addrlen); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/integration/startup/gpu/rpc_interface_test.cpp b/libc/test/integration/startup/gpu/rpc_interface_test.cpp index 2dafa911783ffc..b05ffb92699bf7 100644 --- a/libc/test/integration/startup/gpu/rpc_interface_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_interface_test.cpp @@ -17,27 +17,43 @@ using namespace LIBC_NAMESPACE; // as long as they are mirrored. 
static void test_interface(bool end_with_send) { uint64_t cnt = 0; - rpc::Client::Port port = rpc::client.open(); - port.send( - [&](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = end_with_send; }); - port.send( - [&](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = cnt = cnt + 1; }); - port.recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); - port.send( - [&](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = cnt = cnt + 1; }); - port.recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); - port.send( - [&](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = cnt = cnt + 1; }); - port.send( - [&](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = cnt = cnt + 1; }); - port.recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); - port.recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + LIBC_NAMESPACE::rpc::Client::Port port = + LIBC_NAMESPACE::rpc::client.open(); + port.send([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = end_with_send; + }); + port.send([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port.recv([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + cnt = buffer->data[0]; + }); + port.send([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port.recv([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + cnt = buffer->data[0]; + }); + port.send([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port.send([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port.recv([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + cnt = buffer->data[0]; + }); + port.recv([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + cnt = buffer->data[0]; + }); if (end_with_send) - port.send([&](rpc::Buffer *buffer, uint32_t) { + port.send([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { 
buffer->data[0] = cnt = cnt + 1; }); else - port.recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port.recv([&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + cnt = buffer->data[0]; + }); port.close(); ASSERT_TRUE(cnt == 9 && "Invalid number of increments"); diff --git a/libc/test/integration/startup/gpu/rpc_stream_test.cpp b/libc/test/integration/startup/gpu/rpc_stream_test.cpp index 09a4ae67256e3a..208130bcfd9a96 100644 --- a/libc/test/integration/startup/gpu/rpc_stream_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_stream_test.cpp @@ -34,7 +34,8 @@ static void test_stream() { inline_memcpy(send_ptr, str, send_size); ASSERT_TRUE(inline_memcmp(send_ptr, str, send_size) == 0 && "Data mismatch"); - rpc::Client::Port port = rpc::client.open(); + LIBC_NAMESPACE::rpc::Client::Port port = + LIBC_NAMESPACE::rpc::client.open(); port.send_n(send_ptr, send_size); port.recv_n(&recv_ptr, &recv_size, [](uint64_t size) { return malloc(size); }); @@ -77,7 +78,8 @@ static void test_divergent() { inline_memcpy(buffer, &data[offset], offset); ASSERT_TRUE(inline_memcmp(buffer, &data[offset], offset) == 0 && "Data mismatch"); - rpc::Client::Port port = rpc::client.open(); + LIBC_NAMESPACE::rpc::Client::Port port = + LIBC_NAMESPACE::rpc::client.open(); port.send_n(buffer, offset); inline_memset(buffer, offset, 0); port.recv_n(&recv_ptr, &recv_size, [&](uint64_t) { return buffer; }); diff --git a/libc/test/integration/startup/gpu/rpc_test.cpp b/libc/test/integration/startup/gpu/rpc_test.cpp index bec8171180a055..3deb72b9f85dab 100644 --- a/libc/test/integration/startup/gpu/rpc_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_test.cpp @@ -18,12 +18,13 @@ static void test_add_simple() { 10 + 10 * gpu::get_thread_id() + 10 * gpu::get_block_id(); uint64_t cnt = 0; for (uint32_t i = 0; i < num_additions; ++i) { - rpc::Client::Port port = rpc::client.open(); + LIBC_NAMESPACE::rpc::Client::Port port = + LIBC_NAMESPACE::rpc::client.open(); 
port.send_and_recv( - [=](rpc::Buffer *buffer, uint32_t) { + [=](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { reinterpret_cast(buffer->data)[0] = cnt; }, - [&](rpc::Buffer *buffer, uint32_t) { + [&](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { cnt = reinterpret_cast(buffer->data)[0]; }); port.close(); @@ -33,8 +34,11 @@ static void test_add_simple() { // Test to ensure that the RPC mechanism doesn't hang on divergence. static void test_noop(uint8_t data) { - rpc::Client::Port port = rpc::client.open(); - port.send([=](rpc::Buffer *buffer, uint32_t) { buffer->data[0] = data; }); + LIBC_NAMESPACE::rpc::Client::Port port = + LIBC_NAMESPACE::rpc::client.open(); + port.send([=](LIBC_NAMESPACE::rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = data; + }); port.close(); } diff --git a/libc/test/src/__support/block_test.cpp b/libc/test/src/__support/block_test.cpp index 4d23861155502a..5e437db51b6092 100644 --- a/libc/test/src/__support/block_test.cpp +++ b/libc/test/src/__support/block_test.cpp @@ -14,96 +14,60 @@ #include "src/string/memcpy.h" #include "test/UnitTest/Test.h" -// Block types. -using LargeOffsetBlock = LIBC_NAMESPACE::Block; -using SmallOffsetBlock = LIBC_NAMESPACE::Block; - -// For each of the block types above, we'd like to run the same tests since -// they should work independently of the parameter sizes. Rather than re-writing -// the same test for each case, let's instead create a custom test framework for -// each test case that invokes the actual testing function for each block type. -// -// It's organized this way because the ASSERT/EXPECT macros only work within a -// `Test` class due to those macros expanding to `test` methods. 
-#define TEST_FOR_EACH_BLOCK_TYPE(TestCase) \ - class LlvmLibcBlockTest##TestCase : public LIBC_NAMESPACE::testing::Test { \ - public: \ - template void RunTest(); \ - }; \ - TEST_F(LlvmLibcBlockTest##TestCase, TestCase) { \ - RunTest(); \ - RunTest(); \ - } \ - template void LlvmLibcBlockTest##TestCase::RunTest() - +using LIBC_NAMESPACE::Block; using LIBC_NAMESPACE::cpp::array; using LIBC_NAMESPACE::cpp::bit_ceil; using LIBC_NAMESPACE::cpp::byte; using LIBC_NAMESPACE::cpp::span; -TEST_FOR_EACH_BLOCK_TYPE(CanCreateSingleAlignedBlock) { +TEST(LlvmLibcBlockTest, CanCreateSingleAlignedBlock) { constexpr size_t kN = 1024; - alignas(BlockType::ALIGNMENT) array bytes; + alignas(Block::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; - BlockType *last = block->next(); - ASSERT_NE(last, static_cast(nullptr)); - constexpr size_t last_outer_size = BlockType::BLOCK_OVERHEAD; + Block *last = block->next(); + ASSERT_NE(last, static_cast(nullptr)); + constexpr size_t last_outer_size = Block::BLOCK_OVERHEAD; EXPECT_EQ(last->outer_size(), last_outer_size); EXPECT_EQ(last->prev_free(), block); EXPECT_TRUE(last->used()); EXPECT_EQ(block->outer_size(), kN - last_outer_size); - constexpr size_t last_prev_field_size = - sizeof(typename BlockType::offset_type); - EXPECT_EQ(block->inner_size(), kN - last_outer_size - - BlockType::BLOCK_OVERHEAD + + constexpr size_t last_prev_field_size = sizeof(size_t); + EXPECT_EQ(block->inner_size(), kN - last_outer_size - Block::BLOCK_OVERHEAD + last_prev_field_size); - EXPECT_EQ(block->prev_free(), static_cast(nullptr)); + EXPECT_EQ(block->prev_free(), static_cast(nullptr)); EXPECT_FALSE(block->used()); } -TEST_FOR_EACH_BLOCK_TYPE(CanCreateUnalignedSingleBlock) { +TEST(LlvmLibcBlockTest, CanCreateUnalignedSingleBlock) { constexpr size_t kN = 1024; // Force alignment, so we can un-force it below - 
alignas(BlockType::ALIGNMENT) array bytes; + alignas(Block::ALIGNMENT) array bytes; span aligned(bytes); - auto result = BlockType::init(aligned.subspan(1)); + auto result = Block::init(aligned.subspan(1)); EXPECT_TRUE(result.has_value()); } -TEST_FOR_EACH_BLOCK_TYPE(CannotCreateTooSmallBlock) { +TEST(LlvmLibcBlockTest, CannotCreateTooSmallBlock) { array bytes; - auto result = BlockType::init(bytes); + auto result = Block::init(bytes); EXPECT_FALSE(result.has_value()); } -// This test specifically checks that we cannot allocate a block with a size -// larger than what can be held by the offset type, we don't need to test with -// multiple block types for this particular check, so we use the normal TEST -// macro and not the custom framework. -TEST(LlvmLibcBlockTest, CannotCreateTooLargeBlock) { - using BlockType = LIBC_NAMESPACE::Block; +TEST(LlvmLibcBlockTest, CanSplitBlock) { constexpr size_t kN = 1024; - - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); - EXPECT_FALSE(result.has_value()); -} - -TEST_FOR_EACH_BLOCK_TYPE(CanSplitBlock) { - constexpr size_t kN = 1024; - constexpr size_t prev_field_size = sizeof(typename BlockType::offset_type); + constexpr size_t prev_field_size = sizeof(size_t); // Give the split position a large alignment. 
constexpr size_t kSplitN = 512 + prev_field_size; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); auto *block1 = *result; size_t orig_size = block1->outer_size(); @@ -114,7 +78,7 @@ TEST_FOR_EACH_BLOCK_TYPE(CanSplitBlock) { EXPECT_EQ(block1->inner_size(), kSplitN); EXPECT_EQ(block1->outer_size(), - kSplitN - prev_field_size + BlockType::BLOCK_OVERHEAD); + kSplitN - prev_field_size + Block::BLOCK_OVERHEAD); EXPECT_EQ(block2->outer_size(), orig_size - block1->outer_size()); EXPECT_FALSE(block2->used()); @@ -123,26 +87,26 @@ TEST_FOR_EACH_BLOCK_TYPE(CanSplitBlock) { EXPECT_EQ(block2->prev_free(), block1); } -TEST_FOR_EACH_BLOCK_TYPE(CanSplitBlockUnaligned) { +TEST(LlvmLibcBlockTest, CanSplitBlockUnaligned) { constexpr size_t kN = 1024; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block1 = *result; + Block *block1 = *result; size_t orig_size = block1->outer_size(); constexpr size_t kSplitN = 513; - constexpr size_t prev_field_size = sizeof(typename BlockType::offset_type); + constexpr size_t prev_field_size = sizeof(size_t); uintptr_t split_addr = reinterpret_cast(block1) + (kSplitN - prev_field_size); // Round split_addr up to a multiple of the alignment. 
- split_addr += alignof(BlockType) - (split_addr % alignof(BlockType)); + split_addr += alignof(Block) - (split_addr % alignof(Block)); uintptr_t split_len = split_addr - (uintptr_t)&bytes + prev_field_size; result = block1->split(kSplitN); ASSERT_TRUE(result.has_value()); - BlockType *block2 = *result; + Block *block2 = *result; EXPECT_EQ(block1->inner_size(), split_len); @@ -153,7 +117,7 @@ TEST_FOR_EACH_BLOCK_TYPE(CanSplitBlockUnaligned) { EXPECT_EQ(block2->prev_free(), block1); } -TEST_FOR_EACH_BLOCK_TYPE(CanSplitMidBlock) { +TEST(LlvmLibcBlockTest, CanSplitMidBlock) { // split once, then split the original block again to ensure that the // pointers get rewired properly. // I.e. @@ -167,18 +131,18 @@ TEST_FOR_EACH_BLOCK_TYPE(CanSplitMidBlock) { constexpr size_t kSplit1 = 512; constexpr size_t kSplit2 = 256; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block1 = *result; + Block *block1 = *result; result = block1->split(kSplit1); ASSERT_TRUE(result.has_value()); - BlockType *block2 = *result; + Block *block2 = *result; result = block1->split(kSplit2); ASSERT_TRUE(result.has_value()); - BlockType *block3 = *result; + Block *block3 = *result; EXPECT_EQ(block1->next(), block3); EXPECT_EQ(block3->prev_free(), block1); @@ -186,97 +150,97 @@ TEST_FOR_EACH_BLOCK_TYPE(CanSplitMidBlock) { EXPECT_EQ(block2->prev_free(), block3); } -TEST_FOR_EACH_BLOCK_TYPE(CannotSplitTooSmallBlock) { +TEST(LlvmLibcBlockTest, CannotSplitTooSmallBlock) { constexpr size_t kN = 64; constexpr size_t kSplitN = kN + 1; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; result = block->split(kSplitN); ASSERT_FALSE(result.has_value()); } 
-TEST_FOR_EACH_BLOCK_TYPE(CannotSplitBlockWithoutHeaderSpace) { +TEST(LlvmLibcBlockTest, CannotSplitBlockWithoutHeaderSpace) { constexpr size_t kN = 1024; - constexpr size_t kSplitN = kN - 2 * BlockType::BLOCK_OVERHEAD - 1; + constexpr size_t kSplitN = kN - 2 * Block::BLOCK_OVERHEAD - 1; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; result = block->split(kSplitN); ASSERT_FALSE(result.has_value()); } -TEST_FOR_EACH_BLOCK_TYPE(CannotMakeBlockLargerInSplit) { +TEST(LlvmLibcBlockTest, CannotMakeBlockLargerInSplit) { // Ensure that we can't ask for more space than the block actually has... constexpr size_t kN = 1024; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; result = block->split(block->inner_size() + 1); ASSERT_FALSE(result.has_value()); } -TEST_FOR_EACH_BLOCK_TYPE(CannotMakeSecondBlockLargerInSplit) { +TEST(LlvmLibcBlockTest, CannotMakeSecondBlockLargerInSplit) { // Ensure that the second block in split is at least of the size of header. 
constexpr size_t kN = 1024; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; - result = block->split(block->inner_size() - BlockType::BLOCK_OVERHEAD + 1); + result = block->split(block->inner_size() - Block::BLOCK_OVERHEAD + 1); ASSERT_FALSE(result.has_value()); } -TEST_FOR_EACH_BLOCK_TYPE(CanMakeMinimalSizeFirstBlock) { +TEST(LlvmLibcBlockTest, CanMakeMinimalSizeFirstBlock) { // This block does support splitting with minimal payload size. constexpr size_t kN = 1024; - constexpr size_t minimal_size = sizeof(typename BlockType::offset_type); + constexpr size_t minimal_size = sizeof(size_t); - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; result = block->split(minimal_size); ASSERT_TRUE(result.has_value()); EXPECT_EQ(block->inner_size(), minimal_size); } -TEST_FOR_EACH_BLOCK_TYPE(CanMakeMinimalSizeSecondBlock) { +TEST(LlvmLibcBlockTest, CanMakeMinimalSizeSecondBlock) { // Likewise, the split block can be minimal-width. 
constexpr size_t kN = 1024; - constexpr size_t minimal_size = sizeof(typename BlockType::offset_type); + constexpr size_t minimal_size = sizeof(size_t); - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block1 = *result; + Block *block1 = *result; - result = block1->split(block1->inner_size() - BlockType::BLOCK_OVERHEAD); + result = block1->split(block1->inner_size() - Block::BLOCK_OVERHEAD); ASSERT_TRUE(result.has_value()); - BlockType *block2 = *result; + Block *block2 = *result; EXPECT_EQ(block2->inner_size(), minimal_size); } -TEST_FOR_EACH_BLOCK_TYPE(CanMarkBlockUsed) { +TEST(LlvmLibcBlockTest, CanMarkBlockUsed) { constexpr size_t kN = 1024; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; size_t orig_size = block->outer_size(); block->mark_used(); @@ -287,33 +251,33 @@ TEST_FOR_EACH_BLOCK_TYPE(CanMarkBlockUsed) { EXPECT_FALSE(block->used()); } -TEST_FOR_EACH_BLOCK_TYPE(CannotSplitUsedBlock) { +TEST(LlvmLibcBlockTest, CannotSplitUsedBlock) { constexpr size_t kN = 1024; constexpr size_t kSplitN = 512; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; block->mark_used(); result = block->split(kSplitN); ASSERT_FALSE(result.has_value()); } -TEST_FOR_EACH_BLOCK_TYPE(CanMergeWithNextBlock) { +TEST(LlvmLibcBlockTest, CanMergeWithNextBlock) { // Do the three way merge from "CanSplitMidBlock", and let's // merge block 3 and 2 constexpr size_t kN = 1024; // Give the split positions large alignments. 
- constexpr size_t prev_field_size = sizeof(typename BlockType::offset_type); + constexpr size_t prev_field_size = sizeof(size_t); constexpr size_t kSplit1 = 512 + prev_field_size; constexpr size_t kSplit2 = 256 + prev_field_size; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block1 = *result; + Block *block1 = *result; size_t orig_size = block1->outer_size(); result = block1->split(kSplit1); @@ -321,7 +285,7 @@ TEST_FOR_EACH_BLOCK_TYPE(CanMergeWithNextBlock) { result = block1->split(kSplit2); ASSERT_TRUE(result.has_value()); - BlockType *block3 = *result; + Block *block3 = *result; EXPECT_TRUE(block3->merge_next()); @@ -331,31 +295,31 @@ TEST_FOR_EACH_BLOCK_TYPE(CanMergeWithNextBlock) { EXPECT_EQ(block3->outer_size(), orig_size - block1->outer_size()); } -TEST_FOR_EACH_BLOCK_TYPE(CannotMergeWithFirstOrLastBlock) { +TEST(LlvmLibcBlockTest, CannotMergeWithFirstOrLastBlock) { constexpr size_t kN = 1024; constexpr size_t kSplitN = 512; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block1 = *result; + Block *block1 = *result; // Do a split, just to check that the checks on next/prev are different... 
result = block1->split(kSplitN); ASSERT_TRUE(result.has_value()); - BlockType *block2 = *result; + Block *block2 = *result; EXPECT_FALSE(block2->merge_next()); } -TEST_FOR_EACH_BLOCK_TYPE(CannotMergeUsedBlock) { +TEST(LlvmLibcBlockTest, CannotMergeUsedBlock) { constexpr size_t kN = 1024; constexpr size_t kSplitN = 512; - alignas(BlockType::ALIGNMENT) array bytes; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; // Do a split, just to check that the checks on next/prev are different... result = block->split(kSplitN); @@ -365,177 +329,176 @@ TEST_FOR_EACH_BLOCK_TYPE(CannotMergeUsedBlock) { EXPECT_FALSE(block->merge_next()); } -TEST_FOR_EACH_BLOCK_TYPE(CanGetBlockFromUsableSpace) { +TEST(LlvmLibcBlockTest, CanGetBlockFromUsableSpace) { constexpr size_t kN = 1024; array bytes{}; - auto result = BlockType::init(bytes); + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block1 = *result; + Block *block1 = *result; void *ptr = block1->usable_space(); - BlockType *block2 = BlockType::from_usable_space(ptr); + Block *block2 = Block::from_usable_space(ptr); EXPECT_EQ(block1, block2); } -TEST_FOR_EACH_BLOCK_TYPE(CanGetConstBlockFromUsableSpace) { +TEST(LlvmLibcBlockTest, CanGetConstBlockFromUsableSpace) { constexpr size_t kN = 1024; array bytes{}; - auto result = BlockType::init(bytes); + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - const BlockType *block1 = *result; + const Block *block1 = *result; const void *ptr = block1->usable_space(); - const BlockType *block2 = BlockType::from_usable_space(ptr); + const Block *block2 = Block::from_usable_space(ptr); EXPECT_EQ(block1, block2); } -TEST_FOR_EACH_BLOCK_TYPE(CanAllocate) { - constexpr size_t kN = 1024 + BlockType::BLOCK_OVERHEAD; +TEST(LlvmLibcBlockTest, CanAllocate) { + constexpr size_t kN = 1024 + 
Block::BLOCK_OVERHEAD; // Ensure we can allocate everything up to the block size within this block. - for (size_t i = 0; i < kN - 2 * BlockType::BLOCK_OVERHEAD; ++i) { - alignas(BlockType::ALIGNMENT) array bytes{}; - auto result = BlockType::init(bytes); + for (size_t i = 0; i < kN - 2 * Block::BLOCK_OVERHEAD; ++i) { + alignas(Block::ALIGNMENT) array bytes{}; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; constexpr size_t ALIGN = 1; // Effectively ignores alignment. EXPECT_TRUE(block->can_allocate(ALIGN, i)); // For each can_allocate, we should be able to do a successful call to // allocate. - auto info = BlockType::allocate(block, ALIGN, i); - EXPECT_NE(info.block, static_cast(nullptr)); + auto info = Block::allocate(block, ALIGN, i); + EXPECT_NE(info.block, static_cast(nullptr)); } - alignas(BlockType::ALIGNMENT) array bytes{}; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes{}; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; // Given a block of size N (assuming it's also a power of two), we should be // able to allocate a block within it that's aligned to N/2. This is // because regardless of where the buffer is located, we can always find a // starting location within it that meets this alignment. 
EXPECT_TRUE(block->can_allocate(block->outer_size() / 2, 1)); - auto info = BlockType::allocate(block, block->outer_size() / 2, 1); - EXPECT_NE(info.block, static_cast(nullptr)); + auto info = Block::allocate(block, block->outer_size() / 2, 1); + EXPECT_NE(info.block, static_cast(nullptr)); } -TEST_FOR_EACH_BLOCK_TYPE(AllocateAlreadyAligned) { +TEST(LlvmLibcBlockTest, AllocateAlreadyAligned) { constexpr size_t kN = 1024; - alignas(BlockType::ALIGNMENT) array bytes{}; - auto result = BlockType::init(bytes); + alignas(Block::ALIGNMENT) array bytes{}; + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; // This should result in no new blocks. - constexpr size_t kAlignment = BlockType::ALIGNMENT; - constexpr size_t prev_field_size = sizeof(typename BlockType::offset_type); - constexpr size_t kExpectedSize = BlockType::ALIGNMENT + prev_field_size; + constexpr size_t kAlignment = Block::ALIGNMENT; + constexpr size_t prev_field_size = sizeof(size_t); + constexpr size_t kExpectedSize = Block::ALIGNMENT + prev_field_size; EXPECT_TRUE(block->can_allocate(kAlignment, kExpectedSize)); auto [aligned_block, prev, next] = - BlockType::allocate(block, BlockType::ALIGNMENT, kExpectedSize); + Block::allocate(block, Block::ALIGNMENT, kExpectedSize); // Since this is already aligned, there should be no previous block. - EXPECT_EQ(prev, static_cast(nullptr)); + EXPECT_EQ(prev, static_cast(nullptr)); // Ensure we the block is aligned and the size we expect. - EXPECT_NE(aligned_block, static_cast(nullptr)); - EXPECT_TRUE(aligned_block->is_usable_space_aligned(BlockType::ALIGNMENT)); + EXPECT_NE(aligned_block, static_cast(nullptr)); + EXPECT_TRUE(aligned_block->is_usable_space_aligned(Block::ALIGNMENT)); EXPECT_EQ(aligned_block->inner_size(), kExpectedSize); // Check the next block. 
- EXPECT_NE(next, static_cast(nullptr)); + EXPECT_NE(next, static_cast(nullptr)); EXPECT_EQ(aligned_block->next(), next); EXPECT_EQ(reinterpret_cast(next) + next->outer_size(), - bytes.data() + bytes.size() - BlockType::BLOCK_OVERHEAD); + bytes.data() + bytes.size() - Block::BLOCK_OVERHEAD); } -TEST_FOR_EACH_BLOCK_TYPE(AllocateNeedsAlignment) { +TEST(LlvmLibcBlockTest, AllocateNeedsAlignment) { constexpr size_t kN = 1024; alignas(kN) array bytes{}; - auto result = BlockType::init(bytes); + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; // Ensure first the usable_data is only aligned to the block alignment. - ASSERT_EQ(block->usable_space(), bytes.data() + BlockType::BLOCK_OVERHEAD); - ASSERT_EQ(block->prev_free(), static_cast(nullptr)); + ASSERT_EQ(block->usable_space(), bytes.data() + Block::BLOCK_OVERHEAD); + ASSERT_EQ(block->prev_free(), static_cast(nullptr)); // Now pick an alignment such that the usable space is not already aligned to // it. We want to explicitly test that the block will split into one before // it. - constexpr size_t kAlignment = bit_ceil(BlockType::BLOCK_OVERHEAD) * 8; + constexpr size_t kAlignment = bit_ceil(Block::BLOCK_OVERHEAD) * 8; ASSERT_FALSE(block->is_usable_space_aligned(kAlignment)); constexpr size_t kSize = 10; EXPECT_TRUE(block->can_allocate(kAlignment, kSize)); - auto [aligned_block, prev, next] = - BlockType::allocate(block, kAlignment, kSize); + auto [aligned_block, prev, next] = Block::allocate(block, kAlignment, kSize); // Check the previous block was created appropriately. Since this block is the // first block, a new one should be made before this. 
- EXPECT_NE(prev, static_cast(nullptr)); + EXPECT_NE(prev, static_cast(nullptr)); EXPECT_EQ(aligned_block->prev_free(), prev); EXPECT_EQ(prev->next(), aligned_block); EXPECT_EQ(prev->outer_size(), reinterpret_cast(aligned_block) - reinterpret_cast(prev)); // Ensure we the block is aligned and the size we expect. - EXPECT_NE(next, static_cast(nullptr)); + EXPECT_NE(next, static_cast(nullptr)); EXPECT_TRUE(aligned_block->is_usable_space_aligned(kAlignment)); // Check the next block. - EXPECT_NE(next, static_cast(nullptr)); + EXPECT_NE(next, static_cast(nullptr)); EXPECT_EQ(aligned_block->next(), next); EXPECT_EQ(reinterpret_cast(next) + next->outer_size(), - bytes.data() + bytes.size() - BlockType::BLOCK_OVERHEAD); + bytes.data() + bytes.size() - Block::BLOCK_OVERHEAD); } -TEST_FOR_EACH_BLOCK_TYPE(PreviousBlockMergedIfNotFirst) { +TEST(LlvmLibcBlockTest, PreviousBlockMergedIfNotFirst) { constexpr size_t kN = 1024; alignas(kN) array bytes{}; - auto result = BlockType::init(bytes); + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; + Block *block = *result; // Split the block roughly halfway and work on the second half. auto result2 = block->split(kN / 2); ASSERT_TRUE(result2.has_value()); - BlockType *newblock = *result2; + Block *newblock = *result2; ASSERT_EQ(newblock->prev_free(), block); size_t old_prev_size = block->outer_size(); // Now pick an alignment such that the usable space is not already aligned to // it. We want to explicitly test that the block will split into one before // it. - constexpr size_t kAlignment = bit_ceil(BlockType::BLOCK_OVERHEAD) * 8; + constexpr size_t kAlignment = bit_ceil(Block::BLOCK_OVERHEAD) * 8; ASSERT_FALSE(newblock->is_usable_space_aligned(kAlignment)); // Ensure we can allocate in the new block. 
- constexpr size_t kSize = BlockType::ALIGNMENT; + constexpr size_t kSize = Block::ALIGNMENT; EXPECT_TRUE(newblock->can_allocate(kAlignment, kSize)); auto [aligned_block, prev, next] = - BlockType::allocate(newblock, kAlignment, kSize); + Block::allocate(newblock, kAlignment, kSize); // Now there should be no new previous block. Instead, the padding we did // create should be merged into the original previous block. - EXPECT_EQ(prev, static_cast(nullptr)); + EXPECT_EQ(prev, static_cast(nullptr)); EXPECT_EQ(aligned_block->prev_free(), block); EXPECT_EQ(block->next(), aligned_block); EXPECT_GT(block->outer_size(), old_prev_size); } -TEST_FOR_EACH_BLOCK_TYPE(CanRemergeBlockAllocations) { +TEST(LlvmLibcBlockTest, CanRemergeBlockAllocations) { // Finally to ensure we made the split blocks correctly via allocate. We // should be able to reconstruct the original block from the blocklets. // @@ -543,31 +506,30 @@ TEST_FOR_EACH_BLOCK_TYPE(CanRemergeBlockAllocations) { constexpr size_t kN = 1024; alignas(kN) array bytes{}; - auto result = BlockType::init(bytes); + auto result = Block::init(bytes); ASSERT_TRUE(result.has_value()); - BlockType *block = *result; - BlockType *last = block->next(); + Block *block = *result; + Block *last = block->next(); // Ensure first the usable_data is only aligned to the block alignment. - ASSERT_EQ(block->usable_space(), bytes.data() + BlockType::BLOCK_OVERHEAD); - ASSERT_EQ(block->prev_free(), static_cast(nullptr)); + ASSERT_EQ(block->usable_space(), bytes.data() + Block::BLOCK_OVERHEAD); + ASSERT_EQ(block->prev_free(), static_cast(nullptr)); // Now pick an alignment such that the usable space is not already aligned to // it. We want to explicitly test that the block will split into one before // it. 
- constexpr size_t kAlignment = bit_ceil(BlockType::BLOCK_OVERHEAD) * 8; + constexpr size_t kAlignment = bit_ceil(Block::BLOCK_OVERHEAD) * 8; ASSERT_FALSE(block->is_usable_space_aligned(kAlignment)); - constexpr size_t kSize = BlockType::ALIGNMENT; + constexpr size_t kSize = Block::ALIGNMENT; EXPECT_TRUE(block->can_allocate(kAlignment, kSize)); - auto [aligned_block, prev, next] = - BlockType::allocate(block, kAlignment, kSize); + auto [aligned_block, prev, next] = Block::allocate(block, kAlignment, kSize); // Check we have the appropriate blocks. - ASSERT_NE(prev, static_cast(nullptr)); + ASSERT_NE(prev, static_cast(nullptr)); ASSERT_EQ(aligned_block->prev_free(), prev); - EXPECT_NE(next, static_cast(nullptr)); + EXPECT_NE(next, static_cast(nullptr)); EXPECT_EQ(aligned_block->next(), next); EXPECT_EQ(next->next(), last); @@ -579,7 +541,7 @@ TEST_FOR_EACH_BLOCK_TYPE(CanRemergeBlockAllocations) { // We should have the original buffer. EXPECT_EQ(reinterpret_cast(prev), &*bytes.begin()); - EXPECT_EQ(prev->outer_size(), bytes.size() - BlockType::BLOCK_OVERHEAD); + EXPECT_EQ(prev->outer_size(), bytes.size() - Block::BLOCK_OVERHEAD); EXPECT_EQ(reinterpret_cast(prev) + prev->outer_size(), - &*bytes.end() - BlockType::BLOCK_OVERHEAD); + &*bytes.end() - Block::BLOCK_OVERHEAD); } diff --git a/libc/test/src/__support/freelist_heap_test.cpp b/libc/test/src/__support/freelist_heap_test.cpp index 59ebf4e50974b7..991c158825a888 100644 --- a/libc/test/src/__support/freelist_heap_test.cpp +++ b/libc/test/src/__support/freelist_heap_test.cpp @@ -42,7 +42,7 @@ using LIBC_NAMESPACE::cpp::span; void RunTest(FreeListHeap &allocator, [[maybe_unused]] size_t N); \ }; \ TEST_F(LlvmLibcFreeListHeapTest##TestCase, TestCase) { \ - alignas(Block<>) byte buf[BufferSize] = {byte(0)}; \ + alignas(Block) byte buf[BufferSize] = {byte(0)}; \ FreeListHeap allocator(buf); \ RunTest(allocator, BufferSize); \ RunTest(*freelist_heap, freelist_heap->region().size()); \ @@ -95,13 +95,13 @@ 
TEST_FOR_EACH_ALLOCATOR(ReturnsNullWhenAllocationTooLarge, 2048) { // is used for other test cases and we don't explicitly free them. TEST(LlvmLibcFreeListHeap, ReturnsNullWhenFull) { constexpr size_t N = 2048; - alignas(Block<>) byte buf[N] = {byte(0)}; + alignas(Block) byte buf[N] = {byte(0)}; FreeListHeap allocator(buf); // Use aligned_allocate so we don't need to worry about ensuring the `buf` // being aligned to max_align_t. - EXPECT_NE(allocator.aligned_allocate(1, N - 2 * Block<>::BLOCK_OVERHEAD), + EXPECT_NE(allocator.aligned_allocate(1, N - 2 * Block::BLOCK_OVERHEAD), static_cast(nullptr)); EXPECT_EQ(allocator.allocate(1), static_cast(nullptr)); } @@ -241,16 +241,16 @@ TEST_FOR_EACH_ALLOCATOR(AlignedAlloc, 2048) { // This test is not part of the TEST_FOR_EACH_ALLOCATOR since we want to // explicitly ensure that the buffer can still return aligned allocations even -// if the underlying buffer is at most aligned to the Block<> alignment. This +// if the underlying buffer is at most aligned to the Block alignment. This // is so we can check that we can still get aligned allocations even if the // underlying buffer is not aligned to the alignments we request. TEST(LlvmLibcFreeListHeap, AlignedAllocOnlyBlockAligned) { constexpr size_t BUFFER_SIZE = 4096; - constexpr size_t BUFFER_ALIGNMENT = alignof(Block<>) * 2; + constexpr size_t BUFFER_ALIGNMENT = alignof(Block) * 2; alignas(BUFFER_ALIGNMENT) byte buf[BUFFER_SIZE] = {byte(0)}; // Ensure the underlying buffer is at most aligned to the block type. 
- FreeListHeap allocator(span(buf).subspan(alignof(Block<>))); + FreeListHeap allocator(span(buf).subspan(alignof(Block))); constexpr size_t ALIGNMENTS[] = {1, 2, 4, 8, 16, 32, 64, 128, 256}; constexpr size_t SIZE_SCALES[] = {1, 2, 3, 4, 5}; diff --git a/libc/test/src/__support/freelist_malloc_test.cpp b/libc/test/src/__support/freelist_malloc_test.cpp index 583e40d9478223..793e2498304fb9 100644 --- a/libc/test/src/__support/freelist_malloc_test.cpp +++ b/libc/test/src/__support/freelist_malloc_test.cpp @@ -24,12 +24,12 @@ TEST(LlvmLibcFreeListMalloc, Malloc) { constexpr size_t kCallocSize = 64; void *ptr1 = LIBC_NAMESPACE::malloc(kAllocSize); - auto *block = Block<>::from_usable_space(ptr1); + auto *block = Block::from_usable_space(ptr1); EXPECT_GE(block->inner_size(), kAllocSize); LIBC_NAMESPACE::free(ptr1); - ASSERT_NE(block->next(), static_cast *>(nullptr)); - ASSERT_EQ(block->next()->next(), static_cast *>(nullptr)); + ASSERT_NE(block->next(), static_cast(nullptr)); + ASSERT_EQ(block->next()->next(), static_cast(nullptr)); size_t heap_size = block->inner_size(); void *ptr2 = LIBC_NAMESPACE::calloc(kCallocNum, kCallocSize); @@ -46,7 +46,7 @@ TEST(LlvmLibcFreeListMalloc, Malloc) { void *ptr3 = LIBC_NAMESPACE::aligned_alloc(ALIGN, kAllocSize); EXPECT_NE(ptr3, static_cast(nullptr)); EXPECT_EQ(reinterpret_cast(ptr3) % ALIGN, size_t(0)); - auto *aligned_block = reinterpret_cast *>(ptr3); + auto *aligned_block = reinterpret_cast(ptr3); EXPECT_GE(aligned_block->inner_size(), kAllocSize); LIBC_NAMESPACE::free(ptr3); diff --git a/libc/test/src/__support/freelist_test.cpp b/libc/test/src/__support/freelist_test.cpp index 1f310761bee180..bd5ecec45d921b 100644 --- a/libc/test/src/__support/freelist_test.cpp +++ b/libc/test/src/__support/freelist_test.cpp @@ -18,13 +18,13 @@ using LIBC_NAMESPACE::cpp::optional; TEST(LlvmLibcFreeList, FreeList) { byte mem[1024]; - optional *> maybeBlock = Block<>::init(mem); + optional maybeBlock = Block::init(mem); 
ASSERT_TRUE(maybeBlock.has_value()); - Block<> *block1 = *maybeBlock; + Block *block1 = *maybeBlock; maybeBlock = block1->split(128); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *block2 = *maybeBlock; + Block *block2 = *maybeBlock; maybeBlock = block2->split(128); ASSERT_TRUE(maybeBlock.has_value()); diff --git a/libc/test/src/__support/freestore_test.cpp b/libc/test/src/__support/freestore_test.cpp index 84f2acfab878a1..7960d32c8bbf0f 100644 --- a/libc/test/src/__support/freestore_test.cpp +++ b/libc/test/src/__support/freestore_test.cpp @@ -21,12 +21,12 @@ using LIBC_NAMESPACE::cpp::optional; // Inserting or removing blocks too small to be tracked does nothing. TEST(LlvmLibcFreeStore, TooSmall) { byte mem[1024]; - optional *> maybeBlock = Block<>::init(mem); + optional maybeBlock = Block::init(mem); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *too_small = *maybeBlock; - maybeBlock = too_small->split(sizeof(Block<>::offset_type)); + Block *too_small = *maybeBlock; + maybeBlock = too_small->split(sizeof(size_t)); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *remainder = *maybeBlock; + Block *remainder = *maybeBlock; FreeStore store; store.set_range({0, 4096}); @@ -39,24 +39,22 @@ TEST(LlvmLibcFreeStore, TooSmall) { TEST(LlvmLibcFreeStore, RemoveBestFit) { byte mem[1024]; - optional *> maybeBlock = Block<>::init(mem); + optional maybeBlock = Block::init(mem); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *smallest = *maybeBlock; - maybeBlock = - smallest->split(sizeof(FreeList::Node) + sizeof(Block<>::offset_type)); + Block *smallest = *maybeBlock; + maybeBlock = smallest->split(sizeof(FreeList::Node) + sizeof(size_t)); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *largest_small = *maybeBlock; - maybeBlock = - largest_small->split(sizeof(FreeTrie::Node) + - sizeof(Block<>::offset_type) - alignof(max_align_t)); + Block *largest_small = *maybeBlock; + maybeBlock = largest_small->split(sizeof(FreeTrie::Node) + sizeof(size_t) - + alignof(max_align_t)); 
ASSERT_TRUE(maybeBlock.has_value()); if (largest_small->inner_size() == smallest->inner_size()) largest_small = smallest; ASSERT_GE(largest_small->inner_size(), smallest->inner_size()); - Block<> *remainder = *maybeBlock; + Block *remainder = *maybeBlock; FreeStore store; store.set_range({0, 4096}); @@ -74,8 +72,7 @@ TEST(LlvmLibcFreeStore, RemoveBestFit) { store.insert(largest_small); // Search small list for best fit. - Block<> *next_smallest = - largest_small == smallest ? remainder : largest_small; + Block *next_smallest = largest_small == smallest ? remainder : largest_small; ASSERT_EQ(store.remove_best_fit(smallest->inner_size() + 1), next_smallest); store.insert(next_smallest); @@ -85,15 +82,14 @@ TEST(LlvmLibcFreeStore, RemoveBestFit) { TEST(LlvmLibcFreeStore, Remove) { byte mem[1024]; - optional *> maybeBlock = Block<>::init(mem); + optional maybeBlock = Block::init(mem); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *small = *maybeBlock; - maybeBlock = - small->split(sizeof(FreeList::Node) + sizeof(Block<>::offset_type)); + Block *small = *maybeBlock; + maybeBlock = small->split(sizeof(FreeList::Node) + sizeof(size_t)); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *remainder = *maybeBlock; + Block *remainder = *maybeBlock; FreeStore store; store.set_range({0, 4096}); @@ -102,8 +98,8 @@ TEST(LlvmLibcFreeStore, Remove) { store.remove(remainder); ASSERT_EQ(store.remove_best_fit(remainder->inner_size()), - static_cast *>(nullptr)); + static_cast(nullptr)); store.remove(small); ASSERT_EQ(store.remove_best_fit(small->inner_size()), - static_cast *>(nullptr)); + static_cast(nullptr)); } diff --git a/libc/test/src/__support/freetrie_test.cpp b/libc/test/src/__support/freetrie_test.cpp index 1e3caceb7293bb..5663a01687294e 100644 --- a/libc/test/src/__support/freetrie_test.cpp +++ b/libc/test/src/__support/freetrie_test.cpp @@ -21,9 +21,9 @@ TEST(LlvmLibcFreeTrie, FindBestFitRoot) { EXPECT_EQ(trie.find_best_fit(123), static_cast(nullptr)); byte mem[1024]; - 
optional *> maybeBlock = Block<>::init(mem); + optional maybeBlock = Block::init(mem); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *block = *maybeBlock; + Block *block = *maybeBlock; trie.push(block); FreeTrie::Node *root = trie.find_best_fit(0); @@ -37,12 +37,12 @@ TEST(LlvmLibcFreeTrie, FindBestFitRoot) { TEST(LlvmLibcFreeTrie, FindBestFitLower) { byte mem[4096]; - optional *> maybeBlock = Block<>::init(mem); + optional maybeBlock = Block::init(mem); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *lower = *maybeBlock; + Block *lower = *maybeBlock; maybeBlock = lower->split(512); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *root = *maybeBlock; + Block *root = *maybeBlock; FreeTrie trie({0, 4096}); trie.push(root); @@ -53,12 +53,12 @@ TEST(LlvmLibcFreeTrie, FindBestFitLower) { TEST(LlvmLibcFreeTrie, FindBestFitUpper) { byte mem[4096]; - optional *> maybeBlock = Block<>::init(mem); + optional maybeBlock = Block::init(mem); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *root = *maybeBlock; + Block *root = *maybeBlock; maybeBlock = root->split(512); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *upper = *maybeBlock; + Block *upper = *maybeBlock; FreeTrie trie({0, 4096}); trie.push(root); @@ -71,15 +71,15 @@ TEST(LlvmLibcFreeTrie, FindBestFitUpper) { TEST(LlvmLibcFreeTrie, FindBestFitLowerAndUpper) { byte mem[4096]; - optional *> maybeBlock = Block<>::init(mem); + optional maybeBlock = Block::init(mem); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *root = *maybeBlock; + Block *root = *maybeBlock; maybeBlock = root->split(1024); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *lower = *maybeBlock; + Block *lower = *maybeBlock; maybeBlock = lower->split(128); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *upper = *maybeBlock; + Block *upper = *maybeBlock; FreeTrie trie({0, 4096}); trie.push(root); @@ -95,16 +95,16 @@ TEST(LlvmLibcFreeTrie, FindBestFitLowerAndUpper) { TEST(LlvmLibcFreeTrie, Remove) { byte mem[4096]; - optional *> maybeBlock = Block<>::init(mem); 
+ optional maybeBlock = Block::init(mem); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *small1 = *maybeBlock; + Block *small1 = *maybeBlock; maybeBlock = small1->split(512); ASSERT_TRUE(maybeBlock.has_value()); - Block<> *small2 = *maybeBlock; + Block *small2 = *maybeBlock; maybeBlock = small2->split(512); ASSERT_TRUE(maybeBlock.has_value()); ASSERT_EQ(small1->inner_size(), small2->inner_size()); - Block<> *large = *maybeBlock; + Block *large = *maybeBlock; // Removing the root empties the trie. FreeTrie trie({0, 4096}); diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h index 8be8c0d5f85532..b2e7f47cf9dfb6 100644 --- a/libc/utils/gpu/loader/Loader.h +++ b/libc/utils/gpu/loader/Loader.h @@ -13,6 +13,7 @@ #include "include/llvm-libc-types/rpc_opcodes_t.h" #include "include/llvm-libc-types/test_rpc_opcodes_t.h" +#include "shared/rpc.h" #include #include @@ -103,129 +104,95 @@ inline void handle_error_impl(const char *file, int32_t line, const char *msg) { fprintf(stderr, "%s:%d:0: Error: %s\n", file, line, msg); exit(EXIT_FAILURE); } - -inline void handle_error_impl(const char *file, int32_t line, - rpc_status_t err) { - fprintf(stderr, "%s:%d:0: Error: %d\n", file, line, err); - exit(EXIT_FAILURE); -} #define handle_error(X) handle_error_impl(__FILE__, __LINE__, X) -template -inline void register_rpc_callbacks(rpc_device_t device) { - static_assert(lane_size == 32 || lane_size == 64, "Invalid Lane size"); - // Register the ping test for the `libc` tests. - rpc_register_callback( - device, static_cast(RPC_TEST_INCREMENT), - [](rpc_port_t port, void *data) { - rpc_recv_and_send( - port, - [](rpc_buffer_t *buffer, void *data) { - reinterpret_cast(buffer->data)[0] += 1; - }, - data); - }, - nullptr); - - // Register the interface test callbacks. 
- rpc_register_callback( - device, static_cast(RPC_TEST_INTERFACE), - [](rpc_port_t port, void *data) { - uint64_t cnt = 0; - bool end_with_recv; - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &end_with_recv); - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - if (end_with_recv) - rpc_recv( - port, - [](rpc_buffer_t *buffer, void *data) { - *reinterpret_cast(data) = buffer->data[0]; - }, - &cnt); - else - rpc_send( - port, - [](rpc_buffer_t *buffer, void *data) { - uint64_t &cnt = *reinterpret_cast(data); - buffer->data[0] = cnt = cnt + 1; - }, - &cnt); - }, - nullptr); - - // Register the stream test handler. 
- rpc_register_callback( - device, static_cast(RPC_TEST_STREAM), - [](rpc_port_t port, void *data) { - uint64_t sizes[lane_size] = {0}; - void *dst[lane_size] = {nullptr}; - rpc_recv_n( - port, dst, sizes, - [](uint64_t size, void *) -> void * { return new char[size]; }, - nullptr); - rpc_send_n(port, dst, sizes); - for (uint64_t i = 0; i < lane_size; ++i) { - if (dst[i]) - delete[] reinterpret_cast(dst[i]); - } - }, - nullptr); +template +inline uint32_t handle_server(rpc::Server &server, uint32_t index, + Alloc &&alloc, Free &&free) { + auto port = server.try_open(num_lanes, index); + if (!port) + return 0; + index = port->get_index() + 1; + + int status = rpc::SUCCESS; + switch (port->get_opcode()) { + case RPC_TEST_INCREMENT: { + port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { + reinterpret_cast(buffer->data)[0] += 1; + }); + break; + } + case RPC_TEST_INTERFACE: { + bool end_with_recv; + uint64_t cnt; + port->recv([&](rpc::Buffer *buffer, uint32_t) { + end_with_recv = buffer->data[0]; + }); + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + if (end_with_recv) + port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); + else + port->send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = cnt = cnt + 1; + }); + + break; + } + case RPC_TEST_STREAM: { + uint64_t sizes[num_lanes] = {0}; + void *dst[num_lanes] = {nullptr}; 
+ port->recv_n(dst, sizes, + [](uint64_t size) -> void * { return new char[size]; }); + port->send_n(dst, sizes); + for (uint64_t i = 0; i < num_lanes; ++i) { + if (dst[i]) + delete[] reinterpret_cast(dst[i]); + } + break; + } + case RPC_TEST_NOOP: { + port->recv([&](rpc::Buffer *, uint32_t) {}); + break; + } + case RPC_MALLOC: { + port->recv_and_send([&](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = reinterpret_cast(alloc(buffer->data[0])); + }); + break; + } + case RPC_FREE: { + port->recv([&](rpc::Buffer *buffer, uint32_t) { + free(reinterpret_cast(buffer->data[0])); + }); + break; + } + default: + status = libc_handle_rpc_port(&*port, num_lanes); + break; + } + + // Handle all of the `libc` specific opcodes. + if (status != rpc::SUCCESS) + handle_error("Error handling RPC server"); + + port->close(); + + return index; } #endif diff --git a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp b/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp index d825a6299263ae..13a13668335471 100644 --- a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp +++ b/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp @@ -160,7 +160,7 @@ template hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, hsa_amd_memory_pool_t kernargs_pool, hsa_amd_memory_pool_t coarsegrained_pool, - hsa_queue_t *queue, rpc_device_t device, + hsa_queue_t *queue, rpc::Server &server, const LaunchParameters ¶ms, const char *kernel_name, args_t kernel_args, bool print_resource_usage) { @@ -170,37 +170,10 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, executable, kernel_name, &dev_agent, &symbol)) return err; - // Register RPC callbacks for the malloc and free functions on HSA. 
- auto tuple = std::make_tuple(dev_agent, coarsegrained_pool); - rpc_register_callback( - device, RPC_MALLOC, - [](rpc_port_t port, void *data) { - auto malloc_handler = [](rpc_buffer_t *buffer, void *data) -> void { - auto &[dev_agent, pool] = *static_cast(data); - uint64_t size = buffer->data[0]; - void *dev_ptr = nullptr; - if (hsa_status_t err = - hsa_amd_memory_pool_allocate(pool, size, - /*flags=*/0, &dev_ptr)) - dev_ptr = nullptr; - hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr); - buffer->data[0] = reinterpret_cast(dev_ptr); - }; - rpc_recv_and_send(port, malloc_handler, data); - }, - &tuple); - rpc_register_callback( - device, RPC_FREE, - [](rpc_port_t port, void *data) { - auto free_handler = [](rpc_buffer_t *buffer, void *) { - if (hsa_status_t err = hsa_amd_memory_pool_free( - reinterpret_cast(buffer->data[0]))) - handle_error(err); - }; - rpc_recv_and_send(port, free_handler, data); - }, - nullptr); - + uint32_t wavefront_size = 0; + if (hsa_status_t err = hsa_agent_get_info( + dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size)) + handle_error(err); // Retrieve different properties of the kernel symbol used for launch. uint64_t kernel; uint32_t args_size; @@ -292,14 +265,38 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, hsa_signal_store_relaxed(queue->doorbell_signal, packet_id); std::atomic finished = false; - std::thread server( - [](std::atomic *finished, rpc_device_t device) { - while (!*finished) { - if (rpc_status_t err = rpc_handle_server(device)) + std::thread server_thread( + [](std::atomic *finished, rpc::Server *server, + uint32_t wavefront_size, hsa_agent_t dev_agent, + hsa_amd_memory_pool_t coarsegrained_pool) { + // Register RPC callbacks for the malloc and free functions on HSA. 
+ auto malloc_handler = [&](size_t size) -> void * { + void *dev_ptr = nullptr; + if (hsa_status_t err = + hsa_amd_memory_pool_allocate(coarsegrained_pool, size, + /*flags=*/0, &dev_ptr)) + dev_ptr = nullptr; + hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr); + return dev_ptr; + }; + + auto free_handler = [](void *ptr) -> void { + if (hsa_status_t err = + hsa_amd_memory_pool_free(reinterpret_cast(ptr))) handle_error(err); + }; + + uint32_t index = 0; + while (!*finished) { + if (wavefront_size == 32) + index = + handle_server<32>(*server, index, malloc_handler, free_handler); + else + index = + handle_server<64>(*server, index, malloc_handler, free_handler); } }, - &finished, device); + &finished, &server, wavefront_size, dev_agent, coarsegrained_pool); // Wait until the kernel has completed execution on the device. Periodically // check the RPC client for work to be performed on the server. @@ -309,8 +306,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable, ; finished = true; - if (server.joinable()) - server.join(); + if (server_thread.joinable()) + server_thread.join(); // Destroy the resources acquired to launch the kernel and return. if (hsa_status_t err = hsa_amd_memory_pool_free(args)) @@ -457,34 +454,22 @@ int load(int argc, const char **argv, const char **envp, void *image, handle_error(err); // Set up the RPC server. 
- auto tuple = std::make_tuple(dev_agent, finegrained_pool); - auto rpc_alloc = [](uint64_t size, void *data) { - auto &[dev_agent, finegrained_pool] = *static_cast(data); - void *dev_ptr = nullptr; - if (hsa_status_t err = hsa_amd_memory_pool_allocate(finegrained_pool, size, - /*flags=*/0, &dev_ptr)) - handle_error(err); - hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr); - return dev_ptr; - }; - rpc_device_t device; - if (rpc_status_t err = rpc_server_init(&device, RPC_MAXIMUM_PORT_COUNT, - wavefront_size, rpc_alloc, &tuple)) + void *rpc_buffer; + if (hsa_status_t err = hsa_amd_memory_pool_allocate( + finegrained_pool, + rpc::Server::allocation_size(wavefront_size, rpc::MAX_PORT_COUNT), + /*flags=*/0, &rpc_buffer)) handle_error(err); + hsa_amd_agents_allow_access(1, &dev_agent, nullptr, rpc_buffer); - // Register callbacks for the RPC unit tests. - if (wavefront_size == 32) - register_rpc_callbacks<32>(device); - else if (wavefront_size == 64) - register_rpc_callbacks<64>(device); - else - handle_error("Invalid wavefront size"); + rpc::Server server(rpc::MAX_PORT_COUNT, rpc_buffer); + rpc::Client client(rpc::MAX_PORT_COUNT, rpc_buffer); // Initialize the RPC client on the device by copying the local data to the // device's internal pointer. hsa_executable_symbol_t rpc_client_sym; if (hsa_status_t err = hsa_executable_get_symbol_by_name( - executable, rpc_client_symbol_name, &dev_agent, &rpc_client_sym)) + executable, "__llvm_libc_rpc_client", &dev_agent, &rpc_client_sym)) handle_error(err); void *rpc_client_host; @@ -507,19 +492,17 @@ int load(int argc, const char **argv, const char **envp, void *image, void *rpc_client_buffer; if (hsa_status_t err = - hsa_amd_memory_lock(const_cast(rpc_get_client_buffer(device)), - rpc_get_client_size(), + hsa_amd_memory_lock(&client, sizeof(rpc::Client), /*agents=*/nullptr, 0, &rpc_client_buffer)) handle_error(err); // Copy the RPC client buffer to the address pointed to by the symbol. 
if (hsa_status_t err = hsa_memcpy(*reinterpret_cast(rpc_client_host), dev_agent, - rpc_client_buffer, host_agent, rpc_get_client_size())) + rpc_client_buffer, host_agent, sizeof(rpc::Client))) handle_error(err); - if (hsa_status_t err = hsa_amd_memory_unlock( - const_cast(rpc_get_client_buffer(device)))) + if (hsa_status_t err = hsa_amd_memory_unlock(&client)) handle_error(err); if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_client_host)) handle_error(err); @@ -571,7 +554,7 @@ int load(int argc, const char **argv, const char **envp, void *image, LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1}; begin_args_t init_args = {argc, dev_argv, dev_envp}; if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool, - coarsegrained_pool, queue, device, + coarsegrained_pool, queue, server, single_threaded_params, "_begin.kd", init_args, print_resource_usage)) handle_error(err); @@ -579,7 +562,7 @@ int load(int argc, const char **argv, const char **envp, void *image, start_args_t args = {argc, dev_argv, dev_envp, dev_ret}; if (hsa_status_t err = launch_kernel( dev_agent, executable, kernargs_pool, coarsegrained_pool, queue, - device, params, "_start.kd", args, print_resource_usage)) + server, params, "_start.kd", args, print_resource_usage)) handle_error(err); void *host_ret; @@ -598,14 +581,12 @@ int load(int argc, const char **argv, const char **envp, void *image, end_args_t fini_args = {ret}; if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool, - coarsegrained_pool, queue, device, + coarsegrained_pool, queue, server, single_threaded_params, "_end.kd", fini_args, print_resource_usage)) handle_error(err); - if (rpc_status_t err = rpc_server_shutdown( - device, [](void *ptr, void *) { hsa_amd_memory_pool_free(ptr); }, - nullptr)) + if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_buffer)) handle_error(err); // Free the memory allocated for the device. 
diff --git a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp b/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp index 58e5e5f04d0a70..0ba217451feaea 100644 --- a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp +++ b/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp @@ -167,10 +167,9 @@ void print_kernel_resources(CUmodule binary, const char *kernel_name) { } template -CUresult launch_kernel(CUmodule binary, CUstream stream, - rpc_device_t rpc_device, const LaunchParameters ¶ms, - const char *kernel_name, args_t kernel_args, - bool print_resource_usage) { +CUresult launch_kernel(CUmodule binary, CUstream stream, rpc::Server &server, + const LaunchParameters ¶ms, const char *kernel_name, + args_t kernel_args, bool print_resource_usage) { // look up the '_start' kernel in the loaded module. CUfunction function; if (CUresult err = cuModuleGetFunction(&function, binary, kernel_name)) @@ -181,23 +180,21 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, void *args_config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &kernel_args, CU_LAUNCH_PARAM_BUFFER_SIZE, &args_size, CU_LAUNCH_PARAM_END}; + if (print_resource_usage) + print_kernel_resources(binary, kernel_name); - // Initialize a non-blocking CUDA stream to allocate memory if needed. This - // needs to be done on a separate stream or else it will deadlock with the - // executing kernel. + // Initialize a non-blocking CUDA stream to allocate memory if needed. + // This needs to be done on a separate stream or else it will deadlock + // with the executing kernel. CUstream memory_stream; if (CUresult err = cuStreamCreate(&memory_stream, CU_STREAM_NON_BLOCKING)) handle_error(err); - // Register RPC callbacks for the malloc and free functions on HSA. 
- register_rpc_callbacks<32>(rpc_device); - - rpc_register_callback( - rpc_device, RPC_MALLOC, - [](rpc_port_t port, void *data) { - auto malloc_handler = [](rpc_buffer_t *buffer, void *data) -> void { - CUstream memory_stream = *static_cast(data); - uint64_t size = buffer->data[0]; + std::atomic finished = false; + std::thread server_thread( + [](std::atomic *finished, rpc::Server *server, + CUstream memory_stream) { + auto malloc_handler = [&](size_t size) -> void * { CUdeviceptr dev_ptr; if (CUresult err = cuMemAllocAsync(&dev_ptr, size, memory_stream)) dev_ptr = 0UL; @@ -205,36 +202,22 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, // Wait until the memory allocation is complete. while (cuStreamQuery(memory_stream) == CUDA_ERROR_NOT_READY) ; - buffer->data[0] = static_cast(dev_ptr); + return reinterpret_cast(dev_ptr); }; - rpc_recv_and_send(port, malloc_handler, data); - }, - &memory_stream); - rpc_register_callback( - rpc_device, RPC_FREE, - [](rpc_port_t port, void *data) { - auto free_handler = [](rpc_buffer_t *buffer, void *data) { - CUstream memory_stream = *static_cast(data); - if (CUresult err = cuMemFreeAsync( - static_cast(buffer->data[0]), memory_stream)) + + auto free_handler = [&](void *ptr) -> void { + if (CUresult err = cuMemFreeAsync(reinterpret_cast(ptr), + memory_stream)) handle_error(err); }; - rpc_recv_and_send(port, free_handler, data); - }, - &memory_stream); - if (print_resource_usage) - print_kernel_resources(binary, kernel_name); - - std::atomic finished = false; - std::thread server( - [](std::atomic *finished, rpc_device_t device) { + uint32_t index = 0; while (!*finished) { - if (rpc_status_t err = rpc_handle_server(device)) - handle_error(err); + index = + handle_server<32>(*server, index, malloc_handler, free_handler); } }, - &finished, rpc_device); + &finished, &server, memory_stream); // Call the kernel with the given arguments. 
if (CUresult err = cuLaunchKernel( @@ -247,8 +230,8 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, handle_error(err); finished = true; - if (server.joinable()) - server.join(); + if (server_thread.joinable()) + server_thread.join(); return CUDA_SUCCESS; } @@ -318,23 +301,20 @@ int load(int argc, const char **argv, const char **envp, void *image, handle_error(err); uint32_t warp_size = 32; - auto rpc_alloc = [](uint64_t size, void *) -> void * { - void *dev_ptr; - if (CUresult err = cuMemAllocHost(&dev_ptr, size)) - handle_error(err); - return dev_ptr; - }; - rpc_device_t rpc_device; - if (rpc_status_t err = rpc_server_init(&rpc_device, RPC_MAXIMUM_PORT_COUNT, - warp_size, rpc_alloc, nullptr)) + void *rpc_buffer = nullptr; + if (CUresult err = cuMemAllocHost( + &rpc_buffer, + rpc::Server::allocation_size(warp_size, rpc::MAX_PORT_COUNT))) handle_error(err); + rpc::Server server(rpc::MAX_PORT_COUNT, rpc_buffer); + rpc::Client client(rpc::MAX_PORT_COUNT, rpc_buffer); // Initialize the RPC client on the device by copying the local data to the // device's internal pointer. 
CUdeviceptr rpc_client_dev = 0; uint64_t client_ptr_size = sizeof(void *); if (CUresult err = cuModuleGetGlobal(&rpc_client_dev, &client_ptr_size, - binary, rpc_client_symbol_name)) + binary, "__llvm_libc_rpc_client")) handle_error(err); CUdeviceptr rpc_client_host = 0; @@ -342,20 +322,19 @@ int load(int argc, const char **argv, const char **envp, void *image, cuMemcpyDtoH(&rpc_client_host, rpc_client_dev, sizeof(void *))) handle_error(err); if (CUresult err = - cuMemcpyHtoD(rpc_client_host, rpc_get_client_buffer(rpc_device), - rpc_get_client_size())) + cuMemcpyHtoD(rpc_client_host, &client, sizeof(rpc::Client))) handle_error(err); LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1}; begin_args_t init_args = {argc, dev_argv, dev_envp}; if (CUresult err = - launch_kernel(binary, stream, rpc_device, single_threaded_params, + launch_kernel(binary, stream, server, single_threaded_params, "_begin", init_args, print_resource_usage)) handle_error(err); start_args_t args = {argc, dev_argv, dev_envp, reinterpret_cast(dev_ret)}; - if (CUresult err = launch_kernel(binary, stream, rpc_device, params, "_start", + if (CUresult err = launch_kernel(binary, stream, server, params, "_start", args, print_resource_usage)) handle_error(err); @@ -369,8 +348,8 @@ int load(int argc, const char **argv, const char **envp, void *image, end_args_t fini_args = {host_ret}; if (CUresult err = - launch_kernel(binary, stream, rpc_device, single_threaded_params, - "_end", fini_args, print_resource_usage)) + launch_kernel(binary, stream, server, single_threaded_params, "_end", + fini_args, print_resource_usage)) handle_error(err); // Free the memory allocated for the device. 
@@ -380,8 +359,7 @@ int load(int argc, const char **argv, const char **envp, void *image, handle_error(err); if (CUresult err = cuMemFreeHost(dev_argv)) handle_error(err); - if (rpc_status_t err = rpc_server_shutdown( - rpc_device, [](void *ptr, void *) { cuMemFreeHost(ptr); }, nullptr)) + if (CUresult err = cuMemFreeHost(rpc_buffer)) handle_error(err); // Destroy the context and the loaded binary. diff --git a/libc/utils/gpu/server/llvmlibc_rpc_server.h b/libc/utils/gpu/server/llvmlibc_rpc_server.h index 98df882afa21cf..b7f173734345c0 100644 --- a/libc/utils/gpu/server/llvmlibc_rpc_server.h +++ b/libc/utils/gpu/server/llvmlibc_rpc_server.h @@ -15,99 +15,7 @@ extern "C" { #endif -/// The maximum number of ports that can be opened for any server. -const uint64_t RPC_MAXIMUM_PORT_COUNT = 4096; - -/// The symbol name associated with the client for use with the LLVM C library -/// implementation. -const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client"; - -/// status codes. -typedef enum { - RPC_STATUS_SUCCESS = 0x0, - RPC_STATUS_CONTINUE = 0x1, - RPC_STATUS_ERROR = 0x1000, - RPC_STATUS_UNHANDLED_OPCODE = 0x1001, - RPC_STATUS_INVALID_LANE_SIZE = 0x1002, -} rpc_status_t; - -/// A struct containing an opaque handle to an RPC port. This is what allows the -/// server to communicate with the client. -typedef struct rpc_port_s { - uint64_t handle; - uint32_t lane_size; -} rpc_port_t; - -/// A fixed-size buffer containing the payload sent from the client. -typedef struct rpc_buffer_s { - uint64_t data[8]; -} rpc_buffer_t; - -/// An opaque handle to an RPC server that can be attached to a device. -typedef struct rpc_device_s { - uintptr_t handle; -} rpc_device_t; - -/// A function used to allocate \p bytes for use by the RPC server and client. -/// The memory should support asynchronous and atomic access from both the -/// client and server. -typedef void *(*rpc_alloc_ty)(uint64_t size, void *data); - -/// A function used to free the \p ptr previously allocated. 
-typedef void (*rpc_free_ty)(void *ptr, void *data); - -/// A callback function provided with a \p port to communicate with the RPC -/// client. This will be called by the server to handle an opcode. -typedef void (*rpc_opcode_callback_ty)(rpc_port_t port, void *data); - -/// A callback function to use the port to receive or send a \p buffer. -typedef void (*rpc_port_callback_ty)(rpc_buffer_t *buffer, void *data); - -/// Initialize the server for a given device and return it in \p device. -rpc_status_t rpc_server_init(rpc_device_t *rpc_device, uint64_t num_ports, - uint32_t lane_size, rpc_alloc_ty alloc, - void *data); - -/// Shut down the server for a given device. -rpc_status_t rpc_server_shutdown(rpc_device_t rpc_device, rpc_free_ty dealloc, - void *data); - -/// Queries the RPC clients at least once and performs server-side work if there -/// are any active requests. Runs until all work on the server is completed. -rpc_status_t rpc_handle_server(rpc_device_t rpc_device); - -/// Register a callback to handle an opcode from the RPC client. The associated -/// data must remain accessible as long as the user intends to handle the server -/// with this callback. -rpc_status_t rpc_register_callback(rpc_device_t rpc_device, uint32_t opcode, - rpc_opcode_callback_ty callback, void *data); - -/// Obtain a pointer to a local client buffer that can be copied directly to the -/// other process using the address stored at the rpc client symbol name. -const void *rpc_get_client_buffer(rpc_device_t device); - -/// Returns the size of the client in bytes to be used for a memory copy. -uint64_t rpc_get_client_size(); - -/// Use the \p port to send a buffer using the \p callback. -void rpc_send(rpc_port_t port, rpc_port_callback_ty callback, void *data); - -/// Use the \p port to send \p bytes using the \p callback. The input is an -/// array of at least the configured lane size. 
-void rpc_send_n(rpc_port_t port, const void *const *src, uint64_t *size); - -/// Use the \p port to recieve a buffer using the \p callback. -void rpc_recv(rpc_port_t port, rpc_port_callback_ty callback, void *data); - -/// Use the \p port to recieve \p bytes using the \p callback. The inputs is an -/// array of at least the configured lane size. The \p alloc function allocates -/// memory for the recieved bytes. -void rpc_recv_n(rpc_port_t port, void **dst, uint64_t *size, rpc_alloc_ty alloc, - void *data); - -/// Use the \p port to receive and send a buffer using the \p callback. -void rpc_recv_and_send(rpc_port_t port, rpc_port_callback_ty callback, - void *data); +int libc_handle_rpc_port(void *port, uint32_t num_lanes); #ifdef __cplusplus } diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 972601aaf1d5e0..1fdbb79df7e3e0 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -14,15 +14,16 @@ // Make sure these are included first so they don't conflict with the system. #include +#include "shared/rpc.h" + #include "llvmlibc_rpc_server.h" -#include "src/__support/RPC/rpc.h" +#include "include/llvm-libc-types/rpc_opcodes_t.h" #include "src/__support/arg_list.h" #include "src/stdio/printf_core/converter.h" #include "src/stdio/printf_core/parser.h" #include "src/stdio/printf_core/writer.h" -#include "src/stdio/gpu/file.h" #include #include #include @@ -36,12 +37,6 @@ using namespace LIBC_NAMESPACE; using namespace LIBC_NAMESPACE::printf_core; -static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer), - "Buffer size mismatch"); - -static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT, - "Incorrect maximum port count"); - namespace { struct TempStorage { char *alloc(size_t size) { @@ -53,9 +48,29 @@ struct TempStorage { }; } // namespace -template +enum Stream { + File = 0, + Stdin = 1, + Stdout = 2, + Stderr = 3, +}; + +// Get the associated stream out of an encoded number. 
+LIBC_INLINE ::FILE *to_stream(uintptr_t f) { + ::FILE *stream = reinterpret_cast(f & ~0x3ull); + Stream type = static_cast(f & 0x3ull); + if (type == Stdin) + return stdin; + if (type == Stdout) + return stdout; + if (type == Stderr) + return stderr; + return stream; +} + +template static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { - FILE *files[lane_size] = {nullptr}; + FILE *files[num_lanes] = {nullptr}; // Get the appropriate output stream to use. if (port.get_opcode() == RPC_PRINTF_TO_STREAM || port.get_opcode() == RPC_PRINTF_TO_STREAM_PACKED) @@ -64,22 +79,22 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { }); else if (port.get_opcode() == RPC_PRINTF_TO_STDOUT || port.get_opcode() == RPC_PRINTF_TO_STDOUT_PACKED) - std::fill(files, files + lane_size, stdout); + std::fill(files, files + num_lanes, stdout); else - std::fill(files, files + lane_size, stderr); + std::fill(files, files + num_lanes, stderr); - uint64_t format_sizes[lane_size] = {0}; - void *format[lane_size] = {nullptr}; + uint64_t format_sizes[num_lanes] = {0}; + void *format[num_lanes] = {nullptr}; - uint64_t args_sizes[lane_size] = {0}; - void *args[lane_size] = {nullptr}; + uint64_t args_sizes[num_lanes] = {0}; + void *args[num_lanes] = {nullptr}; // Recieve the format string and arguments from the client. port.recv_n(format, format_sizes, [&](uint64_t size) { return temp_storage.alloc(size); }); // Parse the format string to get the expected size of the buffer. - for (uint32_t lane = 0; lane < lane_size; ++lane) { + for (uint32_t lane = 0; lane < num_lanes; ++lane) { if (!format[lane]) continue; @@ -104,9 +119,9 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { // Identify any arguments that are actually pointers to strings on the client. // Additionally we want to determine how much buffer space we need to print. 
- std::vector strs_to_copy[lane_size]; - int buffer_size[lane_size] = {0}; - for (uint32_t lane = 0; lane < lane_size; ++lane) { + std::vector strs_to_copy[num_lanes]; + int buffer_size[num_lanes] = {0}; + for (uint32_t lane = 0; lane < num_lanes; ++lane) { if (!format[lane]) continue; @@ -138,7 +153,7 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { } // Recieve any strings from the client and push them into a buffer. - std::vector copied_strs[lane_size]; + std::vector copied_strs[num_lanes]; while (std::any_of(std::begin(strs_to_copy), std::end(strs_to_copy), [](const auto &v) { return !v.empty() && v.back(); })) { port.send([&](rpc::Buffer *buffer, uint32_t id) { @@ -147,11 +162,11 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { if (!strs_to_copy[id].empty()) strs_to_copy[id].pop_back(); }); - uint64_t str_sizes[lane_size] = {0}; - void *strs[lane_size] = {nullptr}; + uint64_t str_sizes[num_lanes] = {0}; + void *strs[num_lanes] = {nullptr}; port.recv_n(strs, str_sizes, [&](uint64_t size) { return temp_storage.alloc(size); }); - for (uint32_t lane = 0; lane < lane_size; ++lane) { + for (uint32_t lane = 0; lane < num_lanes; ++lane) { if (!strs[lane]) continue; @@ -161,8 +176,8 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { } // Perform the final formatting and printing using the LLVM C library printf. 
- int results[lane_size] = {0}; - for (uint32_t lane = 0; lane < lane_size; ++lane) { + int results[num_lanes] = {0}; + for (uint32_t lane = 0; lane < num_lanes; ++lane) { if (!format[lane]) continue; @@ -212,42 +227,34 @@ static void handle_printf(rpc::Server::Port &port, TempStorage &temp_storage) { }); } -template -rpc_status_t handle_server_impl( - rpc::Server &server, - const std::unordered_map &callbacks, - const std::unordered_map &callback_data, - uint32_t &index) { - auto port = server.try_open(lane_size, index); - if (!port) - return RPC_STATUS_SUCCESS; - +template +rpc::Status handle_port_impl(rpc::Server::Port &port) { TempStorage temp_storage; - switch (port->get_opcode()) { + switch (port.get_opcode()) { case RPC_WRITE_TO_STREAM: case RPC_WRITE_TO_STDERR: case RPC_WRITE_TO_STDOUT: case RPC_WRITE_TO_STDOUT_NEWLINE: { - uint64_t sizes[lane_size] = {0}; - void *strs[lane_size] = {nullptr}; - FILE *files[lane_size] = {nullptr}; - if (port->get_opcode() == RPC_WRITE_TO_STREAM) { - port->recv([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *strs[num_lanes] = {nullptr}; + FILE *files[num_lanes] = {nullptr}; + if (port.get_opcode() == RPC_WRITE_TO_STREAM) { + port.recv([&](rpc::Buffer *buffer, uint32_t id) { files[id] = reinterpret_cast(buffer->data[0]); }); - } else if (port->get_opcode() == RPC_WRITE_TO_STDERR) { - std::fill(files, files + lane_size, stderr); + } else if (port.get_opcode() == RPC_WRITE_TO_STDERR) { + std::fill(files, files + num_lanes, stderr); } else { - std::fill(files, files + lane_size, stdout); + std::fill(files, files + num_lanes, stdout); } - port->recv_n(strs, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + port.recv_n(strs, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.send([&](rpc::Buffer *buffer, uint32_t id) { flockfile(files[id]); buffer->data[0] = fwrite_unlocked(strs[id], 1, sizes[id], 
files[id]); - if (port->get_opcode() == RPC_WRITE_TO_STDOUT_NEWLINE && + if (port.get_opcode() == RPC_WRITE_TO_STDOUT_NEWLINE && buffer->data[0] == sizes[id]) buffer->data[0] += fwrite_unlocked("\n", 1, 1, files[id]); funlockfile(files[id]); @@ -255,38 +262,37 @@ rpc_status_t handle_server_impl( break; } case RPC_READ_FROM_STREAM: { - uint64_t sizes[lane_size] = {0}; - void *data[lane_size] = {nullptr}; - port->recv([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *data[num_lanes] = {nullptr}; + port.recv([&](rpc::Buffer *buffer, uint32_t id) { data[id] = temp_storage.alloc(buffer->data[0]); sizes[id] = - fread(data[id], 1, buffer->data[0], file::to_stream(buffer->data[1])); + fread(data[id], 1, buffer->data[0], to_stream(buffer->data[1])); }); - port->send_n(data, sizes); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + port.send_n(data, sizes); + port.send([&](rpc::Buffer *buffer, uint32_t id) { std::memcpy(buffer->data, &sizes[id], sizeof(uint64_t)); }); break; } case RPC_READ_FGETS: { - uint64_t sizes[lane_size] = {0}; - void *data[lane_size] = {nullptr}; - port->recv([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *data[num_lanes] = {nullptr}; + port.recv([&](rpc::Buffer *buffer, uint32_t id) { data[id] = temp_storage.alloc(buffer->data[0]); - const char *str = - fgets(reinterpret_cast(data[id]), buffer->data[0], - file::to_stream(buffer->data[1])); + const char *str = fgets(reinterpret_cast(data[id]), + buffer->data[0], to_stream(buffer->data[1])); sizes[id] = !str ? 
0 : std::strlen(str) + 1; }); - port->send_n(data, sizes); + port.send_n(data, sizes); break; } case RPC_OPEN_FILE: { - uint64_t sizes[lane_size] = {0}; - void *paths[lane_size] = {nullptr}; - port->recv_n(paths, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->recv_and_send([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *paths[num_lanes] = {nullptr}; + port.recv_n(paths, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.recv_and_send([&](rpc::Buffer *buffer, uint32_t id) { FILE *file = fopen(reinterpret_cast(paths[id]), reinterpret_cast(buffer->data)); buffer->data[0] = reinterpret_cast(file); @@ -294,7 +300,7 @@ rpc_status_t handle_server_impl( break; } case RPC_CLOSE_FILE: { - port->recv_and_send([&](rpc::Buffer *buffer, uint32_t id) { + port.recv_and_send([&](rpc::Buffer *buffer, uint32_t id) { FILE *file = reinterpret_cast(buffer->data[0]); buffer->data[0] = fclose(file); }); @@ -302,8 +308,8 @@ rpc_status_t handle_server_impl( } case RPC_EXIT: { // Send a response to the client to signal that we are ready to exit. - port->recv_and_send([](rpc::Buffer *, uint32_t) {}); - port->recv([](rpc::Buffer *buffer, uint32_t) { + port.recv_and_send([](rpc::Buffer *, uint32_t) {}); + port.recv([](rpc::Buffer *buffer, uint32_t) { int status = 0; std::memcpy(&status, buffer->data, sizeof(int)); exit(status); @@ -312,105 +318,105 @@ rpc_status_t handle_server_impl( } case RPC_ABORT: { // Send a response to the client to signal that we are ready to abort. 
- port->recv_and_send([](rpc::Buffer *, uint32_t) {}); - port->recv([](rpc::Buffer *, uint32_t) {}); + port.recv_and_send([](rpc::Buffer *, uint32_t) {}); + port.recv([](rpc::Buffer *, uint32_t) {}); abort(); break; } case RPC_HOST_CALL: { - uint64_t sizes[lane_size] = {0}; - unsigned long long results[lane_size] = {0}; - void *args[lane_size] = {nullptr}; - port->recv_n(args, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->recv([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + unsigned long long results[num_lanes] = {0}; + void *args[num_lanes] = {nullptr}; + port.recv_n(args, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.recv([&](rpc::Buffer *buffer, uint32_t id) { using func_ptr_t = unsigned long long (*)(void *); auto func = reinterpret_cast(buffer->data[0]); results[id] = func(args[id]); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + port.send([&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = static_cast(results[id]); }); break; } case RPC_FEOF: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { - buffer->data[0] = feof(file::to_stream(buffer->data[0])); + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = feof(to_stream(buffer->data[0])); }); break; } case RPC_FERROR: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { - buffer->data[0] = ferror(file::to_stream(buffer->data[0])); + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = ferror(to_stream(buffer->data[0])); }); break; } case RPC_CLEARERR: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { - clearerr(file::to_stream(buffer->data[0])); + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { + clearerr(to_stream(buffer->data[0])); }); break; } case RPC_FSEEK: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { - buffer->data[0] = fseek(file::to_stream(buffer->data[0]), - static_cast(buffer->data[1]), - 
static_cast(buffer->data[2])); + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = + fseek(to_stream(buffer->data[0]), static_cast(buffer->data[1]), + static_cast(buffer->data[2])); }); break; } case RPC_FTELL: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { - buffer->data[0] = ftell(file::to_stream(buffer->data[0])); + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = ftell(to_stream(buffer->data[0])); }); break; } case RPC_FFLUSH: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { - buffer->data[0] = fflush(file::to_stream(buffer->data[0])); + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = fflush(to_stream(buffer->data[0])); }); break; } case RPC_UNGETC: { - port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { - buffer->data[0] = ungetc(static_cast(buffer->data[0]), - file::to_stream(buffer->data[1])); + port.recv_and_send([](rpc::Buffer *buffer, uint32_t) { + buffer->data[0] = + ungetc(static_cast(buffer->data[0]), to_stream(buffer->data[1])); }); break; } case RPC_PRINTF_TO_STREAM_PACKED: case RPC_PRINTF_TO_STDOUT_PACKED: case RPC_PRINTF_TO_STDERR_PACKED: { - handle_printf(*port, temp_storage); + handle_printf(port, temp_storage); break; } case RPC_PRINTF_TO_STREAM: case RPC_PRINTF_TO_STDOUT: case RPC_PRINTF_TO_STDERR: { - handle_printf(*port, temp_storage); + handle_printf(port, temp_storage); break; } case RPC_REMOVE: { - uint64_t sizes[lane_size] = {0}; - void *args[lane_size] = {nullptr}; - port->recv_n(args, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *args[num_lanes] = {nullptr}; + port.recv_n(args, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.send([&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = static_cast( remove(reinterpret_cast(args[id]))); }); break; } case RPC_RENAME: { - uint64_t 
oldsizes[lane_size] = {0}; - uint64_t newsizes[lane_size] = {0}; - void *oldpath[lane_size] = {nullptr}; - void *newpath[lane_size] = {nullptr}; - port->recv_n(oldpath, oldsizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->recv_n(newpath, newsizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t oldsizes[num_lanes] = {0}; + uint64_t newsizes[num_lanes] = {0}; + void *oldpath[num_lanes] = {nullptr}; + void *newpath[num_lanes] = {nullptr}; + port.recv_n(oldpath, oldsizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.recv_n(newpath, newsizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.send([&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = static_cast( rename(reinterpret_cast(oldpath[id]), reinterpret_cast(newpath[id]))); @@ -418,168 +424,36 @@ rpc_status_t handle_server_impl( break; } case RPC_SYSTEM: { - uint64_t sizes[lane_size] = {0}; - void *args[lane_size] = {nullptr}; - port->recv_n(args, sizes, - [&](uint64_t size) { return temp_storage.alloc(size); }); - port->send([&](rpc::Buffer *buffer, uint32_t id) { + uint64_t sizes[num_lanes] = {0}; + void *args[num_lanes] = {nullptr}; + port.recv_n(args, sizes, + [&](uint64_t size) { return temp_storage.alloc(size); }); + port.send([&](rpc::Buffer *buffer, uint32_t id) { buffer->data[0] = static_cast( system(reinterpret_cast(args[id]))); }); break; } case RPC_NOOP: { - port->recv([](rpc::Buffer *, uint32_t) {}); + port.recv([](rpc::Buffer *, uint32_t) {}); break; } - default: { - auto handler = - callbacks.find(static_cast(port->get_opcode())); - - // We error out on an unhandled opcode. - if (handler == callbacks.end()) - return RPC_STATUS_UNHANDLED_OPCODE; - - // Invoke the registered callback with a reference to the port. 
- void *data = - callback_data.at(static_cast(port->get_opcode())); - rpc_port_t port_ref{reinterpret_cast(&*port), lane_size}; - (handler->second)(port_ref, data); - } - } - - // Increment the index so we start the scan after this port. - index = port->get_index() + 1; - port->close(); - - return RPC_STATUS_CONTINUE; -} - -struct Device { - Device(uint32_t lane_size, uint32_t num_ports, void *buffer) - : lane_size(lane_size), buffer(buffer), server(num_ports, buffer), - client(num_ports, buffer) {} - - rpc_status_t handle_server(uint32_t &index) { - switch (lane_size) { - case 1: - return handle_server_impl<1>(server, callbacks, callback_data, index); - case 32: - return handle_server_impl<32>(server, callbacks, callback_data, index); - case 64: - return handle_server_impl<64>(server, callbacks, callback_data, index); - default: - return RPC_STATUS_INVALID_LANE_SIZE; - } + default: + return rpc::UNHANDLED_OPCODE; } - uint32_t lane_size; - void *buffer; - rpc::Server server; - rpc::Client client; - std::unordered_map callbacks; - std::unordered_map callback_data; -}; - -rpc_status_t rpc_server_init(rpc_device_t *rpc_device, uint64_t num_ports, - uint32_t lane_size, rpc_alloc_ty alloc, - void *data) { - if (!rpc_device) - return RPC_STATUS_ERROR; - if (lane_size != 1 && lane_size != 32 && lane_size != 64) - return RPC_STATUS_INVALID_LANE_SIZE; - - uint64_t size = rpc::Server::allocation_size(lane_size, num_ports); - void *buffer = alloc(size, data); - - if (!buffer) - return RPC_STATUS_ERROR; - - Device *device = new Device(lane_size, num_ports, buffer); - if (!device) - return RPC_STATUS_ERROR; - - rpc_device->handle = reinterpret_cast(device); - return RPC_STATUS_SUCCESS; -} - -rpc_status_t rpc_server_shutdown(rpc_device_t rpc_device, rpc_free_ty dealloc, - void *data) { - if (!rpc_device.handle) - return RPC_STATUS_ERROR; - - Device *device = reinterpret_cast(rpc_device.handle); - dealloc(device->buffer, data); - delete device; - - return RPC_STATUS_SUCCESS; + 
return rpc::SUCCESS; } -rpc_status_t rpc_handle_server(rpc_device_t rpc_device) { - if (!rpc_device.handle) - return RPC_STATUS_ERROR; - - Device *device = reinterpret_cast(rpc_device.handle); - uint32_t index = 0; - for (;;) { - rpc_status_t status = device->handle_server(index); - if (status != RPC_STATUS_CONTINUE) - return status; +int libc_handle_rpc_port(void *port, uint32_t num_lanes) { + switch (num_lanes) { + case 1: + return handle_port_impl<1>(*reinterpret_cast(port)); + case 32: + return handle_port_impl<32>(*reinterpret_cast(port)); + case 64: + return handle_port_impl<64>(*reinterpret_cast(port)); + default: + return rpc::ERROR; } } - -rpc_status_t rpc_register_callback(rpc_device_t rpc_device, uint32_t opcode, - rpc_opcode_callback_ty callback, - void *data) { - if (!rpc_device.handle) - return RPC_STATUS_ERROR; - - Device *device = reinterpret_cast(rpc_device.handle); - - device->callbacks[opcode] = callback; - device->callback_data[opcode] = data; - return RPC_STATUS_SUCCESS; -} - -const void *rpc_get_client_buffer(rpc_device_t rpc_device) { - if (!rpc_device.handle) - return nullptr; - Device *device = reinterpret_cast(rpc_device.handle); - return &device->client; -} - -uint64_t rpc_get_client_size() { return sizeof(rpc::Client); } - -void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = reinterpret_cast(ref.handle); - port->send([=](rpc::Buffer *buffer, uint32_t) { - callback(reinterpret_cast(buffer), data); - }); -} - -void rpc_send_n(rpc_port_t ref, const void *const *src, uint64_t *size) { - auto port = reinterpret_cast(ref.handle); - port->send_n(src, size); -} - -void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { - auto port = reinterpret_cast(ref.handle); - port->recv([=](rpc::Buffer *buffer, uint32_t) { - callback(reinterpret_cast(buffer), data); - }); -} - -void rpc_recv_n(rpc_port_t ref, void **dst, uint64_t *size, rpc_alloc_ty alloc, - void *data) { - auto port = 
reinterpret_cast(ref.handle); - auto alloc_fn = [=](uint64_t size) { return alloc(size, data); }; - port->recv_n(dst, size, alloc_fn); -} - -void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback, - void *data) { - auto port = reinterpret_cast(ref.handle); - port->recv_and_send([=](rpc::Buffer *buffer, uint32_t) { - callback(reinterpret_cast(buffer), data); - }); -} diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 9039c6f046445b..d520c46bae1ef1 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -102,6 +102,9 @@ Deprecations and Removals headers as an extension and only deprecates them. The ``_LIBCPP_DISABLE_DEPRECATION_WARNINGS`` macro can be defined to suppress deprecation for these headers. +- The ``_LIBCPP_DISABLE_AVAILABILITY`` macro that was used to force-disable availability markup has now been removed. + Whether availability markup is used by the library is now solely controlled at configuration-time. + Upcoming Deprecations and Removals ---------------------------------- diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h index d805c5a4d978d3..efda2a04a4841d 100644 --- a/libcxx/include/__configuration/availability.h +++ b/libcxx/include/__configuration/availability.h @@ -67,13 +67,6 @@ // // [1]: https://clang.llvm.org/docs/AttributeReference.html#availability -// For backwards compatibility, allow users to define _LIBCPP_DISABLE_AVAILABILITY -// for a while. -#if defined(_LIBCPP_DISABLE_AVAILABILITY) -# undef _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS -# define _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS 0 -#endif - // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. 
#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) diff --git a/libcxx/include/__memory_resource/synchronized_pool_resource.h b/libcxx/include/__memory_resource/synchronized_pool_resource.h index 6384564afc917a..bcc1ac4a172e3a 100644 --- a/libcxx/include/__memory_resource/synchronized_pool_resource.h +++ b/libcxx/include/__memory_resource/synchronized_pool_resource.h @@ -10,10 +10,12 @@ #define _LIBCPP___MEMORY_RESOURCE_SYNCHRONIZED_POOL_RESOURCE_H #include <__config> +#include <__cstddef/size_t.h> #include <__memory_resource/memory_resource.h> #include <__memory_resource/pool_options.h> #include <__memory_resource/unsynchronized_pool_resource.h> -#include +#include <__mutex/mutex.h> +#include <__mutex/unique_lock.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/future b/libcxx/include/future index cbf3ed93464176..5b2e9eed88e358 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -368,6 +368,7 @@ template struct uses_allocator, Alloc>; # include <__assert> # include <__chrono/duration.h> +# include <__chrono/steady_clock.h> # include <__chrono/time_point.h> # include <__condition_variable/condition_variable.h> # include <__exception/exception_ptr.h> @@ -381,6 +382,9 @@ template struct uses_allocator, Alloc>; # include <__memory/shared_count.h> # include <__memory/unique_ptr.h> # include <__memory/uses_allocator.h> +# include <__mutex/lock_guard.h> +# include <__mutex/mutex.h> +# include <__mutex/unique_lock.h> # include <__system_error/error_category.h> # include <__system_error/error_code.h> # include <__system_error/error_condition.h> @@ -390,14 +394,19 @@ template struct uses_allocator, Alloc>; # include <__type_traits/conditional.h> # include <__type_traits/decay.h> # include <__type_traits/enable_if.h> +# include <__type_traits/invoke.h> +# include <__type_traits/is_same.h> +# include <__type_traits/remove_cvref.h> +# 
include <__type_traits/remove_reference.h> # include <__type_traits/strip_signature.h> # include <__type_traits/underlying_type.h> # include <__utility/auto_cast.h> # include <__utility/forward.h> # include <__utility/move.h> -# include +# include <__utility/swap.h> # include # include +# include # include # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/memory_resource b/libcxx/include/memory_resource index 7de69e67b7c062..e54b7e6e2473fe 100644 --- a/libcxx/include/memory_resource +++ b/libcxx/include/memory_resource @@ -66,6 +66,10 @@ namespace std::pmr { # pragma GCC system_header #endif +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER >= 17 && _LIBCPP_STD_VER <= 20 +# include +#endif + #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include #endif diff --git a/libcxx/include/syncstream b/libcxx/include/syncstream index 970706976e1ff1..2699a4b3a6fbbd 100644 --- a/libcxx/include/syncstream +++ b/libcxx/include/syncstream @@ -121,6 +121,7 @@ namespace std { #if _LIBCPP_HAS_LOCALIZATION +# include <__mutex/lock_guard.h> # include <__utility/move.h> # include # include // required for declaration of default arguments @@ -129,7 +130,6 @@ namespace std { # if _LIBCPP_HAS_THREADS # include -# include # include # endif diff --git a/libcxx/src/.clang-tidy b/libcxx/src/.clang-tidy new file mode 100644 index 00000000000000..ec8f2e0a76a3c4 --- /dev/null +++ b/libcxx/src/.clang-tidy @@ -0,0 +1,4 @@ +InheritParentConfig: true + +Checks: > + -readability-identifier-naming diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index a008b4d76edde6..096c321672474d 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -465,7 +465,6 @@ future iosfwd future istream future limits future locale -future mutex future new future ratio future sstream @@ -692,11 +691,9 @@ memory_resource 
compare memory_resource cstdint memory_resource ctime memory_resource limits -memory_resource mutex memory_resource new memory_resource ratio memory_resource tuple -memory_resource typeinfo memory_resource version mutex cerrno mutex climits @@ -1076,7 +1073,6 @@ syncstream iosfwd syncstream limits syncstream locale syncstream map -syncstream mutex syncstream new syncstream optional syncstream ostream diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv index d5321da32b3d46..74d912e5fe3a31 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -464,7 +464,6 @@ future iosfwd future istream future limits future locale -future mutex future new future ratio future sstream @@ -691,11 +690,9 @@ memory_resource compare memory_resource cstdint memory_resource ctime memory_resource limits -memory_resource mutex memory_resource new memory_resource ratio memory_resource tuple -memory_resource typeinfo memory_resource version mutex cerrno mutex climits @@ -1075,7 +1072,6 @@ syncstream iosfwd syncstream limits syncstream locale syncstream map -syncstream mutex syncstream new syncstream optional syncstream ostream diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index 36abefe41e2cd0..b8e982b653d390 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -17,13 +17,6 @@ # goal being to reduce the load on testers when a commit is known to fail. # -env: - # LLVM POST-BRANCH bump version - # LLVM POST-BRANCH add compiler test for ToT - 1, e.g. "Clang 17" - # LLVM RELEASE bump remove compiler ToT - 3, e.g. "Clang 15" - LLVM_HEAD_VERSION: "18" # Used compiler, update POST-BRANCH. 
- GCC_STABLE_VERSION: "13" - definitions: _common: &common timeout_in_minutes: 120 diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 3d416e6985d02c..57cb443798cd8f 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -114,6 +114,7 @@ struct Configuration { bool is64() const { return llvm::COFF::is64Bit(machine); } llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN; + bool machineInferred = false; size_t wordsize; bool verbose = false; WindowsSubsystem subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index df3c5a176b52e0..0c6df701284b7d 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -46,6 +46,8 @@ static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) { return COFF::isArm64EC(mt) || mt == AMD64; case ARM64X: return COFF::isAnyArm64(mt) || mt == AMD64; + case IMAGE_FILE_MACHINE_UNKNOWN: + return true; default: return ctx.config.machine == mt; } @@ -74,14 +76,26 @@ void SymbolTable::addFile(InputFile *file) { } MachineTypes mt = file->getMachineType(); - if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN) { - ctx.config.machine = mt; - ctx.driver.addWinSysRootLibSearchPaths(); - } else if (!compatibleMachineType(ctx, mt)) { + // The ARM64EC target must be explicitly specified and cannot be inferred. 
+ if (mt == ARM64EC && + (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN || + (ctx.config.machineInferred && + (ctx.config.machine == ARM64 || ctx.config.machine == AMD64)))) { + error(toString(file) + ": machine type arm64ec is ambiguous and cannot be " + "inferred, use /machine:arm64ec or /machine:arm64x"); + return; + } + if (!compatibleMachineType(ctx, mt)) { error(toString(file) + ": machine type " + machineToStr(mt) + " conflicts with " + machineToStr(ctx.config.machine)); return; } + if (ctx.config.machine == IMAGE_FILE_MACHINE_UNKNOWN && + mt != IMAGE_FILE_MACHINE_UNKNOWN) { + ctx.config.machineInferred = true; + ctx.config.machine = mt; + ctx.driver.addWinSysRootLibSearchPaths(); + } ctx.driver.parseDirectives(file); } diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp index ad6867744c145f..6b60ebb18e8212 100644 --- a/lld/Common/ErrorHandler.cpp +++ b/lld/Common/ErrorHandler.cpp @@ -337,8 +337,9 @@ void ErrorHandler::fatal(const Twine &msg) { } SyncStream::~SyncStream() { - os.flush(); switch (level) { + case DiagLevel::None: + break; case DiagLevel::Log: e.log(buf); break; diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index 7c65b8ae8c6657..b5641e5d9ce550 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -393,8 +393,8 @@ class elf::Patch843419Section final : public SyntheticSection { }; Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, - ".text.patch"), + : SyntheticSection(ctx, ".text.patch", SHT_PROGBITS, + SHF_ALLOC | SHF_EXECINSTR, 4), patchee(p), patcheeOffset(off) { this->parent = p->getParent(); patchSym = addSyntheticLocal( diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp index 4257e491121f21..a7120c43e51d34 100644 --- a/lld/ELF/ARMErrataFix.cpp +++ b/lld/ELF/ARMErrataFix.cpp @@ -136,8 +136,8 @@ static bool is32bitBranch(uint32_t instr) { 
Patch657417Section::Patch657417Section(Ctx &ctx, InputSection *p, uint64_t off, uint32_t instr, bool isARM) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, - ".text.patch"), + : SyntheticSection(ctx, ".text.patch", SHT_PROGBITS, + SHF_ALLOC | SHF_EXECINSTR, 4), patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) { parent = p->getParent(); patchSym = addSyntheticLocal( diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp index ab948e65c25ed0..52fc779855a36e 100644 --- a/lld/ELF/Arch/AMDGPU.cpp +++ b/lld/ELF/Arch/AMDGPU.cpp @@ -73,7 +73,7 @@ uint32_t AMDGPU::calcEFlagsV4() const { // features in the same category are either ANY, ANY and ON, or ANY and OFF). for (InputFile *f : ArrayRef(ctx.objectFiles).slice(1)) { if (retMach != (getEFlags(f) & EF_AMDGPU_MACH)) { - ErrAlways(ctx) << "incompatible mach: " << f; + Err(ctx) << "incompatible mach: " << f; return 0; } @@ -82,7 +82,7 @@ uint32_t AMDGPU::calcEFlagsV4() const { (getEFlags(f) & EF_AMDGPU_FEATURE_XNACK_V4) != EF_AMDGPU_FEATURE_XNACK_ANY_V4)) { if (retXnack != (getEFlags(f) & EF_AMDGPU_FEATURE_XNACK_V4)) { - ErrAlways(ctx) << "incompatible xnack: " << f; + Err(ctx) << "incompatible xnack: " << f; return 0; } } else { @@ -95,7 +95,7 @@ uint32_t AMDGPU::calcEFlagsV4() const { (getEFlags(f) & EF_AMDGPU_FEATURE_SRAMECC_V4) != EF_AMDGPU_FEATURE_SRAMECC_ANY_V4)) { if (retSramEcc != (getEFlags(f) & EF_AMDGPU_FEATURE_SRAMECC_V4)) { - ErrAlways(ctx) << "incompatible sramecc: " << f; + Err(ctx) << "incompatible sramecc: " << f; return 0; } } else { @@ -143,7 +143,7 @@ uint32_t AMDGPU::calcEFlags() const { case ELFABIVERSION_AMDGPU_HSA_V6: return calcEFlagsV6(); default: - ErrAlways(ctx) << "unknown abi version: " << abiVersion; + Err(ctx) << "unknown abi version: " << abiVersion; return 0; } } diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 69ec0d34ae1195..62685b1e7dedea 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -557,8 +557,8 @@ void 
ARM::encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val, rot = (lz + 8) << 7; } if (check && imm > 0xff) - Err(ctx) << getErrorLoc(ctx, loc) << "unencodeable immediate " - << Twine(val).str() << " for relocation " << rel.type; + Err(ctx) << getErrorLoc(ctx, loc) << "unencodeable immediate " << val + << " for relocation " << rel.type; write32(ctx, loc, (read32(ctx, loc) & 0xff3ff000) | opcode | rot | (imm & 0xff)); } @@ -1219,29 +1219,27 @@ template void ObjFile::importCmseSymbols() { sym->stOther = eSym.st_other; if (eSym.st_shndx != SHN_ABS) { - ErrAlways(ctx) << "CMSE symbol '" << sym->getName() - << "' in import library '" << this << "' is not absolute"; + Err(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '" + << this << "' is not absolute"; continue; } if (!(eSym.st_value & 1) || (eSym.getType() != STT_FUNC)) { - ErrAlways(ctx) << "CMSE symbol '" << sym->getName() - << "' in import library '" << this - << "' is not a Thumb function definition"; + Err(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '" + << this << "' is not a Thumb function definition"; continue; } if (ctx.symtab->cmseImportLib.count(sym->getName())) { - ErrAlways(ctx) << "CMSE symbol '" << sym->getName() - << "' is multiply defined in import library '" << this - << "'"; + Err(ctx) << "CMSE symbol '" << sym->getName() + << "' is multiply defined in import library '" << this << "'"; continue; } if (eSym.st_size != ACLESESYM_SIZE) { Warn(ctx) << "CMSE symbol '" << sym->getName() << "' in import library '" - << this << "' does not have correct size of " - << Twine(ACLESESYM_SIZE) << " bytes"; + << this << "' does not have correct size of " << ACLESESYM_SIZE + << " bytes"; } ctx.symtab->cmseImportLib[sym->getName()] = sym; @@ -1289,8 +1287,7 @@ void elf::processArmCmseSymbols(Ctx &ctx) { // If input object build attributes do not support CMSE, error and disable // further scanning for , __acle_se_ pairs. 
if (!ctx.arg.armCMSESupport) { - ErrAlways(ctx) - << "CMSE is only supported by ARMv8-M architecture or later"; + Err(ctx) << "CMSE is only supported by ARMv8-M architecture or later"; ctx.arg.cmseImplib = false; break; } @@ -1300,17 +1297,16 @@ void elf::processArmCmseSymbols(Ctx &ctx) { StringRef name = acleSeSym->getName().substr(std::strlen(ACLESESYM_PREFIX)); Symbol *sym = ctx.symtab->find(name); if (!sym) { - ErrAlways(ctx) - << acleSeSym->file << ": cmse special symbol '" - << acleSeSym->getName() - << "' detected, but no associated entry function definition '" << name - << "' with external linkage found"; + Err(ctx) << acleSeSym->file << ": cmse special symbol '" + << acleSeSym->getName() + << "' detected, but no associated entry function definition '" + << name << "' with external linkage found"; continue; } std::string errMsg = checkCmseSymAttributes(ctx, acleSeSym, sym); if (!errMsg.empty()) { - ErrAlways(ctx) << errMsg; + Err(ctx) << errMsg; continue; } @@ -1331,9 +1327,9 @@ void elf::processArmCmseSymbols(Ctx &ctx) { } ArmCmseSGSection::ArmCmseSGSection(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR, - llvm::ELF::SHT_PROGBITS, - /*alignment=*/32, ".gnu.sgstubs") { + : SyntheticSection(ctx, ".gnu.sgstubs", SHT_PROGBITS, + SHF_ALLOC | SHF_EXECINSTR, + /*addralign=*/32) { entsize = ACLESESYM_SIZE; // The range of addresses used in the CMSE import library should be fixed. 
for (auto &[_, sym] : ctx.symtab->cmseImportLib) { @@ -1445,21 +1441,22 @@ void ArmCmseSGSection::finalizeContents() { // See Arm® v8-M Security Extensions: Requirements on Development Tools // https://developer.arm.com/documentation/ecm0359818/latest template void elf::writeARMCmseImportLib(Ctx &ctx) { - StringTableSection *shstrtab = - make(ctx, ".shstrtab", /*dynamic=*/false); - StringTableSection *strtab = - make(ctx, ".strtab", /*dynamic=*/false); - SymbolTableBaseSection *impSymTab = - make>(ctx, *strtab); + auto shstrtab = + std::make_unique(ctx, ".shstrtab", /*dynamic=*/false); + auto strtab = + std::make_unique(ctx, ".strtab", /*dynamic=*/false); + auto impSymTab = std::make_unique>(ctx, *strtab); SmallVector, SyntheticSection *>, 0> osIsPairs; osIsPairs.emplace_back( - std::make_unique(ctx, strtab->name, 0, 0), strtab); + std::make_unique(ctx, strtab->name, 0, 0), strtab.get()); osIsPairs.emplace_back( - std::make_unique(ctx, impSymTab->name, 0, 0), impSymTab); + std::make_unique(ctx, impSymTab->name, 0, 0), + impSymTab.get()); osIsPairs.emplace_back( - std::make_unique(ctx, shstrtab->name, 0, 0), shstrtab); + std::make_unique(ctx, shstrtab->name, 0, 0), + shstrtab.get()); llvm::sort(ctx.symtab->cmseSymMap, [&](const auto &a, const auto &b) { return a.second.sym->getVA(ctx) < b.second.sym->getVA(ctx); @@ -1495,8 +1492,8 @@ template void elf::writeARMCmseImportLib(Ctx &ctx) { Expected> bufferOrErr = FileOutputBuffer::create(ctx.arg.cmseOutputLib, fileSize, flags); if (!bufferOrErr) { - ErrAlways(ctx) << "failed to open " << ctx.arg.cmseOutputLib << ": " - << bufferOrErr.takeError(); + Err(ctx) << "failed to open " << ctx.arg.cmseOutputLib << ": " + << bufferOrErr.takeError(); return; } diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 0eb56de9c7f32c..ebfdbafc9983e7 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -775,8 +775,8 @@ static bool relax(Ctx &ctx, InputSection &sec) { if 
(LLVM_UNLIKELY(static_cast(remove) < 0)) { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "insufficient padding bytes for " << r.type << ": " - << Twine(allBytes) << " bytes available for " - << "requested alignment of " << Twine(align) << " bytes"; + << allBytes << " bytes available for " + << "requested alignment of " << align << " bytes"; remove = 0; } break; @@ -807,7 +807,7 @@ static bool relax(Ctx &ctx, InputSection &sec) { } // Inform assignAddresses that the size has changed. if (!isUInt<32>(delta)) - Fatal(ctx) << "section size decrease is too large: " << Twine(delta); + Fatal(ctx) << "section size decrease is too large: " << delta; sec.bytesDropped = delta; return changed; } @@ -838,7 +838,7 @@ bool LoongArch::relaxOnce(int pass) const { } void LoongArch::finalizeRelax(int passes) const { - Log(ctx) << "relaxation passes: " << Twine(passes); + Log(ctx) << "relaxation passes: " << passes; SmallVector storage; for (OutputSection *osec : ctx.outputSections) { if (!(osec->flags & SHF_EXECINSTR)) diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index 02f360d73ea15e..da76820de240d5 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -503,7 +503,7 @@ calculateMipsRelChain(Ctx &ctx, uint8_t *loc, uint32_t type, uint64_t val) { if (type2 == R_MIPS_SUB && (type3 == R_MIPS_HI16 || type3 == R_MIPS_LO16)) return std::make_pair(type3, -val); Err(ctx) << getErrorLoc(ctx, loc) << "unsupported relocations combination " - << Twine(type); + << type; return std::make_pair(type & 0xff, val); } diff --git a/lld/ELF/Arch/MipsArchTree.cpp b/lld/ELF/Arch/MipsArchTree.cpp index 0c64a46fe85d08..197cb30cdb8a56 100644 --- a/lld/ELF/Arch/MipsArchTree.cpp +++ b/lld/ELF/Arch/MipsArchTree.cpp @@ -72,24 +72,23 @@ static void checkFlags(Ctx &ctx, ArrayRef files) { for (const FileFlags &f : files) { if (ctx.arg.is64 && f.flags & EF_MIPS_MICROMIPS) - ErrAlways(ctx) << f.file << ": microMIPS 64-bit is not supported"; + Err(ctx) << f.file << ": microMIPS 64-bit 
is not supported"; uint32_t abi2 = f.flags & (EF_MIPS_ABI | EF_MIPS_ABI2); if (abi != abi2) - ErrAlways(ctx) << f.file << ": ABI '" << getAbiName(abi2) - << "' is incompatible with target ABI '" << getAbiName(abi) - << "'"; + Err(ctx) << f.file << ": ABI '" << getAbiName(abi2) + << "' is incompatible with target ABI '" << getAbiName(abi) + << "'"; bool nan2 = f.flags & EF_MIPS_NAN2008; if (nan != nan2) - ErrAlways(ctx) << f.file << ": -mnan=" << getNanName(nan2) - << " is incompatible with target -mnan=" - << getNanName(nan); + Err(ctx) << f.file << ": -mnan=" << getNanName(nan2) + << " is incompatible with target -mnan=" << getNanName(nan); bool fp2 = f.flags & EF_MIPS_FP64; if (fp != fp2) - ErrAlways(ctx) << f.file << ": -mfp" << getFpName(fp2) - << " is incompatible with target -mfp" << getFpName(fp); + Err(ctx) << f.file << ": -mfp" << getFpName(fp2) + << " is incompatible with target -mfp" << getFpName(fp); } } @@ -284,9 +283,9 @@ static uint32_t getArchFlags(Ctx &ctx, ArrayRef files) { if (isArchMatched(newFlags, ret)) continue; if (!isArchMatched(ret, newFlags)) { - ErrAlways(ctx) << "incompatible target ISA:\n>>> " << files[0].file - << ": " << getFullArchName(ret) << "\n>>> " << f.file - << ": " << getFullArchName(newFlags); + Err(ctx) << "incompatible target ISA:\n>>> " << files[0].file << ": " + << getFullArchName(ret) << "\n>>> " << f.file << ": " + << getFullArchName(newFlags); return 0; } ret = newFlags; @@ -350,15 +349,14 @@ static StringRef getMipsFpAbiName(uint8_t fpAbi) { } } -uint8_t elf::getMipsFpAbiFlag(Ctx &ctx, uint8_t oldFlag, uint8_t newFlag, - StringRef fileName) { +uint8_t elf::getMipsFpAbiFlag(Ctx &ctx, InputFile *file, uint8_t oldFlag, + uint8_t newFlag) { if (compareMipsFpAbi(newFlag, oldFlag) >= 0) return newFlag; if (compareMipsFpAbi(oldFlag, newFlag) < 0) - ErrAlways(ctx) << fileName << ": floating point ABI '" - << getMipsFpAbiName(newFlag) - << "' is incompatible with target floating point ABI '" - << getMipsFpAbiName(oldFlag) << 
"'"; + Err(ctx) << file << ": floating point ABI '" << getMipsFpAbiName(newFlag) + << "' is incompatible with target floating point ABI '" + << getMipsFpAbiName(oldFlag) << "'"; return oldFlag; } diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 4dc9f93f5c6880..8dd1735ee1e886 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -288,10 +288,10 @@ static void writeSequence(Ctx &ctx, const char *prefix, int from, // The full section content has the extent of [begin, end). We drop unused // instructions and write [first,end). auto *sec = make( - ctx.internalFile, SHF_ALLOC, SHT_PROGBITS, 4, + ctx.internalFile, ".text", SHT_PROGBITS, SHF_ALLOC, /*addralign=*/4, + /*entsize=*/0, ArrayRef(reinterpret_cast(buf.data() + first), - 4 * (buf.size() - first)), - ".text"); + 4 * (buf.size() - first))); ctx.inputSections.push_back(sec); for (Defined *sym : defined) { sym->section = sec; diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 5368ced9a4f538..58a71fd9545c5c 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -156,14 +156,13 @@ uint32_t RISCV::calcEFlags() const { target |= EF_RISCV_RVC; if ((eflags & EF_RISCV_FLOAT_ABI) != (target & EF_RISCV_FLOAT_ABI)) - ErrAlways(ctx) << f - << ": cannot link object files with different " - "floating-point ABI from " - << ctx.objectFiles[0]; + Err(ctx) << f + << ": cannot link object files with different " + "floating-point ABI from " + << ctx.objectFiles[0]; if ((eflags & EF_RISCV_RVE) != (target & EF_RISCV_RVE)) - ErrAlways(ctx) - << f << ": cannot link object files with different EF_RISCV_RVE"; + Err(ctx) << f << ": cannot link object files with different EF_RISCV_RVE"; } return target; @@ -659,9 +658,9 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { auto val = rel.sym->getVA(ctx, rel.addend) - rel1.sym->getVA(ctx, rel1.addend); if (overwriteULEB128(loc, val) >= 0x80) - Err(ctx) << sec.getLocation(rel.offset) << ": ULEB128 value " - << 
Twine(val) << " exceeds available space; references '" - << rel.sym << "'"; + Err(ctx) << sec.getLocation(rel.offset) << ": ULEB128 value " << val + << " exceeds available space; references '" << rel.sym + << "'"; ++i; continue; } @@ -833,10 +832,10 @@ static bool relax(Ctx &ctx, InputSection &sec) { if (LLVM_UNLIKELY(static_cast(remove) < 0)) { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "insufficient padding bytes for " << r.type << ": " - << Twine(r.addend) + << r.addend << " bytes available " "for requested alignment of " - << Twine(align) << " bytes"; + << align << " bytes"; remove = 0; } break; @@ -900,7 +899,7 @@ static bool relax(Ctx &ctx, InputSection &sec) { } // Inform assignAddresses that the size has changed. if (!isUInt<32>(delta)) - Fatal(ctx) << "section size decrease is too large: " << Twine(delta); + Fatal(ctx) << "section size decrease is too large: " << delta; sec.bytesDropped = delta; return changed; } @@ -933,7 +932,7 @@ bool RISCV::relaxOnce(int pass) const { void RISCV::finalizeRelax(int passes) const { llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation"); - Log(ctx) << "relaxation passes: " << Twine(passes); + Log(ctx) << "relaxation passes: " << passes; SmallVector storage; for (OutputSection *osec : ctx.outputSections) { if (!(osec->flags & SHF_EXECINSTR)) @@ -1045,7 +1044,7 @@ namespace { class RISCVAttributesSection final : public SyntheticSection { public: RISCVAttributesSection(Ctx &ctx) - : SyntheticSection(ctx, 0, SHT_RISCV_ATTRIBUTES, 1, ".riscv.attributes") { + : SyntheticSection(ctx, ".riscv.attributes", SHT_RISCV_ATTRIBUTES, 0, 1) { } size_t getSize() const override { return size; } @@ -1096,10 +1095,9 @@ static void mergeAtomic(Ctx &ctx, DenseMap::iterator it, auto reportAbiError = [&]() { Err(ctx) << "atomic abi mismatch for " << oldSection->name << "\n>>> " - << oldSection - << ": atomic_abi=" << Twine(static_cast(oldTag)) + << oldSection << ": atomic_abi=" << static_cast(oldTag) << "\n>>> " << newSection - 
<< ": atomic_abi=" << Twine(static_cast(newTag)); + << ": atomic_abi=" << static_cast(newTag); }; auto reportUnknownAbiError = [&](const InputSectionBase *section, @@ -1112,7 +1110,7 @@ static void mergeAtomic(Ctx &ctx, DenseMap::iterator it, return; }; Err(ctx) << "unknown atomic abi for " << section->name << "\n>>> " - << section << ": atomic_abi=" << Twine(static_cast(tag)); + << section << ": atomic_abi=" << static_cast(tag); }; switch (oldTag) { case RISCVAtomicAbiTag::UNKNOWN: diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 10c52d7206b805..bc4b967ccbbbb4 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1327,15 +1327,13 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { OPT_no_lto_validate_all_vtables_have_type_infos, false); ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) - ErrAlways(ctx) << "invalid optimization level for LTO: " - << Twine(ctx.arg.ltoo); + ErrAlways(ctx) << "invalid optimization level for LTO: " << ctx.arg.ltoo; unsigned ltoCgo = args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(ctx.arg.ltoo)); if (auto level = CodeGenOpt::getLevel(ltoCgo)) ctx.arg.ltoCgo = *level; else - ErrAlways(ctx) << "invalid codegen optimization level for LTO: " - << Twine(ltoCgo); + ErrAlways(ctx) << "invalid codegen optimization level for LTO: " << ltoCgo; ctx.arg.ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); ctx.arg.ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); ctx.arg.ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); @@ -2375,8 +2373,9 @@ static void markAddrsig(bool icfSafe, Symbol *s) { // We don't need to keep text sections unique under --icf=all even if they // are address-significant. 
if (auto *d = dyn_cast_or_null(s)) - if (d->section && (icfSafe || !(d->section->flags & SHF_EXECINSTR))) - d->section->keepUnique = true; + if (auto *sec = dyn_cast_or_null(d->section)) + if (icfSafe || !(sec->flags & SHF_EXECINSTR)) + sec->keepUnique = true; } // Record sections that define symbols mentioned in --keep-unique @@ -2391,7 +2390,8 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { Warn(ctx) << "could not find symbol " << name << " to keep unique"; continue; } - d->section->keepUnique = true; + if (auto *sec = dyn_cast(d->section)) + sec->keepUnique = true; } // --icf=all --ignore-data-address-equality means that we can ignore @@ -2700,21 +2700,6 @@ static void redirectSymbols(Ctx &ctx, ArrayRef wrapped) { ctx.symtab->wrap(w.sym, w.real, w.wrap); } -static void reportMissingFeature(Ctx &ctx, StringRef config, - const Twine &report) { - if (config == "error") - ErrAlways(ctx) << report; - else if (config == "warning") - Warn(ctx) << report; -} - -static void checkAndReportMissingFeature(Ctx &ctx, StringRef config, - uint32_t features, uint32_t mask, - const Twine &report) { - if (!(features & mask)) - reportMissingFeature(ctx, config, report); -} - // To enable CET (x86's hardware-assisted control flow enforcement), each // source file must be compiled with -fcf-protection. 
Object files compiled // with the flag contain feature flags indicating that they are compatible @@ -2747,28 +2732,43 @@ static void readSecurityNotes(Ctx &ctx) { bool hasValidPauthAbiCoreInfo = llvm::any_of( ctx.aarch64PauthAbiCoreInfo, [](uint8_t c) { return c != 0; }); + auto report = [&](StringRef config) -> ELFSyncStream { + if (config == "error") + return {ctx, DiagLevel::Err}; + else if (config == "warning") + return {ctx, DiagLevel::Warn}; + return {ctx, DiagLevel::None}; + }; + auto reportUnless = [&](StringRef config, bool cond) -> ELFSyncStream { + if (cond) + return {ctx, DiagLevel::None}; + return report(config); + }; for (ELFFileBase *f : ctx.objectFiles) { uint32_t features = f->andFeatures; - checkAndReportMissingFeature( - ctx, ctx.arg.zBtiReport, features, GNU_PROPERTY_AARCH64_FEATURE_1_BTI, - toStr(ctx, f) + ": -z bti-report: file does not have " - "GNU_PROPERTY_AARCH64_FEATURE_1_BTI property"); - - checkAndReportMissingFeature( - ctx, ctx.arg.zGcsReport, features, GNU_PROPERTY_AARCH64_FEATURE_1_GCS, - toStr(ctx, f) + ": -z gcs-report: file does not have " - "GNU_PROPERTY_AARCH64_FEATURE_1_GCS property"); - - checkAndReportMissingFeature( - ctx, ctx.arg.zCetReport, features, GNU_PROPERTY_X86_FEATURE_1_IBT, - toStr(ctx, f) + ": -z cet-report: file does not have " - "GNU_PROPERTY_X86_FEATURE_1_IBT property"); - - checkAndReportMissingFeature( - ctx, ctx.arg.zCetReport, features, GNU_PROPERTY_X86_FEATURE_1_SHSTK, - toStr(ctx, f) + ": -z cet-report: file does not have " - "GNU_PROPERTY_X86_FEATURE_1_SHSTK property"); + reportUnless(ctx.arg.zBtiReport, + features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) + << f + << ": -z bti-report: file does not have " + "GNU_PROPERTY_AARCH64_FEATURE_1_BTI property"; + + reportUnless(ctx.arg.zGcsReport, + features & GNU_PROPERTY_AARCH64_FEATURE_1_GCS) + << f + << ": -z gcs-report: file does not have " + "GNU_PROPERTY_AARCH64_FEATURE_1_GCS property"; + + reportUnless(ctx.arg.zCetReport, features & 
GNU_PROPERTY_X86_FEATURE_1_IBT) + << f + << ": -z cet-report: file does not have " + "GNU_PROPERTY_X86_FEATURE_1_IBT property"; + + reportUnless(ctx.arg.zCetReport, + features & GNU_PROPERTY_X86_FEATURE_1_SHSTK) + << f + << ": -z cet-report: file does not have " + "GNU_PROPERTY_X86_FEATURE_1_SHSTK property"; if (ctx.arg.zForceBti && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) { features |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI; @@ -2798,11 +2798,11 @@ static void readSecurityNotes(Ctx &ctx) { continue; if (f->aarch64PauthAbiCoreInfo.empty()) { - reportMissingFeature(ctx, ctx.arg.zPauthReport, - toStr(ctx, f) + - ": -z pauth-report: file does not have AArch64 " - "PAuth core info while '" + - referenceFileName + "' has one"); + report(ctx.arg.zPauthReport) + << f + << ": -z pauth-report: file does not have AArch64 " + "PAuth core info while '" + << referenceFileName << "' has one"; continue; } diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 0278c070b24734..4c88723f090d08 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -174,6 +174,7 @@ std::string elf::createResponseFile(const opt::InputArgList &args) { break; case OPT_o: case OPT_Map: + case OPT_dependency_file: case OPT_print_archive_stats: case OPT_why_extract: // If an output path contains directories, "lld @response.txt" will diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 7090ca779b0e7b..606953e94bbadd 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -542,7 +542,7 @@ template void ICF::run() { }); } while (repeat); - Log(ctx) << "ICF needed " << Twine(cnt) << " iterations"; + Log(ctx) << "ICF needed " << cnt << " iterations"; // Merge sections by the equivalence class. 
forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) { diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 3a0ae43b813f48..83a25e1b66cff0 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -133,8 +133,7 @@ static void updateARMVFPArgs(Ctx &ctx, const ARMAttributeParser &attributes, // Object compatible with all conventions. return; default: - ErrAlways(ctx) << f - << ": unknown Tag_ABI_VFP_args value: " << Twine(vfpArgs); + ErrAlways(ctx) << f << ": unknown Tag_ABI_VFP_args value: " << vfpArgs; return; } // Follow ld.bfd and error if there is a mix of calling conventions. @@ -284,7 +283,7 @@ static bool isCompatible(Ctx &ctx, InputFile *file) { StringRef target = !ctx.arg.bfdname.empty() ? ctx.arg.bfdname : ctx.arg.emulation; if (!target.empty()) { - ErrAlways(ctx) << file << " is incompatible with " << target; + Err(ctx) << file << " is incompatible with " << target; return false; } @@ -295,10 +294,10 @@ static bool isCompatible(Ctx &ctx, InputFile *file) { existing = ctx.sharedFiles[0]; else if (!ctx.bitcodeFiles.empty()) existing = ctx.bitcodeFiles[0]; - std::string with; + auto diag = Err(ctx); + diag << file << " is incompatible"; if (existing) - with = " with " + toStr(ctx, existing); - ErrAlways(ctx) << file << " is incompatible" << with; + diag << " with " << existing; return false; } @@ -691,8 +690,7 @@ template void ObjFile::parse(bool ignoreComdats) { // Otherwise, discard group members. 
for (uint32_t secIndex : entries.slice(1)) { if (secIndex >= size) - Fatal(ctx) << this - << ": invalid section index in group: " << Twine(secIndex); + Fatal(ctx) << this << ": invalid section index in group: " << secIndex; this->sections[secIndex] = &InputSection::discarded; } } @@ -748,8 +746,8 @@ bool ObjFile::shouldMerge(const Elf_Shdr &sec, StringRef name) { return false; if (sec.sh_size % entSize) Fatal(ctx) << this << ":(" << name << "): SHF_MERGE section size (" - << Twine(sec.sh_size) << ") must be a multiple of sh_entsize (" - << Twine(entSize) << ")"; + << uint64_t(sec.sh_size) + << ") must be a multiple of sh_entsize (" << entSize << ")"; if (sec.sh_flags & SHF_WRITE) Fatal(ctx) << this << ":(" << name @@ -810,7 +808,7 @@ void ObjFile::initializeSections(bool ignoreComdats, Warn(ctx) << this << ": --icf=safe conservatively ignores " "SHT_LLVM_ADDRSIG [index " - << Twine(i) + << i << "] with sh_link=0 " "(likely created using objcopy or ld -r)"; } @@ -903,9 +901,9 @@ void ObjFile::initializeSections(bool ignoreComdats, // simply handle such sections as non-mergeable ones. Degrading like this // is acceptable because section merging is optional. if (auto *ms = dyn_cast(s)) { - s = makeThreadLocal( - ms->file, ms->flags, ms->type, ms->addralign, - ms->contentMaybeDecompress(), ms->name); + s = makeThreadLocal(ms->file, ms->name, ms->type, + ms->flags, ms->addralign, ms->entsize, + ms->contentMaybeDecompress()); sections[info] = s; } @@ -939,7 +937,8 @@ void ObjFile::initializeSections(bool ignoreComdats, if (sec.sh_link < size) linkSec = this->sections[sec.sh_link]; if (!linkSec) - Fatal(ctx) << this << ": invalid sh_link index: " << Twine(sec.sh_link); + Fatal(ctx) << this + << ": invalid sh_link index: " << uint32_t(sec.sh_link); // A SHF_LINK_ORDER section is discarded if its linked-to section is // discarded. 
@@ -1167,7 +1166,7 @@ void ObjFile::initializeSymbols(const object::ELFFile &obj) { if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { if (value == 0 || value >= UINT32_MAX) Fatal(ctx) << this << ": common symbol '" << sym->getName() - << "' has invalid alignment: " << Twine(value); + << "' has invalid alignment: " << value; hasCommonSyms = true; sym->resolve(ctx, CommonSymbol{ctx, this, StringRef(), binding, stOther, type, value, size}); @@ -1214,7 +1213,7 @@ void ObjFile::initSectionsAndLocalSyms(bool ignoreComdats) { else if (secIdx >= SHN_LORESERVE) secIdx = 0; if (LLVM_UNLIKELY(secIdx >= sections.size())) - Fatal(ctx) << this << ": invalid section index: " << Twine(secIdx); + Fatal(ctx) << this << ": invalid section index: " << secIdx; if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) ErrAlways(ctx) << this << ": non-local symbol (" << i << ") found at index < .symtab's sh_info (" << end << ")"; @@ -1274,7 +1273,7 @@ template void ObjFile::postParse() { else if (secIdx >= SHN_LORESERVE) secIdx = 0; if (LLVM_UNLIKELY(secIdx >= sections.size())) - Fatal(ctx) << this << ": invalid section index: " << Twine(secIdx); + Fatal(ctx) << this << ": invalid section index: " << secIdx; InputSectionBase *sec = sections[secIdx]; if (sec == &InputSection::discarded) { if (sym.traced) { @@ -1577,8 +1576,8 @@ template void SharedFile::parse() { // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) { if (idx >= verneeds.size()) { - ErrAlways(ctx) << "corrupt input file: version need index " - << Twine(idx) << " for symbol " << name + ErrAlways(ctx) << "corrupt input file: version need index " << idx + << " for symbol " << name << " is out of bounds\n>>> defined in " << this; continue; } @@ -1602,8 +1601,8 @@ template void SharedFile::parse() { // VER_NDX_LOCAL. Workaround this bug. 
if (ctx.arg.emachine == EM_MIPS && name == "_gp_disp") continue; - ErrAlways(ctx) << "corrupt input file: version definition index " - << Twine(idx) << " for symbol " << name + ErrAlways(ctx) << "corrupt input file: version definition index " << idx + << " for symbol " << name << " is out of bounds\n>>> defined in " << this; continue; } @@ -1849,8 +1848,9 @@ void BitcodeFile::postParse() { void BinaryFile::parse() { ArrayRef data = arrayRefFromStringRef(mb.getBuffer()); - auto *section = make(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - 8, data, ".data"); + auto *section = + make(this, ".data", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, + /*addralign=*/8, /*entsize=*/0, data); sections.push_back(section); // For each input file foo that is embedded to a result as a binary diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 1221f56dfe68a6..75121285b7b23d 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -52,13 +52,14 @@ static ArrayRef getSectionContents(ObjFile &file, return check(file.getObj().getSectionContents(hdr)); } -InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags, - uint32_t type, uint64_t entsize, - uint32_t link, uint32_t info, - uint32_t addralign, ArrayRef data, - StringRef name, Kind sectionKind) - : SectionBase(sectionKind, file, name, flags, entsize, addralign, type, - info, link), +InputSectionBase::InputSectionBase(InputFile *file, StringRef name, + uint32_t type, uint64_t flags, uint32_t link, + uint32_t info, uint32_t addralign, + uint32_t entsize, ArrayRef data, + Kind sectionKind) + : SectionBase(sectionKind, file, name, type, flags, link, info, addralign, + entsize), + bss(0), decodedCrel(0), keepUnique(0), nopFiller(0), content_(data.data()), size(data.size()) { // In order to reduce memory allocation, we assume that mergeable // sections are smaller than 4 GiB, which is not an unreasonable @@ -95,10 +96,10 @@ template InputSectionBase::InputSectionBase(ObjFile &file, const typename 
ELFT::Shdr &hdr, StringRef name, Kind sectionKind) - : InputSectionBase(&file, getFlags(file.ctx, hdr.sh_flags), hdr.sh_type, - hdr.sh_entsize, hdr.sh_link, hdr.sh_info, - hdr.sh_addralign, getSectionContents(file, hdr), name, - sectionKind) { + : InputSectionBase(&file, name, hdr.sh_type, + getFlags(file.ctx, hdr.sh_flags), hdr.sh_link, + hdr.sh_info, hdr.sh_addralign, hdr.sh_entsize, + getSectionContents(file, hdr), sectionKind) { // We reject object files having insanely large alignments even though // they are allowed by the spec. I think 4GB is a reasonable limitation. // We might want to relax this in the future. @@ -273,7 +274,7 @@ void InputSectionBase::parseCompressedHeader(Ctx &ctx) { "not built with zstd support"; } else { ErrAlways(ctx) << this << ": unsupported compression type (" - << Twine(hdr->ch_type) << ")"; + << uint32_t(hdr->ch_type) << ")"; return; } @@ -355,18 +356,19 @@ std::string InputSectionBase::getObjMsg(uint64_t off) const { PotentialSpillSection::PotentialSpillSection(const InputSectionBase &source, InputSectionDescription &isd) - : InputSection(source.file, source.flags, source.type, source.addralign, {}, - source.name, SectionBase::Spill), + : InputSection(source.file, source.name, source.type, source.flags, + source.addralign, source.addralign, {}, SectionBase::Spill), isd(&isd) {} -InputSection InputSection::discarded(nullptr, 0, 0, 0, ArrayRef(), ""); +InputSection InputSection::discarded(nullptr, "", 0, 0, 0, 0, + ArrayRef()); -InputSection::InputSection(InputFile *f, uint64_t flags, uint32_t type, - uint32_t addralign, ArrayRef data, - StringRef name, Kind k) - : InputSectionBase(f, flags, type, - /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, addralign, data, - name, k) { +InputSection::InputSection(InputFile *f, StringRef name, uint32_t type, + uint64_t flags, uint32_t addralign, uint32_t entsize, + ArrayRef data, Kind k) + : InputSectionBase(f, name, type, flags, + /*link=*/0, /*info=*/0, addralign, /*entsize=*/entsize, + data, k) { 
assert(f || this == &InputSection::discarded); } @@ -1092,7 +1094,7 @@ void InputSection::relocateNonAlloc(Ctx &ctx, uint8_t *buf, // R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC // sections. if (LLVM_LIKELY(expr == R_ABS) || expr == R_DTPREL || expr == R_GOTPLTREL || - expr == R_RISCV_ADD) { + expr == R_RISCV_ADD || expr == R_ARM_SBREL) { target.relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(ctx, addend))); continue; @@ -1437,12 +1439,13 @@ MergeInputSection::MergeInputSection(ObjFile &f, StringRef name) : InputSectionBase(f, header, name, InputSectionBase::Merge) {} -MergeInputSection::MergeInputSection(Ctx &ctx, uint64_t flags, uint32_t type, - uint64_t entsize, ArrayRef data, - StringRef name) - : InputSectionBase(ctx.internalFile, flags, type, entsize, /*link=*/0, +MergeInputSection::MergeInputSection(Ctx &ctx, StringRef name, uint32_t type, + uint64_t flags, uint64_t entsize, + ArrayRef data) + : InputSectionBase(ctx.internalFile, name, type, flags, /*link=*/0, /*info=*/0, - /*alignment=*/entsize, data, name, SectionBase::Merge) {} + /*addralign=*/entsize, entsize, data, + SectionBase::Merge) {} // This function is called after we obtain a complete list of input sections // that need to be linked. This is responsible to split section contents diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 303452fed60d86..268caa547ffed9 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -59,25 +59,17 @@ template struct RelsOrRelas { // sections. class SectionBase { public: - enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output, Class }; - - Kind kind() const { return (Kind)sectionKind; } - - LLVM_PREFERRED_TYPE(Kind) - uint8_t sectionKind : 3; - - // The next two bit fields are only used by InputSectionBase, but we - // put them here so the struct packs better. - - LLVM_PREFERRED_TYPE(bool) - uint8_t bss : 1; - - // Set for sections that should not be folded by ICF. 
- LLVM_PREFERRED_TYPE(bool) - uint8_t keepUnique : 1; + enum Kind : uint8_t { + Regular, + Synthetic, + Spill, + EHFrame, + Merge, + Output, + Class, + }; - uint8_t partition = 1; - uint32_t type; + Kind kind() const { return sectionKind; } // The file which contains this section. For InputSectionBase, its dynamic // type is usually ObjFile, but may be an InputFile of InternalKind @@ -93,10 +85,17 @@ class SectionBase { // These corresponds to the fields in Elf_Shdr. uint64_t flags; - uint32_t addralign; - uint32_t entsize; + uint32_t type; uint32_t link; uint32_t info; + uint32_t addralign; + uint32_t entsize; + + Kind sectionKind; + uint8_t partition = 1; + + // The next two bit fields are only used by InputSectionBase, but we + // put them here so the struct packs better. Ctx &getCtx() const; OutputSection *getOutputSection(); @@ -116,11 +115,11 @@ class SectionBase { protected: constexpr SectionBase(Kind sectionKind, InputFile *file, StringRef name, - uint64_t flags, uint32_t entsize, uint32_t addralign, - uint32_t type, uint32_t info, uint32_t link) - : sectionKind(sectionKind), bss(false), keepUnique(false), type(type), - file(file), name(name), flags(flags), addralign(addralign), - entsize(entsize), link(link), info(info) {} + uint32_t type, uint64_t flags, uint32_t link, + uint32_t info, uint32_t addralign, uint32_t entsize) + : file(file), name(name), flags(flags), type(type), link(link), + info(info), addralign(addralign), entsize(entsize), + sectionKind(sectionKind) {} }; struct SymbolAnchor { @@ -148,15 +147,34 @@ class InputSectionBase : public SectionBase { InputSectionBase(ObjFile &file, const typename ELFT::Shdr &header, StringRef name, Kind sectionKind); - InputSectionBase(InputFile *file, uint64_t flags, uint32_t type, - uint64_t entsize, uint32_t link, uint32_t info, - uint32_t addralign, ArrayRef data, StringRef name, + InputSectionBase(InputFile *file, StringRef name, uint32_t type, + uint64_t flags, uint32_t link, uint32_t info, + uint32_t 
addralign, uint32_t entsize, ArrayRef data, Kind sectionKind); static bool classof(const SectionBase *s) { return s->kind() != Output && s->kind() != Class; } + LLVM_PREFERRED_TYPE(bool) + uint8_t bss : 1; + + // Whether this section is SHT_CREL and has been decoded to RELA by + // relsOrRelas. + LLVM_PREFERRED_TYPE(bool) + uint8_t decodedCrel : 1; + + // Set for sections that should not be folded by ICF. + LLVM_PREFERRED_TYPE(bool) + uint8_t keepUnique : 1; + + // Whether the section needs to be padded with a NOP filler due to + // deleteFallThruJmpInsn. + LLVM_PREFERRED_TYPE(bool) + uint8_t nopFiller : 1; + + mutable bool compressed = false; + // Input sections are part of an output section. Special sections // like .eh_frame and merge sections are first combined into a // synthetic section that is then added to an output section. In all @@ -176,16 +194,6 @@ class InputSectionBase : public SectionBase { // be reset to zero after uses. uint32_t bytesDropped = 0; - mutable bool compressed = false; - - // Whether this section is SHT_CREL and has been decoded to RELA by - // relsOrRelas. - bool decodedCrel = false; - - // Whether the section needs to be padded with a NOP filler due to - // deleteFallThruJmpInsn. - bool nopFiller = false; - void drop_back(unsigned num) { assert(bytesDropped + num < 256); bytesDropped += num; @@ -315,8 +323,8 @@ class MergeInputSection : public InputSectionBase { template MergeInputSection(ObjFile &f, const typename ELFT::Shdr &header, StringRef name); - MergeInputSection(Ctx &, uint64_t flags, uint32_t type, uint64_t entsize, - ArrayRef data, StringRef name); + MergeInputSection(Ctx &, StringRef name, uint32_t type, uint64_t flags, + uint64_t entsize, ArrayRef data); static bool classof(const SectionBase *s) { return s->kind() == Merge; } void splitIntoPieces(); @@ -394,8 +402,9 @@ class EhInputSection : public InputSectionBase { // .eh_frame. It also includes the synthetic sections themselves. 
class InputSection : public InputSectionBase { public: - InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign, - ArrayRef data, StringRef name, Kind k = Regular); + InputSection(InputFile *f, StringRef name, uint32_t type, uint64_t flags, + uint32_t addralign, uint32_t entsize, ArrayRef data, + Kind k = Regular); template InputSection(ObjFile &f, const typename ELFT::Shdr &header, StringRef name); @@ -466,15 +475,17 @@ class PotentialSpillSection : public InputSection { } }; -static_assert(sizeof(InputSection) <= 160, "InputSection is too big"); +#ifndef _WIN32 +static_assert(sizeof(InputSection) <= 152, "InputSection is too big"); +#endif class SyntheticSection : public InputSection { public: Ctx &ctx; - SyntheticSection(Ctx &ctx, uint64_t flags, uint32_t type, uint32_t addralign, - StringRef name) - : InputSection(ctx.internalFile, flags, type, addralign, {}, name, - InputSectionBase::Synthetic), + SyntheticSection(Ctx &ctx, StringRef name, uint32_t type, uint64_t flags, + uint32_t addralign) + : InputSection(ctx.internalFile, name, type, flags, addralign, + /*entsize=*/0, {}, InputSectionBase::Synthetic), ctx(ctx) {} virtual ~SyntheticSection() = default; diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index d8aa2c46cfa5b3..7d24c6750b0d10 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -145,7 +145,9 @@ OutputDesc *LinkerScript::createOutputSection(StringRef name, // There was a forward reference. 
sec = secRef; } else { - sec = make(ctx, name, SHT_PROGBITS, 0); + descPool.emplace_back( + std::make_unique(ctx, name, SHT_PROGBITS, 0)); + sec = descPool.back().get(); if (!secRef) secRef = sec; } @@ -154,10 +156,14 @@ OutputDesc *LinkerScript::createOutputSection(StringRef name, } OutputDesc *LinkerScript::getOrCreateOutputSection(StringRef name) { - OutputDesc *&cmdRef = nameToOutputSection[CachedHashStringRef(name)]; - if (!cmdRef) - cmdRef = make(ctx, name, SHT_PROGBITS, 0); - return cmdRef; + auto &secRef = nameToOutputSection[CachedHashStringRef(name)]; + if (!secRef) { + secRef = descPool + .emplace_back( + std::make_unique(ctx, name, SHT_PROGBITS, 0)) + .get(); + } + return secRef; } // Expands the memory region by the specified size. @@ -1778,7 +1784,7 @@ static void checkMemoryRegion(Ctx &ctx, const MemoryRegion *region, if (osecEnd > regionEnd) { ErrAlways(ctx) << "section '" << osec->name << "' will not fit in region '" << region->name << "': overflowed by " - << Twine(osecEnd - regionEnd) << " bytes"; + << (osecEnd - regionEnd) << " bytes"; } } diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index f5408b4ba3037e..328368fd3b4333 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -299,6 +299,7 @@ class LinkerScript final { }; Ctx &ctx; + SmallVector, 0> descPool; llvm::DenseMap nameToOutputSection; StringRef getOutputSectionName(const InputSectionBase *s) const; diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp index f18d799a8c4e4b..138d35951a3bbc 100644 --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -59,7 +59,9 @@ static std::vector getSymbols(Ctx &ctx) { for (Symbol *b : file->getSymbols()) if (auto *dr = dyn_cast(b)) if (!dr->isSection() && dr->section && dr->section->isLive() && - (dr->file == file || dr->hasFlag(NEEDS_COPY) || dr->section->bss)) + (dr->file == file || dr->hasFlag(NEEDS_COPY) || + (isa(dr->section) && + cast(dr->section)->bss))) v.push_back(dr); return v; } diff --git 
a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 9bcbea250e7db7..31d14df9be71e9 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -67,9 +67,8 @@ void OutputSection::writeHeaderTo(typename ELFT::Shdr *shdr) { OutputSection::OutputSection(Ctx &ctx, StringRef name, uint32_t type, uint64_t flags) - : SectionBase(Output, ctx.internalFile, name, flags, /*entsize=*/0, - /*addralign=*/1, type, - /*info=*/0, /*link=*/0), + : SectionBase(Output, ctx.internalFile, name, type, flags, /*link=*/0, + /*info=*/0, /*addralign=*/1, /*entsize=*/0), ctx(ctx) {} uint64_t OutputSection::getLMA() const { diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e110adead5ad01..d311dba41741c0 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -74,13 +74,12 @@ static std::optional getLinkerScriptLocation(Ctx &ctx, return std::nullopt; } -static std::string getDefinedLocation(Ctx &ctx, const Symbol &sym) { - const char msg[] = "\n>>> defined in "; +static void printDefinedLocation(ELFSyncStream &s, const Symbol &sym) { + s << "\n>>> defined in "; if (sym.file) - return msg + toStr(ctx, sym.file); - if (std::optional loc = getLinkerScriptLocation(ctx, sym)) - return msg + *loc; - return ""; + return void(s << sym.file); + if (std::optional loc = getLinkerScriptLocation(s.ctx, sym)) + return void(s << *loc); } // Construct a message in the following format. 
@@ -88,13 +87,14 @@ static std::string getDefinedLocation(Ctx &ctx, const Symbol &sym) { // >>> defined in /home/alice/src/foo.o // >>> referenced by bar.c:12 (/home/alice/src/bar.c:12) // >>> /home/alice/src/bar.o:(.text+0x1) -static std::string getLocation(Ctx &ctx, InputSectionBase &s, const Symbol &sym, - uint64_t off) { - std::string msg = getDefinedLocation(ctx, sym) + "\n>>> referenced by "; - std::string src = s.getSrcMsg(sym, off); +static void printLocation(ELFSyncStream &s, InputSectionBase &sec, + const Symbol &sym, uint64_t off) { + printDefinedLocation(s, sym); + s << "\n>>> referenced by "; + std::string src = sec.getSrcMsg(sym, off); if (!src.empty()) - msg += src + "\n>>> "; - return msg + s.getObjMsg(off); + s << src << "\n>>> "; + s << sec.getObjMsg(off); } void elf::reportRangeError(Ctx &ctx, uint8_t *loc, const Relocation &rel, @@ -121,7 +121,7 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, const Relocation &rel, if (!errPlace.srcLoc.empty()) diag << "\n>>> referenced by " << errPlace.srcLoc; if (rel.sym && !rel.sym->isSection()) - diag << getDefinedLocation(ctx, *rel.sym); + printDefinedLocation(diag, *rel.sym); if (errPlace.isec && errPlace.isec->name.starts_with(".debug")) diag << "; consider recompiling with -fdebug-types-section to reduce size " @@ -133,8 +133,10 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, int64_t v, int n, auto diag = Err(ctx); diag << getErrorPlace(ctx, loc).loc << msg << " is out of range: " << v << " is not in [" << llvm::minIntN(n) << ", " << llvm::maxIntN(n) << "]"; - if (!sym.getName().empty()) - diag << "; references '" << &sym << '\'' << getDefinedLocation(ctx, sym); + if (!sym.getName().empty()) { + diag << "; references '" << &sym << '\''; + printDefinedLocation(diag, sym); + } } // Build a bitmask with one bit set for each 64 subset of RelExpr. 
@@ -522,42 +524,39 @@ int64_t RelocationScanner::computeMipsAddend(const RelTy &rel, RelExpr expr, // Custom error message if Sym is defined in a discarded section. template -static std::string maybeReportDiscarded(Ctx &ctx, Undefined &sym) { +static void maybeReportDiscarded(Ctx &ctx, ELFSyncStream &msg, Undefined &sym) { auto *file = dyn_cast_or_null>(sym.file); if (!file || !sym.discardedSecIdx) - return ""; + return; ArrayRef objSections = file->template getELFShdrs(); - std::string msg; if (sym.type == ELF::STT_SECTION) { - msg = "relocation refers to a discarded section: "; - msg += CHECK2( + msg << "relocation refers to a discarded section: "; + msg << CHECK2( file->getObj().getSectionName(objSections[sym.discardedSecIdx]), file); } else { - msg = "relocation refers to a symbol in a discarded section: " + - toStr(ctx, sym); + msg << "relocation refers to a symbol in a discarded section: " << &sym; } - msg += "\n>>> defined in " + toStr(ctx, file); + msg << "\n>>> defined in " << file; Elf_Shdr_Impl elfSec = objSections[sym.discardedSecIdx - 1]; if (elfSec.sh_type != SHT_GROUP) - return msg; + return; // If the discarded section is a COMDAT. StringRef signature = file->getShtGroupSignature(objSections, elfSec); if (const InputFile *prevailing = ctx.symtab->comdatGroups.lookup(CachedHashStringRef(signature))) { - msg += "\n>>> section group signature: " + signature.str() + - "\n>>> prevailing definition is in " + toStr(ctx, prevailing); + msg << "\n>>> section group signature: " << signature + << "\n>>> prevailing definition is in " << prevailing; if (sym.nonPrevailing) { - msg += "\n>>> or the symbol in the prevailing group had STB_WEAK " + msg << "\n>>> or the symbol in the prevailing group had STB_WEAK " "binding and the symbol in a non-prevailing group had STB_GLOBAL " "binding. 
Mixing groups with STB_WEAK and STB_GLOBAL binding " "signature is not supported"; } } - return msg; } // Check whether the definition name def is a mangled function name that matches @@ -695,8 +694,9 @@ static const Symbol *getAlternativeSpelling(Ctx &ctx, const Undefined &sym, static void reportUndefinedSymbol(Ctx &ctx, const UndefinedDiag &undef, bool correctSpelling) { Undefined &sym = *undef.sym; + ELFSyncStream msg(ctx, DiagLevel::None); - auto visibility = [&]() -> std::string { + auto visibility = [&]() { switch (sym.visibility()) { case STV_INTERNAL: return "internal "; @@ -709,75 +709,70 @@ static void reportUndefinedSymbol(Ctx &ctx, const UndefinedDiag &undef, } }; - std::string msg; switch (ctx.arg.ekind) { case ELF32LEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF32BEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF64LEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; case ELF64BEKind: - msg = maybeReportDiscarded(ctx, sym); + maybeReportDiscarded(ctx, msg, sym); break; default: llvm_unreachable(""); } - if (msg.empty()) - msg = "undefined " + visibility() + "symbol: " + toStr(ctx, sym); + if (msg.str().empty()) + msg << "undefined " << visibility() << "symbol: " << &sym; const size_t maxUndefReferences = 3; - size_t i = 0; - for (UndefinedDiag::Loc l : undef.locs) { - if (i >= maxUndefReferences) - break; + for (UndefinedDiag::Loc l : + ArrayRef(undef.locs).take_front(maxUndefReferences)) { InputSectionBase &sec = *l.sec; uint64_t offset = l.offset; - msg += "\n>>> referenced by "; + msg << "\n>>> referenced by "; // In the absence of line number information, utilize DW_TAG_variable (if // present) for the enclosing symbol (e.g. var in `int *a[] = {&undef};`). Symbol *enclosing = sec.getEnclosingSymbol(offset); std::string src = sec.getSrcMsg(enclosing ? 
*enclosing : sym, offset); if (!src.empty()) - msg += src + "\n>>> "; - msg += sec.getObjMsg(offset); - i++; + msg << src << "\n>>> "; + msg << sec.getObjMsg(offset); } - if (i < undef.locs.size()) - msg += ("\n>>> referenced " + Twine(undef.locs.size() - i) + " more times") - .str(); + if (maxUndefReferences < undef.locs.size()) + msg << "\n>>> referenced " << (undef.locs.size() - maxUndefReferences) + << " more times"; if (correctSpelling) { std::string pre_hint = ": ", post_hint; if (const Symbol *corrected = getAlternativeSpelling(ctx, sym, pre_hint, post_hint)) { - msg += - "\n>>> did you mean" + pre_hint + toStr(ctx, *corrected) + post_hint; + msg << "\n>>> did you mean" << pre_hint << corrected << post_hint; if (corrected->file) - msg += "\n>>> defined in: " + toStr(ctx, corrected->file); + msg << "\n>>> defined in: " << corrected->file; } } if (sym.getName().starts_with("_ZTV")) - msg += - "\n>>> the vtable symbol may be undefined because the class is missing " - "its key function (see https://lld.llvm.org/missingkeyfunction)"; + msg << "\n>>> the vtable symbol may be undefined because the class is " + "missing its key function " + "(see https://lld.llvm.org/missingkeyfunction)"; if (ctx.arg.gcSections && ctx.arg.zStartStopGC && sym.getName().starts_with("__start_")) { - msg += "\n>>> the encapsulation symbol needs to be retained under " + msg << "\n>>> the encapsulation symbol needs to be retained under " "--gc-sections properly; consider -z nostart-stop-gc " "(see https://lld.llvm.org/ELF/start-stop-gc)"; } if (undef.isWarning) - Warn(ctx) << msg; + Warn(ctx) << msg.str(); else - ctx.e.error(msg, ErrorTag::SymbolNotFound, {sym.getName()}); + ctx.e.error(msg.str(), ErrorTag::SymbolNotFound, {sym.getName()}); } void elf::reportUndefinedSymbols(Ctx &ctx) { @@ -1020,9 +1015,9 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, if (sym.scriptDefined) return true; - Err(ctx) << "relocation " << type - << " cannot refer to absolute 
symbol: " << &sym - << getLocation(ctx, *sec, sym, relOff); + auto diag = Err(ctx); + diag << "relocation " << type << " cannot refer to absolute symbol: " << &sym; + printLocation(diag, *sec, sym, relOff); return true; } @@ -1188,18 +1183,21 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, if (!ctx.arg.shared && sym.isShared() && !(ctx.arg.emachine == EM_AARCH64 && type == R_AARCH64_AUTH_ABS64)) { if (!canDefineSymbolInExecutable(ctx, sym)) { - Err(ctx) << "cannot preempt symbol: " << &sym - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "cannot preempt symbol: " << &sym; + printLocation(diag, *sec, sym, offset); return; } if (sym.isObject()) { // Produce a copy relocation. if (auto *ss = dyn_cast(&sym)) { - if (!ctx.arg.zCopyreloc) - Err(ctx) << "unresolvable relocation " << type << " against symbol '" - << ss << "'; recompile with -fPIC or remove '-z nocopyreloc'" - << getLocation(ctx, *sec, sym, offset); + if (!ctx.arg.zCopyreloc) { + auto diag = Err(ctx); + diag << "unresolvable relocation " << type << " against symbol '" + << ss << "'; recompile with -fPIC or remove '-z nocopyreloc'"; + printLocation(diag, *sec, sym, offset); + } sym.setFlags(NEEDS_COPY); } sec->addReloc({expr, type, offset, addend, &sym}); @@ -1234,20 +1232,26 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // * If a library definition gets preempted to the executable, it will have // the wrong ebx value. 
if (sym.isFunc()) { - if (ctx.arg.pie && ctx.arg.emachine == EM_386) - Err(ctx) << "symbol '" << &sym - << "' cannot be preempted; recompile with -fPIE" - << getLocation(ctx, *sec, sym, offset); + if (ctx.arg.pie && ctx.arg.emachine == EM_386) { + auto diag = Err(ctx); + diag << "symbol '" << &sym + << "' cannot be preempted; recompile with -fPIE"; + printLocation(diag, *sec, sym, offset); + } sym.setFlags(NEEDS_COPY | NEEDS_PLT); sec->addReloc({expr, type, offset, addend, &sym}); return; } } - Err(ctx) << "relocation " << type << " cannot be used against " - << (sym.getName().empty() ? "local symbol" - : ("symbol '" + toStr(ctx, sym) + "'")) - << "; recompile with -fPIC" << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "relocation " << type << " cannot be used against "; + if (sym.getName().empty()) + diag << "local symbol"; + else + diag << "symbol '" << &sym << "'"; + diag << "; recompile with -fPIC"; + printLocation(diag, *sec, sym, offset); } // This function is similar to the `handleTlsRelocation`. MIPS does not @@ -1284,9 +1288,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, int64_t addend) { if (expr == R_TPREL || expr == R_TPREL_NEG) { if (ctx.arg.shared) { - Err(ctx) << "relocation " << type << " against " << &sym - << " cannot be used with -shared" - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "relocation " << type << " against " << &sym + << " cannot be used with -shared"; + printLocation(diag, *sec, sym, offset); return 1; } return 0; @@ -1493,9 +1498,10 @@ void RelocationScanner::scanOne(typename Relocs::const_iterator &i) { // Skip the error check for CREL, which does not set `end`. 
if constexpr (!RelTy::IsCrel) { if (i == end) { - Err(ctx) << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " - "relocation" - << getLocation(ctx, *sec, sym, offset); + auto diag = Err(ctx); + diag << "R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last " + "relocation"; + printLocation(diag, *sec, sym, offset); return; } } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 7e5e713513c473..21fe2a25fa1bd2 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -89,8 +89,8 @@ static ArrayRef getVersion(Ctx &ctx) { // The returned object is a mergeable string section. MergeInputSection *elf::createCommentSection(Ctx &ctx) { auto *sec = - make(ctx, SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1, - getVersion(ctx), ".comment"); + make(ctx, ".comment", SHT_PROGBITS, + SHF_MERGE | SHF_STRINGS, 1, getVersion(ctx)); sec->splitIntoPieces(); return sec; } @@ -99,7 +99,7 @@ MergeInputSection *elf::createCommentSection(Ctx &ctx) { template MipsAbiFlagsSection::MipsAbiFlagsSection(Ctx &ctx, Elf_Mips_ABIFlags flags) - : SyntheticSection(ctx, SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), + : SyntheticSection(ctx, ".MIPS.abiflags", SHT_MIPS_ABIFLAGS, SHF_ALLOC, 8), flags(flags) { this->entsize = sizeof(Elf_Mips_ABIFlags); } @@ -120,23 +120,20 @@ MipsAbiFlagsSection::create(Ctx &ctx) { sec->markDead(); create = true; - std::string filename = toStr(ctx, sec->file); const size_t size = sec->content().size(); // Older version of BFD (such as the default FreeBSD linker) concatenate // .MIPS.abiflags instead of merging. 
To allow for this case (or potential // zero padding) we ignore everything after the first Elf_Mips_ABIFlags if (size < sizeof(Elf_Mips_ABIFlags)) { - ErrAlways(ctx) << filename - << ": invalid size of .MIPS.abiflags section: got " - << Twine(size) << " instead of " - << Twine(sizeof(Elf_Mips_ABIFlags)); + Err(ctx) << sec->file << ": invalid size of .MIPS.abiflags section: got " + << size << " instead of " << sizeof(Elf_Mips_ABIFlags); return nullptr; } auto *s = reinterpret_cast(sec->content().data()); if (s->version != 0) { - ErrAlways(ctx) << filename << ": unexpected .MIPS.abiflags version " - << Twine(s->version); + Err(ctx) << sec->file << ": unexpected .MIPS.abiflags version " + << s->version; return nullptr; } @@ -152,7 +149,7 @@ MipsAbiFlagsSection::create(Ctx &ctx) { flags.flags1 |= s->flags1; flags.flags2 |= s->flags2; flags.fp_abi = - elf::getMipsFpAbiFlag(ctx, flags.fp_abi, s->fp_abi, filename); + elf::getMipsFpAbiFlag(ctx, sec->file, flags.fp_abi, s->fp_abi); }; if (create) @@ -163,7 +160,7 @@ MipsAbiFlagsSection::create(Ctx &ctx) { // .MIPS.options section. 
template MipsOptionsSection::MipsOptionsSection(Ctx &ctx, Elf_Mips_RegInfo reginfo) - : SyntheticSection(ctx, SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), + : SyntheticSection(ctx, ".MIPS.options", SHT_MIPS_OPTIONS, SHF_ALLOC, 8), reginfo(reginfo) { this->entsize = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); } @@ -197,12 +194,10 @@ MipsOptionsSection::create(Ctx &ctx) { for (InputSectionBase *sec : sections) { sec->markDead(); - std::string filename = toStr(ctx, sec->file); ArrayRef d = sec->content(); - while (!d.empty()) { if (d.size() < sizeof(Elf_Mips_Options)) { - ErrAlways(ctx) << filename << ": invalid size of .MIPS.options section"; + Err(ctx) << sec->file << ": invalid size of .MIPS.options section"; break; } @@ -213,8 +208,10 @@ MipsOptionsSection::create(Ctx &ctx) { break; } - if (!opt->size) - Fatal(ctx) << filename << ": zero option descriptor size"; + if (!opt->size) { + Err(ctx) << sec->file << ": zero option descriptor size"; + break; + } d = d.slice(opt->size); } }; @@ -225,7 +222,7 @@ MipsOptionsSection::create(Ctx &ctx) { // MIPS .reginfo section. 
template MipsReginfoSection::MipsReginfoSection(Ctx &ctx, Elf_Mips_RegInfo reginfo) - : SyntheticSection(ctx, SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), + : SyntheticSection(ctx, ".reginfo", SHT_MIPS_REGINFO, SHF_ALLOC, 4), reginfo(reginfo) { this->entsize = sizeof(Elf_Mips_RegInfo); } @@ -256,7 +253,7 @@ MipsReginfoSection::create(Ctx &ctx) { sec->markDead(); if (sec->content().size() != sizeof(Elf_Mips_RegInfo)) { - ErrAlways(ctx) << sec->file << ": invalid size of .reginfo section"; + Err(ctx) << sec->file << ": invalid size of .reginfo section"; return nullptr; } @@ -273,8 +270,9 @@ InputSection *elf::createInterpSection(Ctx &ctx) { StringRef s = ctx.saver.save(ctx.arg.dynamicLinker); ArrayRef contents = {(const uint8_t *)s.data(), s.size() + 1}; - return make(ctx.internalFile, SHF_ALLOC, SHT_PROGBITS, 1, - contents, ".interp"); + return make(ctx.internalFile, ".interp", SHT_PROGBITS, + SHF_ALLOC, + /*addralign=*/1, /*entsize=*/0, contents); } Defined *elf::addSyntheticLocal(Ctx &ctx, StringRef name, uint8_t type, @@ -323,8 +321,8 @@ static size_t getHashSize(Ctx &ctx) { // sets is empty, or some input files didn't have .note.gnu.property sections), // we don't create this section. 
GnuPropertySection::GnuPropertySection(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, - ctx.arg.wordsize, ".note.gnu.property") {} + : SyntheticSection(ctx, ".note.gnu.property", SHT_NOTE, SHF_ALLOC, + ctx.arg.wordsize) {} void GnuPropertySection::writeTo(uint8_t *buf) { write32(ctx, buf, 4); // Name size @@ -365,7 +363,7 @@ size_t GnuPropertySection::getSize() const { } BuildIdSection::BuildIdSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_NOTE, 4, ".note.gnu.build-id"), + : SyntheticSection(ctx, ".note.gnu.build-id", SHT_NOTE, SHF_ALLOC, 4), hashSize(getHashSize(ctx)) {} void BuildIdSection::writeTo(uint8_t *buf) { @@ -383,14 +381,14 @@ void BuildIdSection::writeBuildId(ArrayRef buf) { BssSection::BssSection(Ctx &ctx, StringRef name, uint64_t size, uint32_t alignment) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_NOBITS, alignment, - name) { + : SyntheticSection(ctx, name, SHT_NOBITS, SHF_ALLOC | SHF_WRITE, + alignment) { this->bss = true; this->size = size; } EhFrameSection::EhFrameSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame") {} + : SyntheticSection(ctx, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, 1) {} // Search for an existing CIE record or create a new one. 
// CIE records from input object files are uniquified by their contents @@ -661,8 +659,8 @@ void EhFrameSection::writeTo(uint8_t *buf) { } GotSection::GotSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - ctx.target->gotEntrySize, ".got") { + : SyntheticSection(ctx, ".got", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, + ctx.target->gotEntrySize) { numEntries = ctx.target->gotHeaderEntriesNum; } @@ -745,8 +743,8 @@ static uint64_t getMipsPageCount(uint64_t size) { } MipsGotSection::MipsGotSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, - SHT_PROGBITS, 16, ".got") {} + : SyntheticSection(ctx, ".got", SHT_PROGBITS, + SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, 16) {} void MipsGotSection::addEntry(InputFile &file, Symbol &sym, int64_t addend, RelExpr expr) { @@ -1179,8 +1177,8 @@ void MipsGotSection::writeTo(uint8_t *buf) { // section. I don't know why we have a BSS style type for the section but it is // consistent across both 64-bit PowerPC ABIs as well as the 32-bit PowerPC ABI. GotPltSection::GotPltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - ctx.arg.wordsize, ".got.plt") { + : SyntheticSection(ctx, ".got.plt", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, + ctx.arg.wordsize) { if (ctx.arg.emachine == EM_PPC) { name = ".plt"; } else if (ctx.arg.emachine == EM_PPC64) { @@ -1231,9 +1229,9 @@ static StringRef getIgotPltName(Ctx &ctx) { // On PowerPC64 the GotPltSection type is SHT_NOBITS so we have to follow suit // with the IgotPltSection. IgotPltSection::IgotPltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, + : SyntheticSection(ctx, getIgotPltName(ctx), ctx.arg.emachine == EM_PPC64 ? 
SHT_NOBITS : SHT_PROGBITS, - ctx.target->gotEntrySize, getIgotPltName(ctx)) {} + SHF_ALLOC | SHF_WRITE, ctx.target->gotEntrySize) {} void IgotPltSection::addEntry(Symbol &sym) { assert(ctx.symAux.back().pltIdx == entries.size()); @@ -1252,8 +1250,8 @@ void IgotPltSection::writeTo(uint8_t *buf) { } StringTableSection::StringTableSection(Ctx &ctx, StringRef name, bool dynamic) - : SyntheticSection(ctx, dynamic ? (uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, - name), + : SyntheticSection(ctx, name, SHT_STRTAB, dynamic ? (uint64_t)SHF_ALLOC : 0, + 1), dynamic(dynamic) { // ELF string tables start with a NUL byte. strings.push_back(""); @@ -1296,8 +1294,8 @@ static unsigned getVerDefNum(Ctx &ctx) { template DynamicSection::DynamicSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, - ctx.arg.wordsize, ".dynamic") { + : SyntheticSection(ctx, ".dynamic", SHT_DYNAMIC, SHF_ALLOC | SHF_WRITE, + ctx.arg.wordsize) { this->entsize = ELFT::Is64Bits ? 16 : 8; // .dynamic section is not writable on MIPS and on Fuchsia OS @@ -1651,7 +1649,7 @@ RelocationBaseSection::RelocationBaseSection(Ctx &ctx, StringRef name, int32_t sizeDynamicTag, bool combreloc, unsigned concurrency) - : SyntheticSection(ctx, SHF_ALLOC, type, ctx.arg.wordsize, name), + : SyntheticSection(ctx, name, type, SHF_ALLOC, ctx.arg.wordsize), dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag), relocsVec(concurrency), combreloc(combreloc) {} @@ -1767,11 +1765,11 @@ template void RelocationSection::writeTo(uint8_t *buf) { RelrBaseSection::RelrBaseSection(Ctx &ctx, unsigned concurrency, bool isAArch64Auth) : SyntheticSection( - ctx, SHF_ALLOC, + ctx, isAArch64Auth ? ".relr.auth.dyn" : ".relr.dyn", isAArch64Auth ? SHT_AARCH64_AUTH_RELR : (ctx.arg.useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR), - ctx.arg.wordsize, isAArch64Auth ? 
".relr.auth.dyn" : ".relr.dyn"), + SHF_ALLOC, ctx.arg.wordsize), relocsVec(concurrency) {} void RelrBaseSection::mergeRels() { @@ -2118,8 +2116,8 @@ template bool RelrSection::updateAllocSize(Ctx &ctx) { // Don't allow the section to shrink; otherwise the size of the section can // oscillate infinitely. Trailing 1s do not decode to more relocations. if (relrRelocs.size() < oldSize) { - Log(ctx) << ".relr.dyn needs " << Twine(oldSize - relrRelocs.size()) << - " padding word(s)"; + Log(ctx) << ".relr.dyn needs " << (oldSize - relrRelocs.size()) + << " padding word(s)"; relrRelocs.resize(oldSize, Elf_Relr(1)); } @@ -2128,10 +2126,10 @@ template bool RelrSection::updateAllocSize(Ctx &ctx) { SymbolTableBaseSection::SymbolTableBaseSection(Ctx &ctx, StringTableSection &strTabSec) - : SyntheticSection(ctx, strTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0, + : SyntheticSection(ctx, strTabSec.isDynamic() ? ".dynsym" : ".symtab", strTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, - ctx.arg.wordsize, - strTabSec.isDynamic() ? ".dynsym" : ".symtab"), + strTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0, + ctx.arg.wordsize), strTabSec(strTabSec) {} // Orders symbols according to their positions in the GOT, @@ -2348,7 +2346,7 @@ template void SymbolTableSection::writeTo(uint8_t *buf) { } SymtabShndxSection::SymtabShndxSection(Ctx &ctx) - : SyntheticSection(ctx, 0, SHT_SYMTAB_SHNDX, 4, ".symtab_shndx") { + : SyntheticSection(ctx, ".symtab_shndx", SHT_SYMTAB_SHNDX, 0, 4) { this->entsize = 4; } @@ -2419,8 +2417,8 @@ size_t SymtabShndxSection::getSize() const { // about .gnu.hash, you want to specify --hash-style=gnu. Otherwise, a // safe bet is to specify --hash-style=both for backward compatibility. 
GnuHashTableSection::GnuHashTableSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_GNU_HASH, ctx.arg.wordsize, - ".gnu.hash") {} + : SyntheticSection(ctx, ".gnu.hash", SHT_GNU_HASH, SHF_ALLOC, + ctx.arg.wordsize) {} void GnuHashTableSection::finalizeContents() { if (OutputSection *sec = getPartition(ctx).dynSymTab->getParent()) @@ -2529,7 +2527,7 @@ void GnuHashTableSection::addSymbols(SmallVectorImpl &v) { } HashTableSection::HashTableSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_HASH, 4, ".hash") { + : SyntheticSection(ctx, ".hash", SHT_HASH, SHF_ALLOC, 4) { this->entsize = 4; } @@ -2569,8 +2567,8 @@ void HashTableSection::writeTo(uint8_t *buf) { } PltSection::PltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - ".plt"), + : SyntheticSection(ctx, ".plt", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, + 16), headerSize(ctx.target->pltHeaderSize) { // On PowerPC, this section contains lazy symbol resolvers. if (ctx.arg.emachine == EM_PPC64) { @@ -2630,8 +2628,8 @@ void PltSection::addSymbols() { } IpltSection::IpltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - ".iplt") { + : SyntheticSection(ctx, ".iplt", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, + 16) { if (ctx.arg.emachine == EM_PPC || ctx.arg.emachine == EM_PPC64) { name = ".glink"; addralign = 4; @@ -2737,8 +2735,8 @@ size_t PPC32GlinkSection::getSize() const { // That said, the 2-PLT scheme is a part of the ABI, debuggers and other tools // depend on it, so we implement the ABI. 
IBTPltSection::IBTPltSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - ".plt") {} + : SyntheticSection(ctx, ".plt", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, + 16) {} void IBTPltSection::writeTo(uint8_t *buf) { ctx.target->writeIBTPlt(buf, ctx.in.plt->getNumEntries()); @@ -2752,8 +2750,8 @@ size_t IBTPltSection::getSize() const { bool IBTPltSection::isNeeded() const { return ctx.in.plt->getNumEntries() > 0; } RelroPaddingSection::RelroPaddingSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 1, - ".relro_padding") {} + : SyntheticSection(ctx, ".relro_padding", SHT_NOBITS, SHF_ALLOC | SHF_WRITE, + 1) {} // The string hash function for .gdb_index. static uint32_t computeGdbHash(StringRef s) { @@ -2766,7 +2764,7 @@ static uint32_t computeGdbHash(StringRef s) { // 4-byte alignment ensures that values in the hash lookup table and the name // table are aligned. DebugNamesBaseSection::DebugNamesBaseSection(Ctx &ctx) - : SyntheticSection(ctx, 0, SHT_PROGBITS, 4, ".debug_names") {} + : SyntheticSection(ctx, ".debug_names", SHT_PROGBITS, 0, 4) {} // Get the size of the .debug_names section header in bytes for DWARF32: static uint32_t getDebugNamesHeaderSize(uint32_t augmentationStringSize) { @@ -2872,7 +2870,7 @@ void DebugNamesBaseSection::parseDebugNames( nd.hdr = ni.getHeader(); if (nd.hdr.Format != DwarfFormat::DWARF32) { Err(ctx) << namesSec.sec - << Twine(": found DWARF64, which is currently unsupported"); + << ": found DWARF64, which is currently unsupported"; return; } if (nd.hdr.Version != 5) { @@ -2882,8 +2880,7 @@ void DebugNamesBaseSection::parseDebugNames( uint32_t dwarfSize = dwarf::getDwarfOffsetByteSize(DwarfFormat::DWARF32); DWARFDebugNames::DWARFDebugNamesOffsets locs = ni.getOffsets(); if (locs.EntriesBase > namesExtractor.getData().size()) { - Err(ctx) << namesSec.sec - << Twine(": entry pool start is beyond end of section"); + Err(ctx) << namesSec.sec << ": entry pool start is beyond end 
of section"; return; } @@ -2964,7 +2961,7 @@ void DebugNamesBaseSection::computeHdrAndAbbrevTable( // ForeignTypeUnitCount are left as 0. if (nd.hdr.LocalTypeUnitCount || nd.hdr.ForeignTypeUnitCount) Warn(ctx) << inputChunk.section.sec - << Twine(": type units are not implemented"); + << ": type units are not implemented"; // If augmentation strings are not identical, use an empty string. if (i == 0) { hdr.AugmentationStringSize = nd.hdr.AugmentationStringSize; @@ -3358,7 +3355,7 @@ template void DebugNamesSection::writeTo(uint8_t *buf) { } GdbIndexSection::GdbIndexSection(Ctx &ctx) - : SyntheticSection(ctx, 0, SHT_PROGBITS, 1, ".gdb_index") {} + : SyntheticSection(ctx, ".gdb_index", SHT_PROGBITS, 0, 1) {} // Returns the desired size of an on-disk hash table for a .gdb_index section. // There's a tradeoff between size and collision rate. We aim 75% utilization. @@ -3654,7 +3651,7 @@ void GdbIndexSection::writeTo(uint8_t *buf) { bool GdbIndexSection::isNeeded() const { return !chunks.empty(); } EhFrameHeader::EhFrameHeader(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_PROGBITS, 4, ".eh_frame_hdr") {} + : SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {} void EhFrameHeader::writeTo(uint8_t *buf) { // Unlike most sections, the EhFrameHeader section is written while writing @@ -3699,8 +3696,8 @@ bool EhFrameHeader::isNeeded() const { } VersionDefinitionSection::VersionDefinitionSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t), - ".gnu.version_d") {} + : SyntheticSection(ctx, ".gnu.version_d", SHT_GNU_verdef, SHF_ALLOC, + sizeof(uint32_t)) {} StringRef VersionDefinitionSection::getFileDefName() { if (!getPartition(ctx).name.empty()) @@ -3761,8 +3758,8 @@ size_t VersionDefinitionSection::getSize() const { // .gnu.version is a table where each entry is 2 byte long. 
VersionTableSection::VersionTableSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t), - ".gnu.version") { + : SyntheticSection(ctx, ".gnu.version", SHT_GNU_versym, SHF_ALLOC, + sizeof(uint16_t)) { this->entsize = 2; } @@ -3812,8 +3809,8 @@ void elf::addVerneed(Ctx &ctx, Symbol &ss) { template VersionNeedSection::VersionNeedSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t), - ".gnu.version_r") {} + : SyntheticSection(ctx, ".gnu.version_r", SHT_GNU_verneed, SHF_ALLOC, + sizeof(uint32_t)) {} template void VersionNeedSection::finalizeContents() { for (SharedFile *f : ctx.sharedFiles) { @@ -4020,12 +4017,12 @@ void elf::combineEhSections(Ctx &ctx) { } MipsRldMapSection::MipsRldMapSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - ctx.arg.wordsize, ".rld_map") {} + : SyntheticSection(ctx, ".rld_map", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, + ctx.arg.wordsize) {} ARMExidxSyntheticSection::ARMExidxSyntheticSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, - ctx.arg.wordsize, ".ARM.exidx") {} + : SyntheticSection(ctx, ".ARM.exidx", SHT_ARM_EXIDX, + SHF_ALLOC | SHF_LINK_ORDER, ctx.arg.wordsize) {} static InputSection *findExidxSection(InputSection *isec) { for (InputSection *d : isec->dependentSections) @@ -4250,8 +4247,9 @@ bool ARMExidxSyntheticSection::isNeeded() const { } ThunkSection::ThunkSection(Ctx &ctx, OutputSection *os, uint64_t off) - : SyntheticSection(ctx, SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, - ctx.arg.emachine == EM_PPC64 ? 16 : 4, ".text.thunk") { + : SyntheticSection(ctx, ".text.thunk", SHT_PROGBITS, + SHF_ALLOC | SHF_EXECINSTR, + ctx.arg.emachine == EM_PPC64 ? 
16 : 4) { this->parent = os; this->outSecOff = off; } @@ -4294,7 +4292,7 @@ bool ThunkSection::assignOffsets() { } PPC32Got2Section::PPC32Got2Section(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 4, ".got2") {} + : SyntheticSection(ctx, ".got2", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE, 4) {} bool PPC32Got2Section::isNeeded() const { // See the comment below. This is not needed if there is no other @@ -4327,9 +4325,9 @@ void PPC32Got2Section::finalizeContents() { // position-independent code the section has type SHT_NOBITS since it will be // allocated and filled in by the dynamic linker. PPC64LongBranchTargetSection::PPC64LongBranchTargetSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC | SHF_WRITE, - ctx.arg.isPic ? SHT_NOBITS : SHT_PROGBITS, 8, - ".branch_lt") {} + : SyntheticSection(ctx, ".branch_lt", + ctx.arg.isPic ? SHT_NOBITS : SHT_PROGBITS, + SHF_ALLOC | SHF_WRITE, 8) {} uint64_t PPC64LongBranchTargetSection::getEntryVA(const Symbol *sym, int64_t addend) { @@ -4393,7 +4391,7 @@ static uint8_t getAbiVersion(Ctx &ctx) { uint8_t ver = ctx.objectFiles[0]->abiVersion; for (InputFile *file : ArrayRef(ctx.objectFiles).slice(1)) if (file->abiVersion != ver) - ErrAlways(ctx) << "incompatible ABI version: " << file; + Err(ctx) << "incompatible ABI version: " << file; return ver; } @@ -4442,7 +4440,7 @@ template void elf::writePhdrs(uint8_t *buf, Partition &part) { template PartitionElfHeaderSection::PartitionElfHeaderSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_LLVM_PART_EHDR, 1, "") {} + : SyntheticSection(ctx, "", SHT_LLVM_PART_EHDR, SHF_ALLOC, 1) {} template size_t PartitionElfHeaderSection::getSize() const { @@ -4460,7 +4458,7 @@ void PartitionElfHeaderSection::writeTo(uint8_t *buf) { template PartitionProgramHeadersSection::PartitionProgramHeadersSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_LLVM_PART_PHDR, 1, ".phdrs") {} + : SyntheticSection(ctx, ".phdrs", SHT_LLVM_PART_PHDR, SHF_ALLOC, 1) {} template 
size_t PartitionProgramHeadersSection::getSize() const { @@ -4473,7 +4471,7 @@ void PartitionProgramHeadersSection::writeTo(uint8_t *buf) { } PartitionIndexSection::PartitionIndexSection(Ctx &ctx) - : SyntheticSection(ctx, SHF_ALLOC, SHT_PROGBITS, 4, ".rodata") {} + : SyntheticSection(ctx, ".rodata", SHT_PROGBITS, SHF_ALLOC, 4) {} size_t PartitionIndexSection::getSize() const { return 12 * (ctx.partitions.size() - 1); diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index cf178411e1eacc..4b643e86335510 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -137,8 +137,8 @@ class GotSection final : public SyntheticSection { class GnuStackSection : public SyntheticSection { public: GnuStackSection(Ctx &ctx) - : SyntheticSection(ctx, 0, llvm::ELF::SHT_PROGBITS, 1, - ".note.GNU-stack") {} + : SyntheticSection(ctx, ".note.GNU-stack", llvm::ELF::SHT_PROGBITS, 0, + 1) {} void writeTo(uint8_t *buf) override {} size_t getSize() const override { return 0; } }; @@ -177,7 +177,9 @@ class BssSection final : public SyntheticSection { bool isNeeded() const override { return size != 0; } size_t getSize() const override { return size; } - static bool classof(const SectionBase *s) { return s->bss; } + static bool classof(const SectionBase *s) { + return isa(s) && cast(s)->bss; + } uint64_t size; }; @@ -1084,7 +1086,7 @@ class MergeSyntheticSection : public SyntheticSection { protected: MergeSyntheticSection(Ctx &ctx, StringRef name, uint32_t type, uint64_t flags, uint32_t addralign) - : SyntheticSection(ctx, flags, type, addralign, name) {} + : SyntheticSection(ctx, name, type, flags, addralign) {} }; class MergeTailSection final : public MergeSyntheticSection { @@ -1396,8 +1398,8 @@ class PartitionIndexSection final : public SyntheticSection { class MemtagAndroidNote final : public SyntheticSection { public: MemtagAndroidNote(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, - /*alignment=*/4, 
".note.android.memtag") {} + : SyntheticSection(ctx, ".note.android.memtag", llvm::ELF::SHT_NOTE, + llvm::ELF::SHF_ALLOC, /*addralign=*/4) {} void writeTo(uint8_t *buf) override; size_t getSize() const override; }; @@ -1405,8 +1407,8 @@ class MemtagAndroidNote final : public SyntheticSection { class PackageMetadataNote final : public SyntheticSection { public: PackageMetadataNote(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, - /*alignment=*/4, ".note.package") {} + : SyntheticSection(ctx, ".note.package", llvm::ELF::SHT_NOTE, + llvm::ELF::SHF_ALLOC, /*addralign=*/4) {} void writeTo(uint8_t *buf) override; size_t getSize() const override; }; @@ -1414,9 +1416,9 @@ class PackageMetadataNote final : public SyntheticSection { class MemtagGlobalDescriptors final : public SyntheticSection { public: MemtagGlobalDescriptors(Ctx &ctx) - : SyntheticSection(ctx, llvm::ELF::SHF_ALLOC, + : SyntheticSection(ctx, ".memtag.globals.dynamic", llvm::ELF::SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC, - /*alignment=*/4, ".memtag.globals.dynamic") {} + llvm::ELF::SHF_ALLOC, /*addralign=*/4) {} void writeTo(uint8_t *buf) override; // The size of the section is non-computable until all addresses are // synthetized, because the section's contents contain a sorted diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 203252dbac122e..63d813e550f938 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -84,7 +84,7 @@ void elf::setTarget(Ctx &ctx) { case EM_X86_64: return setX86_64TargetInfo(ctx); default: - Fatal(ctx) << "unsupported e_machine value: " << Twine(ctx.arg.emachine); + Fatal(ctx) << "unsupported e_machine value: " << ctx.arg.emachine; } } diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index ce42d3624a8f5b..fd1e5d33c438af 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -211,8 +211,8 @@ static inline std::string getErrorLoc(Ctx &ctx, const uint8_t *loc) { void processArmCmseSymbols(Ctx &); template uint32_t calcMipsEFlags(Ctx &); -uint8_t 
getMipsFpAbiFlag(Ctx &, uint8_t oldFlag, uint8_t newFlag, - llvm::StringRef fileName); +uint8_t getMipsFpAbiFlag(Ctx &, InputFile *file, uint8_t oldFlag, + uint8_t newFlag); bool isMipsN32Abi(Ctx &, const InputFile &f); bool isMicroMips(Ctx &); bool isMipsR6(Ctx &); @@ -292,7 +292,7 @@ inline void checkAlignment(Ctx &ctx, uint8_t *loc, uint64_t v, int n, if ((v & (n - 1)) != 0) Err(ctx) << getErrorLoc(ctx, loc) << "improper alignment for relocation " << rel.type << ": 0x" << llvm::utohexstr(v) - << " is not aligned to " << Twine(n) << " bytes"; + << " is not aligned to " << n << " bytes"; } // Endianness-aware read/write. diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 67497bad7cb235..a7fbdc07907044 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1572,8 +1572,8 @@ template void Writer::finalizeAddressDependentContent() { if (osec->addr % osec->addralign != 0) Warn(ctx) << "address (0x" << Twine::utohexstr(osec->addr) << ") of section " << osec->name - << " is not a multiple of alignment (" - << Twine(osec->addralign) << ")"; + << " is not a multiple of alignment (" << osec->addralign + << ")"; } // Sizes are no longer allowed to grow, so all allowable spills have been @@ -2794,7 +2794,7 @@ template void Writer::openFile() { if (fileSize != size_t(fileSize) || maxSize < fileSize) { std::string msg; raw_string_ostream s(msg); - s << "output file too large: " << Twine(fileSize) << " bytes\n" + s << "output file too large: " << fileSize << " bytes\n" << "section sizes:\n"; for (OutputSection *os : ctx.outputSections) s << os->name << ' ' << os->size << "\n"; diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 41bcd58acc27f7..a03561be925a85 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -166,6 +166,7 @@ struct Configuration { llvm::StringRef installName; llvm::StringRef clientName; llvm::StringRef mapFile; + llvm::StringRef ltoNewPmPasses; llvm::StringRef ltoObjPath; llvm::StringRef thinLTOJobs; llvm::StringRef umbrella; @@ 
-239,6 +240,7 @@ struct Configuration { SymtabPresence localSymbolsPresence = SymtabPresence::All; SymbolPatterns localSymbolPatterns; llvm::SmallVector mllvmOpts; + llvm::SmallVector passPlugins; bool zeroModTime = true; bool generateUuid = true; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 53b4372435ab54..4f39613ac17dec 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1741,6 +1741,7 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, config->umbrella = arg->getValue(); } config->ltoObjPath = args.getLastArgValue(OPT_object_path_lto); + config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes); config->thinLTOCacheDir = args.getLastArgValue(OPT_cache_path_lto); config->thinLTOCachePolicy = getLTOCachePolicy(args); config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files); @@ -2110,6 +2111,8 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, config->mllvmOpts.emplace_back(arg->getValue()); } + config->passPlugins = args::getStrings(args, OPT_load_pass_plugins); + createSyntheticSections(); createSyntheticSymbols(); addSynthenticMethnames(); diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp index 28f5290edb58e3..2eeca44ecbb3c4 100644 --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -44,6 +44,9 @@ static lto::Config createConfig() { c.Options.EmitAddrsig = config->icfLevel == ICFLevel::safe; for (StringRef C : config->mllvmOpts) c.MllvmArgs.emplace_back(C.str()); + for (StringRef pluginFn : config->passPlugins) + c.PassPlugins.push_back(std::string(pluginFn)); + c.OptPipeline = std::string(config->ltoNewPmPasses); c.CodeModel = getCodeModelFromCMModel(); c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 739d1da15d4660..d453ea3b9e11d9 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -162,6 +162,12 @@ def no_objc_category_merging : Flag<["-"], "no_objc_category_merging">, Group; def lto_debug_pass_manager: 
Flag<["--"], "lto-debug-pass-manager">, HelpText<"Debug new pass manager">, Group; +def lto_newpm_passes: Joined<["--"], "lto-newpm-passes=">, + HelpText<"Passes to run during LTO">, Group; +def load_pass_plugins : Separate<["--"], "load-pass-plugin">, Group; +def load_pass_plugins_eq : Joined<["--"], "load-pass-plugin=">, + Alias(load_pass_plugins)>, + HelpText<"Load passes from plugin library">, Group; def codegen_data_generate_path : Separate<["--"], "codegen-data-generate-path">, Group; def codegen_data_generate_path_eq : Joined<["--"], "codegen-data-generate-path=">, Alias(codegen_data_generate_path)>, MetaVarName<"">, diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h index ee11f178939710..79e20be2bb6be8 100644 --- a/lld/include/lld/Common/ErrorHandler.h +++ b/lld/include/lld/Common/ErrorHandler.h @@ -71,6 +71,7 @@ #include "lld/Common/LLVM.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -151,20 +152,21 @@ void message(const Twine &msg, llvm::raw_ostream &s = outs()); void warn(const Twine &msg); uint64_t errorCount(); -enum class DiagLevel { Log, Msg, Warn, Err, Fatal }; +enum class DiagLevel { None, Log, Msg, Warn, Err, Fatal }; // A class that synchronizes thread writing to the same stream similar // std::osyncstream. 
class SyncStream { ErrorHandler &e; DiagLevel level; - std::string buf; + llvm::SmallString<0> buf; public: - mutable llvm::raw_string_ostream os{buf}; + mutable llvm::raw_svector_ostream os{buf}; SyncStream(ErrorHandler &e, DiagLevel level) : e(e), level(level) {} SyncStream(SyncStream &&o) : e(o.e), level(o.level), buf(std::move(o.buf)) {} ~SyncStream(); + StringRef str() { return os.str(); } }; [[noreturn]] void exitLld(int val); diff --git a/lld/test/COFF/arm64ec.test b/lld/test/COFF/arm64ec.test index e50b14ce0184c8..75288e97e598dd 100644 --- a/lld/test/COFF/arm64ec.test +++ b/lld/test/COFF/arm64ec.test @@ -4,6 +4,7 @@ RUN: split-file %s %t.dir && cd %t.dir RUN: llvm-mc -filetype=obj -triple=aarch64-windows arm64-data-sym.s -o arm64-data-sym.obj RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-data-sym.s -o arm64ec-data-sym.obj RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-data-sym.s -o x86_64-data-sym.obj +RUN: llvm-mc -filetype=obj -triple=i686-windows x86_64-data-sym.s -o i686-data-sym.obj RUN: llvm-cvtres -machine:arm64x -out:arm64x-resource.obj %S/Inputs/resource.res RUN: lld-link -out:test.dll -machine:arm64ec arm64ec-data-sym.obj -dll -noentry @@ -46,6 +47,26 @@ RUN: not lld-link -out:test.dll -machine:arm64 arm64-data-sym.obj x86_64-data-sy RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT3 %s INCOMPAT3: lld-link: error: x86_64-data-sym.obj: machine type x64 conflicts with arm64 +arm64ec machine type can't be inferred, it must be specified explicitly. 
+RUN: not lld-link -out:test.dll arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s +INCOMPAT4: lld-link: error: arm64ec-data-sym.obj: machine type arm64ec is ambiguous and cannot be inferred, use /machine:arm64ec or /machine:arm64x + +RUN: not lld-link -out:test.dll x86_64-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + +RUN: not lld-link -out:test.dll arm64-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + +RUN: not lld-link -out:test.dll i686-data-sym.obj arm64ec-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT5 %s +INCOMPAT5: lld-link: error: arm64ec-data-sym.obj: machine type arm64ec conflicts with x86 + +arm64x can be inferred and when mixed with ARM64, the first one wins +RUN: lld-link -out:test.dll -dll -noentry arm64x-resource.obj arm64-data-sym.obj x86_64-data-sym.obj arm64ec-data-sym.obj +RUN: not lld-link -out:test.dll -dll -noentry arm64-data-sym.obj arm64x-resource.obj x86_64-data-sym.obj 2>&1 | FileCheck -check-prefix=INCOMPAT3 %s +RUN: not lld-link -out:test.dll -dll -noentry arm64-data-sym.obj arm64x-resource.obj arm64ec-data-sym.obj 2>&1 | FileCheck -check-prefix=INCOMPAT4 %s + #--- arm64ec-data-sym.s .data .globl arm64ec_data_sym diff --git a/lld/test/ELF/arm-rwpi-debug-relocs.s b/lld/test/ELF/arm-rwpi-debug-relocs.s new file mode 100644 index 00000000000000..2bb968d4afa9ab --- /dev/null +++ b/lld/test/ELF/arm-rwpi-debug-relocs.s @@ -0,0 +1,54 @@ +/// Test that R_ARM_SBREL32 relocations in debug info are relocated as if the +/// static base register (r9) is zero. Real DWARF info will use an expression to +/// add this to the real value of the static base at runtime. 
+ +// REQUIRES: arm +// RUN: rm -rf %t && split-file %s %t && cd %t + +// RUN: llvm-mc -filetype=obj -triple=armv7a asm.s -o obj.o +// RUN: ld.lld -T lds.ld obj.o -o exe.elf 2>&1 | FileCheck %s --implicit-check-not=warning: --allow-empty +// RUN: llvm-objdump -D exe.elf | FileCheck --check-prefix=DISASM %s + +// DISASM-LABEL: : +// DISASM-NEXT: 1000: 0000002a + +// DISASM-LABEL: : +// DISASM-NEXT: 2000: 000004d2 + +// DISASM-LABEL: <.debug_something>: +// DISASM-NEXT: 0: 00001000 +// DISASM-NEXT: ... +// DISASM-NEXT: 104: 00002000 + +//--- lds.ld +SECTIONS { + data1 0x1000 : { *(data1) } + data2 0x2000 : { *(data2) } +} + +//--- asm.s + .text + .type _start,%function + .globl _start +_start: + bx lr + .size _start, .-_start + + .section data1, "aw", %progbits + .type rw,%object + .globl rw +rw: + .long 42 + .size rw, 4 + + .section data2, "aw", %progbits + .type rw2,%object + .globl rw2 +rw2: + .long 1234 + .size rw2, 4 + + .section .debug_something, "", %progbits + .long rw(sbrel) + .space 0x100 + .long rw2(sbrel) diff --git a/lld/test/ELF/incompatible.s b/lld/test/ELF/incompatible.s index 39c25106f4d721..0d25acd857610b 100644 --- a/lld/test/ELF/incompatible.s +++ b/lld/test/ELF/incompatible.s @@ -6,11 +6,11 @@ // RUN: not ld.lld %ta.o %tb.o -o /dev/null 2>&1 | \ // RUN: FileCheck --check-prefix=A-AND-B %s -// A-AND-B: b.o is incompatible with {{.*}}a.o +// A-AND-B: error: {{.*}}b.o is incompatible with {{.*}}a.o -// RUN: not ld.lld %tb.o %tc.o -o /dev/null 2>&1 | \ +// RUN: ld.lld --noinhibit-exec %tb.o %tc.o -o /dev/null 2>&1 | \ // RUN: FileCheck --check-prefix=B-AND-C %s -// B-AND-C: c.o is incompatible with {{.*}}b.o +// B-AND-C: warning: {{.*}}c.o is incompatible with {{.*}}b.o // RUN: not ld.lld %ta.o %ti686.so -o /dev/null 2>&1 | \ // RUN: FileCheck --check-prefix=A-AND-SO %s @@ -69,8 +69,8 @@ // RUN: rm -f %t.a // RUN: llvm-ar rc %t.a %tc.o // RUN: llvm-mc -filetype=obj -triple=i686-linux %s -o %td.o -// RUN: not ld.lld %t.a %td.o 2>&1 -o /dev/null | 
FileCheck --check-prefix=ARCHIVE %s -// ARCHIVE: {{.*}}d.o is incompatible +// RUN: ld.lld --noinhibit-exec %t.a %td.o 2>&1 -o /dev/null | FileCheck --check-prefix=ARCHIVE %s +// ARCHIVE: warning: {{.*}}d.o is incompatible{{$}} .global _start _start: .data diff --git a/lld/test/ELF/linkerscript/symbol-location.s b/lld/test/ELF/linkerscript/symbol-location.s index 4620982bf3f206..fd5cc9de048f1d 100644 --- a/lld/test/ELF/linkerscript/symbol-location.s +++ b/lld/test/ELF/linkerscript/symbol-location.s @@ -2,6 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o # RUN: echo 'foo = _start;' > %t.script # RUN: not ld.lld -shared -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -shared --defsym 'foo = _start' %t.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK2 ## Here we check that symbol 'foo' location is reported properly. @@ -9,6 +10,10 @@ # CHECK: >>> defined in {{.*}}.script:1 # CHECK: >>> referenced by {{.*}}.o:(.text+0x1) +# CHECK2: error: relocation R_X86_64_PC32 cannot be used against symbol 'foo' +# CHECK2: >>> defined in --defsym{{$}} +# CHECK2: >>> referenced by {{.*}}.o:(.text+0x1) + .text .globl _start _start: diff --git a/lld/test/ELF/merge-addr.s b/lld/test/ELF/merge-addr.s new file mode 100644 index 00000000000000..b36619788083b3 --- /dev/null +++ b/lld/test/ELF/merge-addr.s @@ -0,0 +1,63 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld %t.o -o %t.so -shared --section-start cst4=0x800 --section-start str=0x1000 +# RUN: llvm-readelf -r -S %t.so | FileCheck %s +# RUN: llvm-objdump -s %t.so | FileCheck %s --check-prefix=OBJDUMP + +# RUN: ld.lld %t.o -o %t0.so -O0 -shared --section-start cst4=0x800 --section-start str=0x1000 +# RUN: llvm-objdump -s %t0.so | FileCheck %s --check-prefix=OBJDUMP0 +# RUN: ld.lld %t.o -o %t2.so -O2 -shared --section-start cst4=0x800 --section-start str=0x1000 +# RUN: llvm-objdump -s %t2.so | FileCheck %s --check-prefix=OBJDUMP2 + +# CHECK: Name Type 
Address Off Size ES Flg Lk Inf Al +# CHECK: cst4 PROGBITS 0000000000000800 000800 000004 04 AM 0 0 1 +# CHECK-NEXT: str PROGBITS 0000000000001000 001000 000009 01 AMS 0 0 1 + +# CHECK: Relocation section '.rela.dyn' +# CHECK-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# CHECK-NEXT: R_X86_64_RELATIVE 802 +# CHECK-NEXT: R_X86_64_RELATIVE 1000 +# CHECK-NEXT: R_X86_64_RELATIVE 1006 +# CHECK-NEXT: R_X86_64_RELATIVE 1002 +# CHECK-EMPTY: + +# OBJDUMP: Contents of section str: +# OBJDUMP-NEXT: 1000 61006162 63006263 00 a.abc.bc. +# OBJDUMP: Contents of section .data: +# OBJDUMP-NEXT: 00000000 00000000 00000000 00000000 ................ +# OBJDUMP-NEXT: 00000000 00000000 ........ +# OBJDUMP: Contents of section .bar: +# OBJDUMP-NEXT: 0000 00080000 00000000 00080000 00000000 ................ + +# OBJDUMP0: Contents of section cst4: +# OBJDUMP0-NEXT: 0800 2a000000 2a000000 *...*... +# OBJDUMP0-NEXT: Contents of section str: +# OBJDUMP0-NEXT: 1000 61626300 61006263 00626300 abc.a.bc.bc. + +# OBJDUMP2: Contents of section cst4: +# OBJDUMP2-NEXT: 0800 2a000000 *... +# OBJDUMP2-NEXT: Contents of section str: +# OBJDUMP2-NEXT: 1000 61626300 6100 abc.a. 
+ +.section cst4,"aM",@progbits,4 +.long 42 +.long 42 + +.section str,"aMS",@progbits,1 +abc: +.asciz "abc" +a: +.asciz "a" +bc: +.asciz "bc" +.asciz "bc" + +.data +.quad cst4 + 6 +.quad a +.quad bc +.quad abc + +.section .bar +.quad cst4 +.quad cst4 + 4 diff --git a/lld/test/ELF/merge-reloc.s b/lld/test/ELF/merge-reloc.s index a343d5679b58e2..75a48099f50892 100644 --- a/lld/test/ELF/merge-reloc.s +++ b/lld/test/ELF/merge-reloc.s @@ -1,91 +1,57 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -# RUN: ld.lld %t.o -r -o %t-rel -# RUN: llvm-readobj -S --section-data %t-rel | FileCheck %s +# RUN: ld.lld %t.o -r -o %t.ro +# RUN: llvm-readelf -S %t.ro | FileCheck %s +# RUN: llvm-objdump -s %t.ro | FileCheck %s --check-prefix=OBJDUMP -# When linker generates a relocatable object it does string merging in the same -# way as for regular link. It should keep SHF_MERGE flag and set proper sh_entsize -# value so that final link can perform the final merging optimization. 
+# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -S %t | FileCheck %s --check-prefix=CHECK-PDE -# CHECK: Section { -# CHECK: Index: -# CHECK: Name: .rodata.1 ( -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_MERGE -# CHECK-NEXT: ] -# CHECK-NEXT: Address: -# CHECK-NEXT: Offset: -# CHECK-NEXT: Size: 4 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 4 -# CHECK-NEXT: EntrySize: 4 -# CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 42000000 -# CHECK-NEXT: ) -# CHECK-NEXT: } -# CHECK: Section { -# CHECK: Index: -# CHECK: Name: .rodata.2 ( -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_MERGE -# CHECK-NEXT: ] -# CHECK-NEXT: Address: -# CHECK-NEXT: Offset: -# CHECK-NEXT: Size: 8 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 8 -# CHECK-NEXT: EntrySize: 8 -# CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 42000000 42000000 -# CHECK-NEXT: ) -# CHECK-NEXT: } -# CHECK: Section { -# CHECK: Index: -# CHECK: Name: .data -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_WRITE -# CHECK-NEXT: ] -# CHECK-NEXT: Address: -# CHECK-NEXT: Offset: -# CHECK-NEXT: Size: 16 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 1 -# CHECK-NEXT: EntrySize: 0 -# CHECK-NEXT: SectionData ( -# CHECK-NEXT: 0000: 42000000 42000000 42000000 42000000 -# CHECK-NEXT: ) -# CHECK-NEXT: } +# CHECK: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# CHECK-NEXT: [ 1] .text PROGBITS 0000000000000000 000040 000000 00 AX 0 0 4 +# CHECK-NEXT: [ 2] .rodata.1 PROGBITS 0000000000000000 000040 000004 04 AM 0 0 4 +# CHECK-NEXT: [ 3] .rodata.2 PROGBITS 0000000000000000 000048 000008 08 AM 0 0 8 +# CHECK-NEXT: [ 4] .rodata.cst8 PROGBITS 0000000000000000 000050 000010 08 AM 0 0 1 +# CHECK-NEXT: [ 5] .rela.rodata.cst8 
RELA 0000000000000000 000068 000030 18 I 9 4 8 +# CHECK-NEXT: [ 6] .cst4 PROGBITS 0000000000000000 000060 000008 04 AM 0 0 1 +# CHECK-NEXT: [ 7] .rela.cst4 RELA 0000000000000000 000098 000030 18 I 9 6 8 + +# OBJDUMP: Contents of section .rodata.1: +# OBJDUMP-NEXT: 0000 42000000 B... +# OBJDUMP-NEXT: Contents of section .rodata.2: +# OBJDUMP-NEXT: 0000 42000000 42000000 B...B... +# OBJDUMP-NEXT: Contents of section .rodata.cst8: +# OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 ................ +# OBJDUMP: Contents of section .cst4: +# OBJDUMP-NEXT: 0000 00000000 00000000 ........ + +# CHECK-PDE: [ 2] .cst4 PROGBITS 0000000000200140 000140 000008 04 AM 0 0 1 - .section .rodata.1,"aM",@progbits,4 - .align 4 - .global foo foo: - .long 0x42 - .long 0x42 - .long 0x42 - .section .rodata.2,"aM",@progbits,8 - .align 8 - .global bar -bar: - .long 0x42 - .long 0x42 - .long 0x42 - .long 0x42 +.section .rodata.1,"aM",@progbits,4 +.align 4 +.long 0x42 +.long 0x42 +.long 0x42 + +.section .rodata.2,"aM",@progbits,8 +.align 8 +.long 0x42 +.long 0x42 +.long 0x42 +.long 0x42 + +## Test that we keep a SHT_REL[A] section which relocates a SHF_MERGE section +## in -r mode. The relocated SHF_MERGE section is handled as non-mergeable. +.section .rodata.cst8,"aM",@progbits,8,unique,0 +.quad foo + +.section .rodata.cst8,"aM",@progbits,8,unique,1 +.quad foo - .data - .global gar -zed: - .long 0x42 - .long 0x42 - .long 0x42 - .long 0x42 +.section .cst4,"aM",@progbits,4,unique,0 +.long foo +.section .cst4,"aM",@progbits,4,unique,1 +.long foo diff --git a/lld/test/ELF/merge-relocatable.s b/lld/test/ELF/merge-relocatable.s deleted file mode 100644 index d376f4ca0b4227..00000000000000 --- a/lld/test/ELF/merge-relocatable.s +++ /dev/null @@ -1,23 +0,0 @@ -# REQUIRES: x86 - -## Test that we keep a SHT_REL[A] section which relocates a SHF_MERGE section -## in -r mode. The relocated SHF_MERGE section is handled as non-mergeable. 
- -# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o -# RUN: ld.lld -r %t.o -o %t -# RUN: llvm-readobj -S %t | FileCheck %s - -# CHECK: Name: .rodata.cst8 -# CHECK-NOT: } -# CHECK: Size: 16 -# CHECK: Name: .rela.rodata.cst8 -# CHECK-NOT: } -# CHECK: Size: 48 - -foo: - -.section .rodata.cst8,"aM",@progbits,8,unique,0 -.quad foo - -.section .rodata.cst8,"aM",@progbits,8,unique,1 -.quad foo diff --git a/lld/test/ELF/merge-shared-str.s b/lld/test/ELF/merge-shared-str.s deleted file mode 100644 index 9ecdd64e973100..00000000000000 --- a/lld/test/ELF/merge-shared-str.s +++ /dev/null @@ -1,28 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld %t.o -o %t.so -shared -O3 -// RUN: llvm-readobj -r -S %t.so | FileCheck %s - - - .section foo,"aMS",@progbits,1 - .asciz "bar" - .asciz "ar" - - .data - .quad foo + 4 - - -// CHECK: Name: foo -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: SHF_MERGE -// CHECK-NEXT: SHF_STRINGS -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x260 - -// CHECK: Relocations [ -// CHECK-NEXT: Section ({{.*}}) .rela.dyn { -// CHECK-NEXT: 0x{{.*}} R_X86_64_RELATIVE - 0x261 -// CHECK-NEXT: } -// CHECK-NEXT: ] diff --git a/lld/test/ELF/merge-shared.s b/lld/test/ELF/merge-shared.s deleted file mode 100644 index 12cb738c1077ef..00000000000000 --- a/lld/test/ELF/merge-shared.s +++ /dev/null @@ -1,26 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld %t.o -o %t.so -shared -// RUN: llvm-readobj -r -S %t.so | FileCheck %s - - .section foo,"aM",@progbits,4 - .long 42 - .long 42 - - .data - .quad foo + 6 - - -// CHECK: Name: foo -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: SHF_MERGE -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x260 - -// CHECK: Relocations [ -// CHECK-NEXT: Section ({{.*}}) .rela.dyn { -// CHECK-NEXT: 0x{{.*}} R_X86_64_RELATIVE - 
0x262 -// CHECK-NEXT: } -// CHECK-NEXT: ] diff --git a/lld/test/ELF/merge-string.s b/lld/test/ELF/merge-string.s deleted file mode 100644 index 549195d5cf8058..00000000000000 --- a/lld/test/ELF/merge-string.s +++ /dev/null @@ -1,105 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld -O 2 %t.o -o %t.so -shared -// RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck %s -// RUN: ld.lld -O 1 %t.o -o %t.so -shared -// RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck --check-prefix=NOTAIL %s -// RUN: ld.lld -O 0 %t.o -o %t.so -shared -// RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck --check-prefix=NOMERGE %s - - .section .rodata1,"aMS",@progbits,1 - .asciz "abc" -foo: - .ascii "a" -bar: - .asciz "bc" - .asciz "bc" - - .section .rodata2,"aMS",@progbits,2 - .p2align 1 -zed: - .short 20 - .short 0 - -// CHECK: Name: .rodata1 -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: SHF_MERGE -// CHECK-NEXT: SHF_STRINGS -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x20D -// CHECK-NEXT: Offset: 0x20D -// CHECK-NEXT: Size: 4 -// CHECK-NEXT: Link: 0 -// CHECK-NEXT: Info: 0 -// CHECK-NEXT: AddressAlignment: 1 -// CHECK-NEXT: EntrySize: 1 -// CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 61626300 |abc.| -// CHECK-NEXT: ) - -// NOTAIL: Name: .rodata1 -// NOTAIL-NEXT: Type: SHT_PROGBITS -// NOTAIL-NEXT: Flags [ -// NOTAIL-NEXT: SHF_ALLOC -// NOTAIL-NEXT: SHF_MERGE -// NOTAIL-NEXT: SHF_STRINGS -// NOTAIL-NEXT: ] -// NOTAIL-NEXT: Address: 0x20D -// NOTAIL-NEXT: Offset: 0x20D -// NOTAIL-NEXT: Size: 7 -// NOTAIL-NEXT: Link: 0 -// NOTAIL-NEXT: Info: 0 -// NOTAIL-NEXT: AddressAlignment: 1 -// NOTAIL-NEXT: EntrySize: 1 -// NOTAIL-NEXT: SectionData ( -// NOTAIL-NEXT: 0000: 61626300 626300 |abc.bc.| -// NOTAIL-NEXT: ) - -// NOMERGE: Name: .rodata1 -// NOMERGE-NEXT: Type: SHT_PROGBITS -// NOMERGE-NEXT: Flags [ -// NOMERGE-NEXT: SHF_ALLOC -// 
NOMERGE-NEXT: SHF_MERGE -// NOMERGE-NEXT: SHF_STRINGS -// NOMERGE-NEXT: ] -// NOMERGE-NEXT: Address: 0x20D -// NOMERGE-NEXT: Offset: 0x20D -// NOMERGE-NEXT: Size: 11 -// NOMERGE-NEXT: Link: 0 -// NOMERGE-NEXT: Info: 0 -// NOMERGE-NEXT: AddressAlignment: 1 -// NOMERGE-NEXT: EntrySize: 1 -// NOMERGE-NEXT: SectionData ( -// NOMERGE-NEXT: 0000: 61626300 61626300 626300 |abc.abc.bc.| -// NOMERGE-NEXT: ) - -// CHECK: Name: .rodata2 -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: SHF_ALLOC -// CHECK-NEXT: SHF_MERGE -// CHECK-NEXT: SHF_STRINGS -// CHECK-NEXT: ] -// CHECK-NEXT: Address: 0x212 -// CHECK-NEXT: Offset: 0x212 -// CHECK-NEXT: Size: 4 -// CHECK-NEXT: Link: 0 -// CHECK-NEXT: Info: 0 -// CHECK-NEXT: AddressAlignment: 2 -// CHECK-NEXT: EntrySize: 2 -// CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 14000000 |....| -// CHECK-NEXT: ) - - -// CHECK: Name: foo -// CHECK-NEXT: Value: 0x20D - -// CHECK: Name: bar -// CHECK-NEXT: Value: 0x20E - -// CHECK: Name: zed -// CHECK-NEXT: Value: 0x212 -// CHECK-NEXT: Size: 0 diff --git a/lld/test/ELF/merge-to-non-alloc.s b/lld/test/ELF/merge-to-non-alloc.s deleted file mode 100644 index 17e826ed5bb0cb..00000000000000 --- a/lld/test/ELF/merge-to-non-alloc.s +++ /dev/null @@ -1,33 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld %t.o -o %t.so -shared -// RUN: llvm-readobj -S --section-data --symbols %t.so | FileCheck %s - -// CHECK: Name: .bar -// CHECK-NEXT: Type: SHT_PROGBITS -// CHECK-NEXT: Flags [ -// CHECK-NEXT: ] -// CHECK-NEXT: Address: -// CHECK-NEXT: Offset: -// CHECK-NEXT: Size: 16 -// CHECK-NEXT: Link: -// CHECK-NEXT: Info: -// CHECK-NEXT: AddressAlignment: -// CHECK-NEXT: EntrySize: -// CHECK-NEXT: SectionData ( -// CHECK-NEXT: 0000: 10020000 00000000 18020000 00000000 | -// CHECK-NEXT: ) - -// CHECK: Name: foo -// CHECK-NEXT: Value: 0x210 - - .section .foo,"aM",@progbits,4 - .align 4 - .global foo - .hidden foo -foo: - .long 0x42 - - 
.section .bar - .quad foo - .quad foo + 8 diff --git a/lld/test/ELF/reproduce.s b/lld/test/ELF/reproduce.s index 8818a9e35f4039..29dc109d5a4126 100644 --- a/lld/test/ELF/reproduce.s +++ b/lld/test/ELF/reproduce.s @@ -76,11 +76,12 @@ ## Check that directory path is stripped from -o # RUN: mkdir -p %t.dir/build4/a/b/c # RUN: cd %t.dir -# RUN: ld.lld build1/foo.o -o build4/a/b/c/bar -Map build4/map --print-archive-stats=build4/stats \ +# RUN: ld.lld build1/foo.o -o build4/a/b/c/bar -Map build4/map --dependency-file=build4/bar.d --print-archive-stats=build4/stats \ # RUN: --why-extract=build4/why -shared --as-needed --reproduce=repro4.tar # RUN: tar xOf repro4.tar repro4/response.txt | FileCheck %s --check-prefix=RSP4 # RSP4: -o bar # RSP4-NEXT: -Map map +# RSP4-NEXT: --dependency-file bar.d # RSP4-NEXT: --print-archive-stats=stats # RSP4-NEXT: --why-extract=why diff --git a/lld/test/MachO/ltopasses-extension.ll b/lld/test/MachO/ltopasses-extension.ll new file mode 100644 index 00000000000000..300f03d4cfdb6d --- /dev/null +++ b/lld/test/MachO/ltopasses-extension.ll @@ -0,0 +1,13 @@ +; REQUIRES: x86, plugins, examples + +; RUN: opt -module-summary %s -o %t.o +; RUN: %lld -dylib -%loadnewpmbye --lto-newpm-passes="goodbye" -mllvm %loadbye -mllvm -wave-goodbye %t.o -o /dev/null 2>&1 | FileCheck %s +; CHECK: Bye + +target triple = "x86_64-apple-macosx10.15.0" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +@junk = global i32 0 + +define ptr @somefunk() { + ret ptr @junk +} diff --git a/lldb/bindings/interface/SBThreadExtensions.i b/lldb/bindings/interface/SBThreadExtensions.i index bfcc4d17e8f829..860a2d765a6695 100644 --- a/lldb/bindings/interface/SBThreadExtensions.i +++ b/lldb/bindings/interface/SBThreadExtensions.i @@ -45,6 +45,12 @@ STRING_EXTENSION_OUTSIDE(SBThread) frames.append(frame) return frames + def get_stop_reason_data(self): + return [ + self.GetStopReasonDataAtIndex(idx) + for idx in 
range(self.GetStopReasonDataCount()) + ] + id = property(GetThreadID, None, doc='''A read only property that returns the thread ID as an integer.''') idx = property(GetIndexID, None, doc='''A read only property that returns the thread index ID as an integer. Thread index ID values start at 1 and increment as threads come and go and can be used to uniquely identify threads.''') return_value = property(GetStopReturnValue, None, doc='''A read only property that returns an lldb object that represents the return value from the last stop (lldb.SBValue) if we just stopped due to stepping out of a function.''') @@ -56,6 +62,7 @@ STRING_EXTENSION_OUTSIDE(SBThread) queue = property(GetQueueName, None, doc='''A read only property that returns the dispatch queue name of this thread as a string.''') queue_id = property(GetQueueID, None, doc='''A read only property that returns the dispatch queue id of this thread as an integer.''') stop_reason = property(GetStopReason, None, doc='''A read only property that returns an lldb enumeration value (see enumerations that start with "lldb.eStopReason") that represents the reason this thread stopped.''') + stop_reason_data = property(get_stop_reason_data, None, doc='''A read only property that returns the stop reason data as a list.''') is_suspended = property(IsSuspended, None, doc='''A read only property that returns a boolean value that indicates if this thread is suspended.''') is_stopped = property(IsStopped, None, doc='''A read only property that returns a boolean value that indicates if this thread is stopped but not exited.''') %} diff --git a/lldb/docs/use/aarch64-linux.md b/lldb/docs/use/aarch64-linux.md index 70432f57857a59..393838dc0bb4f5 100644 --- a/lldb/docs/use/aarch64-linux.md +++ b/lldb/docs/use/aarch64-linux.md @@ -17,7 +17,7 @@ In LLDB you will be able to see the following new registers: * `z0-z31` vector registers, each one has size equal to the vector length. 
* `p0-p15` predicate registers, each one containing 1 bit per byte in the vector - length. Making each one vector length / 8 sized. + length. So each one is `vector length in bits / 8` bits. * `ffr` the first fault register, same size as a predicate register. * `vg`, the vector length in "granules". Each granule is 8 bytes. diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index b8c53a474ba6b9..a184e6dd891aff 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -1380,6 +1380,8 @@ class Process : public std::enable_shared_from_this, virtual bool GetProcessInfo(ProcessInstanceInfo &info); + virtual lldb_private::UUID FindModuleUUID(const llvm::StringRef path); + /// Get the exit status for a process. /// /// \return diff --git a/lldb/packages/Python/lldbsuite/test/lldbutil.py b/lldb/packages/Python/lldbsuite/test/lldbutil.py index 660a3c085a908a..07b5f8cc7d900b 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbutil.py +++ b/lldb/packages/Python/lldbsuite/test/lldbutil.py @@ -1158,17 +1158,6 @@ def GetModuleName(i): return list(map(GetModuleName, list(range(thread.GetNumFrames())))) -def get_stack_frames(thread): - """ - Returns a sequence of stack frames for this thread. 
- """ - - def GetStackFrame(i): - return thread.GetFrameAtIndex(i) - - return list(map(GetStackFrame, list(range(thread.GetNumFrames())))) - - def print_stacktrace(thread, string_buffer=False): """Prints a simple stack trace of this thread.""" diff --git a/lldb/source/Core/DynamicLoader.cpp b/lldb/source/Core/DynamicLoader.cpp index 68d6ab0850853f..acc84dbf016fbe 100644 --- a/lldb/source/Core/DynamicLoader.cpp +++ b/lldb/source/Core/DynamicLoader.cpp @@ -157,6 +157,10 @@ DynamicLoader::GetSectionListFromModule(const ModuleSP module) const { ModuleSP DynamicLoader::FindModuleViaTarget(const FileSpec &file) { Target &target = m_process->GetTarget(); ModuleSpec module_spec(file, target.GetArchitecture()); + if (UUID uuid = m_process->FindModuleUUID(file.GetPath())) { + // Process may be able to augment the module_spec with UUID, e.g. ELF core. + module_spec.GetUUID() = uuid; + } if (ModuleSP module_sp = target.GetImages().FindFirstModule(module_spec)) return module_sp; diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp index a5ee3cfdb2932a..b3916cc913f7db 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp @@ -286,6 +286,14 @@ void ProcessElfCore::UpdateBuildIdForNTFileEntries() { } } +UUID ProcessElfCore::FindModuleUUID(const llvm::StringRef path) { + // Returns the gnu uuid from matched NT_FILE entry + for (NT_FILE_Entry &entry : m_nt_file_entries) + if (path == entry.path) + return entry.uuid; + return UUID(); +} + lldb_private::DynamicLoader *ProcessElfCore::GetDynamicLoader() { if (m_dyld_up.get() == nullptr) m_dyld_up.reset(DynamicLoader::FindPlugin( diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h index 280c61ed376396..a91c04a277f601 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h +++ 
b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h @@ -163,6 +163,8 @@ class ProcessElfCore : public lldb_private::PostMortemProcess { // Populate gnu uuid for each NT_FILE entry void UpdateBuildIdForNTFileEntries(); + lldb_private::UUID FindModuleUUID(const llvm::StringRef path) override; + // Returns the value of certain type of note of a given start address lldb_private::UUID FindBuidIdInCoreMemory(lldb::addr_t address); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index c18edd10b96819..30c890d6d01388 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -137,9 +137,19 @@ void DWARFIndex::GetTypesWithQuery( bool DWARFIndex::ProcessTypeDIEMatchQuery( TypeQuery &query, DWARFDIE die, llvm::function_ref callback) { - // Nothing to match from query - if (query.GetContextRef().size() <= 1) + // Check the language, but only if we have a language filter. + if (query.HasLanguage() && + !query.LanguageMatches(SymbolFileDWARF::GetLanguageFamily(*die.GetCU()))) + return true; // Keep iterating over index types, language mismatch. + + // Since mangled names are unique, we only need to check if the names are + // the same. + if (query.GetSearchByMangledName()) { + if (die.GetMangledName(/*substitute_name_allowed=*/false) != + query.GetTypeBasename().GetStringRef()) + return true; // Keep iterating over index types, mangled name mismatch. 
return callback(die); + } std::vector die_context; if (query.GetModuleSearch()) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 8ce0db4588a46a..c900f330b481bb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2726,39 +2726,8 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { TypeQuery query_full(query); bool have_index_match = false; m_index->GetTypesWithQuery(query_full, [&](DWARFDIE die) { - // Check the language, but only if we have a language filter. - if (query.HasLanguage()) { - if (!query.LanguageMatches(GetLanguageFamily(*die.GetCU()))) - return true; // Keep iterating over index types, language mismatch. - } - - // Since mangled names are unique, we only need to check if the names are - // the same. - if (query.GetSearchByMangledName()) { - if (die.GetMangledName(/*substitute_name_allowed=*/false) != - query.GetTypeBasename().GetStringRef()) - return true; // Keep iterating over index types, mangled name mismatch. - if (Type *matching_type = ResolveType(die, true, true)) { - results.InsertUnique(matching_type->shared_from_this()); - return !results.Done(query); // Keep iterating if we aren't done. - } - return true; // Keep iterating over index types, weren't able to resolve - // this type - } - - // Check the context matches - std::vector die_context; - if (query.GetModuleSearch()) - die_context = die.GetDeclContext(); - else - die_context = die.GetTypeLookupContext(); - assert(!die_context.empty()); - if (!query.ContextMatches(die_context)) - return true; // Keep iterating over index types, context mismatch. - - // Try to resolve the type. 
if (Type *matching_type = ResolveType(die, true, true)) { - if (matching_type->IsTemplateType()) { + if (!query.GetSearchByMangledName() && matching_type->IsTemplateType()) { // We have to watch out for case where we lookup a type by basename and // it matches a template with simple template names. Like looking up // "Foo" and if we have simple template names then we will match @@ -2790,7 +2759,7 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { // With -gsimple-template-names, a templated type's DW_AT_name will not // contain the template parameters. Try again stripping '<' and anything // after, filtering out entries with template parameters that don't match. - if (!have_index_match) { + if (!have_index_match && !query.GetSearchByMangledName()) { // Create a type matcher with a compiler context that is tuned for // -gsimple-template-names. We will use this for the index lookup and the // context matching, but will use the original "match" to insert matches @@ -2804,23 +2773,6 @@ void SymbolFileDWARF::FindTypes(const TypeQuery &query, TypeResults &results) { // Copy our match's context and update the basename we are looking for // so we can use this only to compare the context correctly. m_index->GetTypesWithQuery(query_simple, [&](DWARFDIE die) { - // Check the language, but only if we have a language filter. - if (query.HasLanguage()) { - if (!query.LanguageMatches(GetLanguageFamily(*die.GetCU()))) - return true; // Keep iterating over index types, language mismatch. - } - - // Check the context matches - std::vector die_context; - if (query.GetModuleSearch()) - die_context = die.GetDeclContext(); - else - die_context = die.GetTypeLookupContext(); - assert(!die_context.empty()); - if (!query_simple.ContextMatches(die_context)) - return true; // Keep iterating over index types, context mismatch. - - // Try to resolve the type. 
if (Type *matching_type = ResolveType(die, true, true)) { ConstString name = matching_type->GetQualifiedName(); // We have found a type that still might not match due to template diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 9125ceca74a003..db33525978a16a 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -6080,6 +6080,10 @@ bool Process::GetProcessInfo(ProcessInstanceInfo &info) { return platform_sp->GetProcessInfo(GetID(), info); } +lldb_private::UUID Process::FindModuleUUID(const llvm::StringRef path) { + return lldb_private::UUID(); +} + ThreadCollectionSP Process::GetHistoryThreads(lldb::addr_t addr) { ThreadCollectionSP threads; diff --git a/lldb/test/API/lang/c/stepping/TestStepAndBreakpoints.py b/lldb/test/API/lang/c/stepping/TestStepAndBreakpoints.py index 8d0de40cdd7b68..9fe787bcaa9fb7 100644 --- a/lldb/test/API/lang/c/stepping/TestStepAndBreakpoints.py +++ b/lldb/test/API/lang/c/stepping/TestStepAndBreakpoints.py @@ -99,9 +99,7 @@ def test_and_python_api(self): frame = thread.GetFrameAtIndex(0) current_line = frame.GetLineEntry().GetLine() current_file = frame.GetLineEntry().GetFileSpec() - current_bp = [] - current_bp.append(thread.GetStopReasonDataAtIndex(0)) - current_bp.append(thread.GetStopReasonDataAtIndex(1)) + current_bp = thread.stop_reason_data stop_id_before_expression = process.GetStopID() stop_id_before_including_expressions = process.GetStopID(True) @@ -124,9 +122,9 @@ def test_and_python_api(self): lldb.eStopReasonBreakpoint, "We still say we stopped for a breakpoint.", ) - self.assertTrue( - thread.GetStopReasonDataAtIndex(0) == current_bp[0] - and thread.GetStopReasonDataAtIndex(1) == current_bp[1], + self.assertEqual( + thread.stop_reason_data, + current_bp, "And it is the same breakpoint.", ) diff --git a/lldb/test/API/symbol_ondemand/shared_library/TestSharedLibOnDemand.py b/lldb/test/API/symbol_ondemand/shared_library/TestSharedLibOnDemand.py index 
f1c23a58d1f486..dbb9576ed4d51a 100644 --- a/lldb/test/API/symbol_ondemand/shared_library/TestSharedLibOnDemand.py +++ b/lldb/test/API/symbol_ondemand/shared_library/TestSharedLibOnDemand.py @@ -59,7 +59,7 @@ def test_source_line_breakpoint(self): lldbutil.check_breakpoint(self, bpno=1, expected_hit_count=1) thread = process.GetSelectedThread() - stack_frames = lldbutil.get_stack_frames(thread) + stack_frames = thread.frames self.assertGreater(len(stack_frames), 2) leaf_frame = stack_frames[0] @@ -97,7 +97,7 @@ def test_symbolic_breakpoint(self): lldbutil.check_breakpoint(self, bpno=1, expected_hit_count=1) thread = process.GetSelectedThread() - stack_frames = lldbutil.get_stack_frames(thread) + stack_frames = thread.frames self.assertGreater(len(stack_frames), 2) leaf_frame = stack_frames[0] diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index 71dd4c6b75b633..c3f98c9bbdff53 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -121,8 +121,8 @@ david.trevelyan@gmail.com (email), [davidtrevelyan](https://github.com/davidtrev #### Parts of code generator not covered by someone else -Evan Cheng \ -evan.cheng@apple.com (email) +Matt Arsenault \ +Matthew.Arsenault@amd.com, arsenm2@gmail.com (email), [arsenm](https://github.com/arsenm) (GitHub) #### SelectionDAG @@ -469,6 +469,7 @@ sabre@nondot.org (email), [lattner](https://github.com/lattner) (GitHub), clattn ### Inactive or former component maintainers Justin Bogner (mail@justinbogner.com, [bogner](https://github.com/bogner)) -- SelectionDAG \ +Evan Cheng (evan.cheng@apple.com) -- Parts of code generator not covered by someone else \ Hans Wennborg (hans@chromium.org, [zmodem](https://github.com/zmodem)) -- Release management \ ### Former maintainers of removed components diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 161363e0dd6bcc..411a1209ef947e 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1407,6 +1407,19 @@ The AMDGPU backend implements the 
following LLVM IR intrinsics. llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4 Emit `v_mfma_scale_f32_32x32x64_f8f6f4` + llvm.amdgcn.permlane16.swap Provide direct access to `v_permlane16_swap_b32` instruction on supported targets. + Swaps the values across lanes of first 2 operands. Odd rows of the first operand are + swapped with even rows of the second operand (one row is 16 lanes). + Returns a pair for the swapped registers. The first element of the return corresponds + to the swapped element of the first argument. + + + llvm.amdgcn.permlane32.swap Provide direct access to `v_permlane32_swap_b32` instruction on supported targets. + Swaps the values across lanes of first 2 operands. Rows 2 and 3 of the first operand are + swapped with rows 0 and 1 of the second operand (one row is 16 lanes). + Returns a pair for the swapped registers. The first element of the return + corresponds to the swapped element of the first argument. + ============================================== ========================================================== .. TODO:: diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 9f4c90ba82a419..79bdd25c18f1fd 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -21522,9 +21522,9 @@ This is an overloaded intrinsic. :: - declare <16 x i32> @llvm.vp.abs.v16i32 (<16 x i32> , <16 x i1> , i32 , i1 ) - declare @llvm.vp.abs.nxv4i32 ( , , i32 , i1 ) - declare <256 x i64> @llvm.vp.abs.v256i64 (<256 x i64> , <256 x i1> , i32 , i1 ) + declare <16 x i32> @llvm.vp.abs.v16i32 (<16 x i32> , i1 , <16 x i1> , i32 ) + declare @llvm.vp.abs.nxv4i32 ( , i1 , , i32 ) + declare <256 x i64> @llvm.vp.abs.v256i64 (<256 x i64> , i1 , <256 x i1> , i32 ) Overview: """"""""" @@ -21536,12 +21536,12 @@ Arguments: """""""""" The first argument and the result have the same vector of integer type. The -second argument is the vector mask and has the same number of elements as the -result vector type. 
The third argument is the explicit vector length of the -operation. The fourth argument must be a constant and is a flag to indicate -whether the result value of the '``llvm.vp.abs``' intrinsic is a -:ref:`poison value ` if the first argument is statically or -dynamically an ``INT_MIN`` value. +second argument must be a constant and is a flag to indicate whether the result +value of the '``llvm.vp.abs``' intrinsic is a :ref:`poison value ` +if the first argument is statically or dynamically an ``INT_MIN`` value. The +third argument is the vector mask and has the same number of elements as the +result vector type. The fourth argument is the explicit vector length of the +operation. Semantics: """""""""" @@ -21554,7 +21554,7 @@ Examples: .. code-block:: llvm - %r = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl, i1 false) + %r = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %a, i1 false, <4 x i1> %mask, i32 %evl) ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r %t = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 false) @@ -25260,9 +25260,9 @@ This is an overloaded intrinsic. :: - declare <16 x i32> @llvm.vp.ctlz.v16i32 (<16 x i32> , <16 x i1> , i32 , i1 ) - declare @llvm.vp.ctlz.nxv4i32 ( , , i32 , i1 ) - declare <256 x i64> @llvm.vp.ctlz.v256i64 (<256 x i64> , <256 x i1> , i32 , i1 ) + declare <16 x i32> @llvm.vp.ctlz.v16i32 (<16 x i32> , i1 , <16 x i1> , i32 ) + declare @llvm.vp.ctlz.nxv4i32 ( , i1 , , i32 ) + declare <256 x i64> @llvm.vp.ctlz.v256i64 (<256 x i64> , i1 , <256 x i1> , i32 ) Overview: """"""""" @@ -25274,11 +25274,11 @@ Arguments: """""""""" The first argument and the result have the same vector of integer type. The -second argument is the vector mask and has the same number of elements as the -result vector type. The third argument is the explicit vector length of the -operation. 
The fourth argument is a constant flag that indicates whether the -intrinsic returns a valid result if the first argument is zero. If the first -argument is zero and the fourth argument is true, the result is poison. +second argument is a constant flag that indicates whether the intrinsic returns +a valid result if the first argument is zero. The third argument is the vector +mask and has the same number of elements as the result vector type. The fourth +argument is the explicit vector length of the operation. If the first argument +is zero and the second argument is true, the result is poison. Semantics: """""""""" @@ -25291,7 +25291,7 @@ Examples: .. code-block:: llvm - %r = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl, i1 false) + %r = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %a, i1 false, <4 x i1> %mask, i32 %evl) ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r %t = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) @@ -25309,9 +25309,9 @@ This is an overloaded intrinsic. :: - declare <16 x i32> @llvm.vp.cttz.v16i32 (<16 x i32> , <16 x i1> , i32 , i1 ) - declare @llvm.vp.cttz.nxv4i32 ( , , i32 , i1 ) - declare <256 x i64> @llvm.vp.cttz.v256i64 (<256 x i64> , <256 x i1> , i32 , i1 ) + declare <16 x i32> @llvm.vp.cttz.v16i32 (<16 x i32> , i1 , <16 x i1> , i32 ) + declare @llvm.vp.cttz.nxv4i32 ( , i1 , , i32 ) + declare <256 x i64> @llvm.vp.cttz.v256i64 (<256 x i64> , i1 , <256 x i1> , i32 ) Overview: """"""""" @@ -25323,11 +25323,11 @@ Arguments: """""""""" The first argument and the result have the same vector of integer type. The -second argument is the vector mask and has the same number of elements as the -result vector type. The third argument is the explicit vector length of the -operation. The fourth argument is a constant flag that indicates whether the -intrinsic returns a valid result if the first argument is zero.
If the first -argument is zero and the fourth argument is true, the result is poison. +second argument is a constant flag that indicates whether the intrinsic +returns a valid result if the first argument is zero. The third argument is +the vector mask and has the same number of elements as the result vector type. +The fourth argument is the explicit vector length of the operation. If the +first argument is zero and the second argument is true, the result is poison. Semantics: """""""""" @@ -25340,7 +25340,7 @@ Examples: .. code-block:: llvm - %r = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl, i1 false) + %r = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %a, i1 false, <4 x i1> %mask, i32 %evl) ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r %t = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index f7d81636f4dd4e..07f87d44088e7e 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -138,6 +138,7 @@ typedef enum { LLVMDWARFSourceLanguageRuby, LLVMDWARFSourceLanguageMove, LLVMDWARFSourceLanguageHylo, + LLVMDWARFSourceLanguageMetal, // Vendor extensions: LLVMDWARFSourceLanguageMips_Assembler, diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 4aa6defdc3a41f..2bb84fbc864d8e 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -967,6 +967,7 @@ HANDLE_DW_LANG(0x0036, HLSL, 0, 0, DWARF) HANDLE_DW_LANG(0x0037, OpenCL_CPP, 0, 0, DWARF) HANDLE_DW_LANG(0x0038, CPP_for_OpenCL, 0, 0, DWARF) HANDLE_DW_LANG(0x0039, SYCL, 0, 0, DWARF) +HANDLE_DW_LANG(0x003d, Metal, 0, 0, DWARF) HANDLE_DW_LANG(0x0040, Ruby, 0, 0, DWARF) HANDLE_DW_LANG(0x0041, Move, 0, 0, DWARF) HANDLE_DW_LANG(0x0042, Hylo, 0, 0, DWARF) @@ -1032,6 +1033,7 @@ HANDLE_DW_LNAME(0x0025, SYCL, "SYCL", 0) // YYYYRR HANDLE_DW_LNAME(0x0026, Ruby, "Ruby", 0) // 
VVMMPP HANDLE_DW_LNAME(0x0027, Move, "Move", 0) // YYYYMM HANDLE_DW_LNAME(0x0028, Hylo, "Hylo", 0) +HANDLE_DW_LNAME(0x002c, Metal, "Metal", 0) // VVMMPP // DWARF attribute type encodings. HANDLE_DW_ATE(0x01, address, 2, DWARF) diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index 4657ad30eb1be4..3be819c0a76eeb 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -353,6 +353,8 @@ inline std::optional toDW_LANG(SourceLanguageName name, return DW_LANG_Move; case DW_LNAME_Hylo: return DW_LANG_Hylo; + case DW_LNAME_Metal: + return DW_LANG_Metal; } return {}; } @@ -479,6 +481,8 @@ toDW_LNAME(SourceLanguage language) { return {{DW_LNAME_Move, 0}}; case DW_LANG_Hylo: return {{DW_LNAME_Hylo, 0}}; + case DW_LANG_Metal: + return {{DW_LNAME_Metal, 0}}; case DW_LANG_BORLAND_Delphi: case DW_LANG_CPP_for_OpenCL: case DW_LANG_lo_user: @@ -562,6 +566,7 @@ inline bool isCPlusPlus(SourceLanguage S) { case DW_LANG_Ruby: case DW_LANG_Move: case DW_LANG_Hylo: + case DW_LANG_Metal: result = false; break; } @@ -641,6 +646,7 @@ inline bool isFortran(SourceLanguage S) { case DW_LANG_Ruby: case DW_LANG_Move: case DW_LANG_Hylo: + case DW_LANG_Metal: result = false; break; } @@ -718,6 +724,7 @@ inline bool isC(SourceLanguage S) { case DW_LANG_Ruby: case DW_LANG_Move: case DW_LANG_Hylo: + case DW_LANG_Metal: return false; } llvm_unreachable("Unknown language kind."); diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index a0fb32f67e3858..41909a8fc1d590 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -307,12 +307,12 @@ enum GlobalValueSummarySymtabCodes { // [valueid, n x stackidindex] FS_PERMODULE_CALLSITE_INFO = 26, // Summary of per-module allocation memprof metadata. 
- // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex), + // [nummib, nummib x (alloc type, context radix tree index), // [nummib x (numcontext x total size)]?] FS_PERMODULE_ALLOC_INFO = 27, // Summary of combined index memprof callsite metadata. - // [valueid, numstackindices, numver, - // numstackindices x stackidindex, numver x version] + // [valueid, context radix tree index, numver, + // numver x version] FS_COMBINED_CALLSITE_INFO = 28, // Summary of combined index allocation memprof metadata. // [nummib, numver, @@ -331,6 +331,10 @@ enum GlobalValueSummarySymtabCodes { // the entries must be in the exact same order as the corresponding sizes. // [nummib x (numcontext x full stack id)] FS_ALLOC_CONTEXT_IDS = 31, + // Linearized radix tree of allocation contexts. See the description above the + // CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format. + // [n x entry] + FS_CONTEXT_RADIX_TREE_ARRAY = 32, }; enum MetadataCodes { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index b3583e2819ee4c..d2fc40d8ae037e 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -277,6 +277,20 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); } + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // Inline a callee if its target-features are a subset of the caller's + // target-features.
+ return (CallerBits & CalleeBits) == CalleeBits; + } + bool hasBranchDivergence(const Function *F = nullptr) { return false; } bool isSourceOfDivergence(const Value *V) { return false; } @@ -2765,6 +2779,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + if (auto *FTy = dyn_cast(Ty); + FTy && IsUnsigned && Opcode == Instruction::Add && + FTy->getElementType() == IntegerType::getInt1Ty(Ty->getContext())) { + // Represent vector_reduce_add(ZExt()) as + // ZExtOrTrunc(ctpop(bitcast to in)). + auto *IntTy = + IntegerType::get(ResTy->getContext(), FTy->getNumElements()); + IntrinsicCostAttributes ICA(Intrinsic::ctpop, IntTy, {IntTy}, FMF); + return thisT()->getCastInstrCost(Instruction::BitCast, IntTy, FTy, + TTI::CastContextHint::None, CostKind) + + thisT()->getIntrinsicInstrCost(ICA, CostKind); + } // Without any native support, this is equivalent to the cost of // vecreduce.opcode(ext(Ty A)). 
VectorType *ExtTy = VectorType::get(ResTy, Ty); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index f682b20816d57f..2384b22c052662 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -378,6 +378,8 @@ class LegalizerHelper { LLT CastTy); LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy); + LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy); LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy); LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 8b1c11a6f41301..b681a0708db4b9 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -292,6 +292,9 @@ LegalityPredicate isPointer(unsigned TypeIdx); /// True iff the specified type index is a pointer with the specified address /// space. LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace); +/// True iff the specified type index is a vector of pointers (with any address +/// space). +LegalityPredicate isPointerVector(unsigned TypeIdx); /// True if the type index is a vector with element type \p EltTy LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT EltTy); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 75e73bedd9348e..bd7fb2361aaeb1 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -769,6 +769,7 @@ def OMP_Flush : Directive<"flush"> { // OMPKinds.def. 
VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_None; let category = CA_Executable; diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 61dba265dc948b..730baa8cc00520 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -278,6 +278,7 @@ class Instruction : public User, bool isUnaryOp() const { return isUnaryOp(getOpcode()); } bool isBinaryOp() const { return isBinaryOp(getOpcode()); } bool isIntDivRem() const { return isIntDivRem(getOpcode()); } + bool isFPDivRem() const { return isFPDivRem(getOpcode()); } bool isShift() const { return isShift(getOpcode()); } bool isCast() const { return isCast(getOpcode()); } bool isFuncletPad() const { return isFuncletPad(getOpcode()); } @@ -304,6 +305,10 @@ class Instruction : public User, return Opcode == UDiv || Opcode == SDiv || Opcode == URem || Opcode == SRem; } + static inline bool isFPDivRem(unsigned Opcode) { + return Opcode == FDiv || Opcode == FRem; + } + /// Determine if the Opcode is one of the shift instructions. static inline bool isShift(unsigned Opcode) { return Opcode >= Shl && Opcode <= AShr; diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index b743106a3c7c43..73599851f50003 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2973,12 +2973,10 @@ class AMDGPUMfmaIntrinsic : // blgp. // // These should be <8 x i32> for f8 formats, <6 x i32> for f6 formats, -// and <4 x i32> for f4 formats. If the format control bits imply a -// smaller type than used, the high elements will be truncated. -// -// If the format control bits imply a larger type than used, the high -// elements are padded with undef. - +// and <4 x i32> for f4 formats. It is invalid to use a format that +// requires more registers than the corresponding vector type (e.g. 
it +// is illegal to use <6 x i32> in operand 0 if cbsz specifies an f8 +// format that requires 8 registers). class AMDGPUMfmaScaleIntrinsic : DefaultAttrsIntrinsic<[DestTy], [llvm_anyvector_ty, llvm_anyvector_ty, DestTy, @@ -3162,8 +3160,26 @@ def int_amdgcn_smfmac_f32_16x16x128_bf8_bf8 : AMDGPUMSmfmacIntrinsic; def int_amdgcn_smfmac_f32_16x16x128_fp8_bf8 : AMDGPUMSmfmacIntrinsic; def int_amdgcn_smfmac_f32_16x16x128_fp8_fp8 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_f32_32x32x64_bf8_bf8 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_f32_32x32x64_bf8_fp8 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_f32_32x32x64_fp8_bf8 : AMDGPUMSmfmacIntrinsic; +def int_amdgcn_smfmac_f32_32x32x64_fp8_fp8 : AMDGPUMSmfmacIntrinsic; } +// { vdst_new, vsrc_new } llvm.amdgcn.permlane16.swap +def int_amdgcn_permlane16_swap : + Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, + llvm_i1_ty, llvm_i1_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; + +// { vdst_new, vsrc_new } llvm.amdgcn.permlane32.swap +def int_amdgcn_permlane32_swap : + Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, + llvm_i1_ty, llvm_i1_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, + ImmArg>, ImmArg>, IntrNoCallback, IntrNoFree]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. 
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 62688eae36e9a8..dad60a2535cf4d 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -46,6 +46,7 @@ def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>; def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>; +def int_dx_asdouble : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [llvm_anyint_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_dx_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_dx_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 528e19af5518df..12b50fc506516e 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -256,9 +256,12 @@ class LLVM_ABI Module { /// The module destructor. This will dropAllReferences. ~Module(); -/// @} -/// @name Module Level Accessors -/// @{ + /// Move assignment. + Module &operator=(Module &&Other); + + /// @} + /// @name Module Level Accessors + /// @{ /// Get the module identifier which is, essentially, the name of the module. /// @returns the module identifier as a string diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index 73f29d0f521edf..6db42878d2e521 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -129,9 +129,6 @@ struct MCRegisterDesc { // Is true for constant registers. bool IsConstant; - - // Is true for artificial registers. 
- bool IsArtificial; }; /// MCRegisterInfo base class - We assume that the target defines a static @@ -399,11 +396,6 @@ class MCRegisterInfo { /// Returns true if the given register is constant. bool isConstant(MCRegister RegNo) const { return get(RegNo).IsConstant; } - /// Returns true if the given register is artificial, which means it - /// represents a regunit that is not separately addressable but still needs to - /// be modelled, such as the top 16-bits of a 32-bit GPR. - bool isArtificial(MCRegister RegNo) const { return get(RegNo).IsArtificial; } - /// Return the number of registers this target has (useful for /// sizing arrays holding per register information) unsigned getNumRegs() const { diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index 0f8c73f81cfa6d..87e4dbe4480910 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -830,6 +830,7 @@ struct BBAddrMap { bool BBFreq : 1; bool BrProb : 1; bool MultiBBRange : 1; + bool OmitBBEntries : 1; bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; } @@ -840,7 +841,8 @@ struct BBAddrMap { return (static_cast(FuncEntryCount) << 0) | (static_cast(BBFreq) << 1) | (static_cast(BrProb) << 2) | - (static_cast(MultiBBRange) << 3); + (static_cast(MultiBBRange) << 3) | + (static_cast(OmitBBEntries) << 4); } // Decodes from minimum bit width representation and validates no @@ -848,7 +850,8 @@ struct BBAddrMap { static Expected decode(uint8_t Val) { Features Feat{ static_cast(Val & (1 << 0)), static_cast(Val & (1 << 1)), - static_cast(Val & (1 << 2)), static_cast(Val & (1 << 3))}; + static_cast(Val & (1 << 2)), static_cast(Val & (1 << 3)), + static_cast(Val & (1 << 4))}; if (Feat.encode() != Val) return createStringError( std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x", @@ -857,9 +860,10 @@ struct BBAddrMap { } bool operator==(const Features &Other) const { - return std::tie(FuncEntryCount, BBFreq, 
BrProb, MultiBBRange) == + return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange, + OmitBBEntries) == std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb, - Other.MultiBBRange); + Other.MultiBBRange, Other.OmitBBEntries); } }; diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 058b9a1ce02e0b..1fad2343e2c961 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -686,7 +686,7 @@ class IndexedMemProfReader { // The number of elements in the radix tree array. unsigned RadixTreeSize = 0; - Error deserializeV12(const unsigned char *Start, const unsigned char *Ptr); + Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr); Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr); public: diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index f97fbd4bd64419..dd4d90864a08c4 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -25,8 +25,6 @@ struct MemProfRecord; // The versions of the indexed MemProf format enum IndexedVersion : uint64_t { - // Version 1: Added a version field to the header. - Version1 = 1, // Version 2: Added a call stack table. Version2 = 2, // Version 3: Added a radix tree for call stacks. Switched to linear IDs for @@ -34,7 +32,7 @@ enum IndexedVersion : uint64_t { Version3 = 3, }; -constexpr uint64_t MinimumSupportedVersion = Version1; +constexpr uint64_t MinimumSupportedVersion = Version2; constexpr uint64_t MaximumSupportedVersion = Version3; // Verify that the minimum and maximum satisfy the obvious constraint. 
@@ -391,14 +389,6 @@ struct AllocationInfo { PortableMemInfoBlock Info; AllocationInfo() = default; - AllocationInfo( - const IndexedAllocationInfo &IndexedAI, - llvm::function_ref IdToFrameCallback) { - for (const FrameId &Id : IndexedAI.CallStack) { - CallStack.push_back(IdToFrameCallback(Id)); - } - Info = IndexedAI.Info; - } void printYAML(raw_ostream &OS) const { OS << " -\n"; @@ -486,20 +476,6 @@ struct MemProfRecord { llvm::SmallVector> CallSites; MemProfRecord() = default; - MemProfRecord( - const IndexedMemProfRecord &Record, - llvm::function_ref IdToFrameCallback) { - for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) { - AllocSites.emplace_back(IndexedAI, IdToFrameCallback); - } - for (const ArrayRef Site : Record.CallSites) { - std::vector Frames; - for (const FrameId Id : Site) { - Frames.push_back(IdToFrameCallback(Id)); - } - CallSites.push_back(Frames); - } - } // Prints out the contents of the memprof record in YAML. void print(llvm::raw_ostream &OS) const { @@ -1141,21 +1117,20 @@ template class CallStackRadixTreeBuilder { // Encode a call stack into RadixArray. Return the starting index within // RadixArray. - LinearCallStackId - encodeCallStack(const llvm::SmallVector *CallStack, - const llvm::SmallVector *Prev, - std::optional> - MemProfFrameIndexes); + LinearCallStackId encodeCallStack( + const llvm::SmallVector *CallStack, + const llvm::SmallVector *Prev, + const llvm::DenseMap *MemProfFrameIndexes); public: CallStackRadixTreeBuilder() = default; // Build a radix tree array. 
- void build(llvm::MapVector> - &&MemProfCallStackData, - std::optional> - MemProfFrameIndexes, - llvm::DenseMap &FrameHistogram); + void + build(llvm::MapVector> + &&MemProfCallStackData, + const llvm::DenseMap *MemProfFrameIndexes, + llvm::DenseMap &FrameHistogram); ArrayRef getRadixArray() const { return RadixArray; } @@ -1163,18 +1138,6 @@ template class CallStackRadixTreeBuilder { return std::move(CallStackPos); } }; - -// Verify that each CallStackId is computed with hashCallStack. This function -// is intended to help transition from CallStack to CSId in -// IndexedAllocationInfo. -void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record); - -// Verify that each CallStackId is computed with hashCallStack. This function -// is intended to help transition from CallStack to CSId in -// IndexedAllocationInfo. -void verifyFunctionProfileData( - const llvm::MapVector - &FunctionProfileData); } // namespace memprof } // namespace llvm diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index e4485d997c34cc..7bb6c3156c43e0 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -118,6 +118,10 @@ def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc. 
def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2> ]>; +def SDTIntShiftPairOp : SDTypeProfile<2, 3, [ // shl_parts, sra_parts, srl_parts + SDTCisInt<0>, SDTCisSameAs<1, 0>, + SDTCisSameAs<2, 0>, SDTCisSameAs<3, 0>, SDTCisInt<4> +]>; def SDTIntShiftDOp: SDTypeProfile<1, 3, [ // fshl, fshr SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; @@ -422,6 +426,9 @@ def sra : SDNode<"ISD::SRA" , SDTIntShiftOp>; def shl : SDNode<"ISD::SHL" , SDTIntShiftOp>; def rotl : SDNode<"ISD::ROTL" , SDTIntShiftOp>; def rotr : SDNode<"ISD::ROTR" , SDTIntShiftOp>; +def shl_parts : SDNode<"ISD::SHL_PARTS" , SDTIntShiftPairOp>; +def sra_parts : SDNode<"ISD::SRA_PARTS" , SDTIntShiftPairOp>; +def srl_parts : SDNode<"ISD::SRL_PARTS" , SDTIntShiftPairOp>; def fshl : SDNode<"ISD::FSHL" , SDTIntShiftDOp>; def fshr : SDNode<"ISD::FSHR" , SDTIntShiftDOp>; def and : SDNode<"ISD::AND" , SDTIntBinOp, diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index cd8594d670502d..4028d5f4e2e1b8 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -565,10 +565,7 @@ static APInt getSizeWithOverflow(const SizeOffsetAPInt &Data) { APInt Size = Data.Size; APInt Offset = Data.Offset; - assert(!Offset.isNegative() && - "size for a pointer before the allocated object is ambiguous"); - - if (Size.ult(Offset)) + if (Offset.isNegative() || Size.ult(Offset)) return APInt::getZero(Size.getBitWidth()); return Size - Offset; @@ -756,10 +753,14 @@ OffsetSpan ObjectSizeOffsetVisitor::computeImpl(Value *V) { } // We end up pointing on a location that's outside of the original object. - // This is UB, and we'd rather return an empty location then. if (ORT.knownBefore() && ORT.Before.isNegative()) { - ORT.Before = APInt::getZero(ORT.Before.getBitWidth()); - ORT.After = APInt::getZero(ORT.Before.getBitWidth()); + // This is UB, and we'd rather return an empty location then. 
+ if (Options.EvalMode == ObjectSizeOpts::Mode::Min || + Options.EvalMode == ObjectSizeOpts::Mode::Max) { + ORT.Before = APInt::getZero(ORT.Before.getBitWidth()); + ORT.After = APInt::getZero(ORT.Before.getBitWidth()); + } + // Otherwise it's fine, caller can handle negative offset. } return ORT; } diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index 8f79ccdb9ff75f..032c0de3c7a00f 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -329,6 +329,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO) STRINGIFY_CODE(FS, STACK_IDS) STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS) + STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY) } case bitc::METADATA_ATTACHMENT_ID: switch (CodeID) { diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 3e6abacac27261..11fbe6e6158eec 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -987,6 +987,10 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { /// ids from the lists in the callsite and alloc entries to the index. std::vector StackIds; + /// Linearized radix tree of allocation contexts. See the description above + /// the CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format. 
+ std::vector RadixArray; + public: ModuleSummaryIndexBitcodeReader( BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex, @@ -1013,6 +1017,8 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { TypeIdCompatibleVtableInfo &TypeId); std::vector parseParamAccesses(ArrayRef Record); + SmallVector parseAllocInfoContext(ArrayRef Record, + unsigned &I); template std::pair @@ -7544,6 +7550,48 @@ void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableSummaryRecord( parseTypeIdCompatibleVtableInfo(Record, Slot, TypeId); } +SmallVector ModuleSummaryIndexBitcodeReader::parseAllocInfoContext( + ArrayRef Record, unsigned &I) { + SmallVector StackIdList; + // For backwards compatibility with old format before radix tree was + // used, simply see if we found a radix tree array record (and thus if + // the RadixArray is non-empty). + if (RadixArray.empty()) { + unsigned NumStackEntries = Record[I++]; + assert(Record.size() - I >= NumStackEntries); + StackIdList.reserve(NumStackEntries); + for (unsigned J = 0; J < NumStackEntries; J++) { + assert(Record[I] < StackIds.size()); + StackIdList.push_back( + TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]])); + } + } else { + unsigned RadixIndex = Record[I++]; + // See the comments above CallStackRadixTreeBuilder in ProfileData/MemProf.h + // for a detailed description of the radix tree array format. Briefly, the + // first entry will be the number of frames, any negative values are the + // negative of the offset of the next frame, and otherwise the frames are in + // increasing linear order. 
+ assert(RadixIndex < RadixArray.size()); + unsigned NumStackIds = RadixArray[RadixIndex++]; + StackIdList.reserve(NumStackIds); + while (NumStackIds--) { + assert(RadixIndex < RadixArray.size()); + unsigned Elem = RadixArray[RadixIndex]; + if (static_cast>(Elem) < 0) { + RadixIndex = RadixIndex - Elem; + assert(RadixIndex < RadixArray.size()); + Elem = RadixArray[RadixIndex]; + // We shouldn't encounter a second offset in a row. + assert(static_cast>(Elem) >= 0); + } + RadixIndex++; + StackIdList.push_back(TheIndex.addOrGetStackIdIndex(StackIds[Elem])); + } + } + return StackIdList; +} + static void setSpecialRefs(SmallVectorImpl &Refs, unsigned ROCnt, unsigned WOCnt) { // Readonly and writeonly refs are in the end of the refs list. @@ -8010,6 +8058,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { break; } + case bitc::FS_CONTEXT_RADIX_TREE_ARRAY: { // [n x entry] + RadixArray = ArrayRef(Record); + break; + } + case bitc::FS_PERMODULE_CALLSITE_INFO: { unsigned ValueID = Record[0]; SmallVector StackIdList; @@ -8065,14 +8118,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { (Version < 10 && I < Record.size())) { assert(Record.size() - I >= 2); AllocationType AllocType = (AllocationType)Record[I++]; - unsigned NumStackEntries = Record[I++]; - assert(Record.size() - I >= NumStackEntries); - SmallVector StackIdList; - for (unsigned J = 0; J < NumStackEntries; J++) { - assert(Record[I] < StackIds.size()); - StackIdList.push_back( - TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]])); - } + auto StackIdList = parseAllocInfoContext(Record, I); MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList))); } // We either have nothing left or at least NumMIBs context size info @@ -8123,14 +8169,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { while (MIBsRead++ < NumMIBs) { assert(Record.size() - I >= 2); AllocationType AllocType = (AllocationType)Record[I++]; - unsigned NumStackEntries = 
Record[I++]; - assert(Record.size() - I >= NumStackEntries); - SmallVector StackIdList; - for (unsigned J = 0; J < NumStackEntries; J++) { - assert(Record[I] < StackIds.size()); - StackIdList.push_back( - TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]])); - } + auto StackIdList = parseAllocInfoContext(Record, I); MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList))); } assert(Record.size() - I >= NumVersions); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 59e070a5110620..63f4e34074e06b 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -60,6 +60,7 @@ #include "llvm/MC/StringTableBuilder.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Object/IRSymtab.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -83,6 +84,7 @@ #include using namespace llvm; +using namespace llvm::memprof; static cl::opt IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25), @@ -231,7 +233,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase { SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId, - const Function &F); + const Function &F, DenseMap &CallStackPos, + CallStackId &CallStackCount); void writeModuleLevelReferences(const GlobalVariable &V, SmallVector &NameVals, unsigned FSModRefsAbbrev, @@ -4195,12 +4198,58 @@ static void writeTypeIdCompatibleVtableSummaryRecord( } } +// Adds the allocation contexts to the CallStacks map. We simply use the +// size at the time the context was added as the CallStackId. This works because +// when we look up the call stacks later on we process the function summaries +// and their allocation records in the same exact order. 
+static void collectMemProfCallStacks( + FunctionSummary *FS, std::function GetStackIndex, + MapVector> &CallStacks) { + // The interfaces in ProfileData/MemProf.h use a type alias for a stack frame + // id offset into the index of the full stack frames. The ModuleSummaryIndex + // currently uses unsigned. Make sure these stay in sync. + static_assert(std::is_same_v); + for (auto &AI : FS->allocs()) { + for (auto &MIB : AI.MIBs) { + SmallVector StackIdIndices; + StackIdIndices.reserve(MIB.StackIdIndices.size()); + for (auto Id : MIB.StackIdIndices) + StackIdIndices.push_back(GetStackIndex(Id)); + // The CallStackId is the size at the time this context was inserted. + CallStacks.insert({CallStacks.size(), StackIdIndices}); + } + } +} + +// Build the radix tree from the accumulated CallStacks, write out the resulting +// linearized radix tree array, and return the map of call stack positions into +// this array for use when writing the allocation records. The returned map is +// indexed by a CallStackId which in this case is implicitly determined by the +// order of function summaries and their allocation infos being written. +static DenseMap writeMemoryProfileRadixTree( + MapVector> &&CallStacks, + BitstreamWriter &Stream, unsigned RadixAbbrev) { + assert(!CallStacks.empty()); + DenseMap FrameHistogram = + computeFrameHistogram(CallStacks); + CallStackRadixTreeBuilder Builder; + // We don't need a MemProfFrameIndexes map as we have already converted the + // full stack id hash to a linear offset into the StackIds array. 
+ Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/nullptr, + FrameHistogram); + Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(), + RadixAbbrev); + return Builder.takeCallStackPos(); +} + static void writeFunctionHeapProfileRecords( BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule, std::function GetValueID, std::function GetStackIndex, - bool WriteContextSizeInfoIndex) { + bool WriteContextSizeInfoIndex, + DenseMap &CallStackPos, + CallStackId &CallStackCount) { SmallVector Record; for (auto &CI : FS->callsites()) { @@ -4234,9 +4283,9 @@ static void writeFunctionHeapProfileRecords( Record.push_back(AI.Versions.size()); for (auto &MIB : AI.MIBs) { Record.push_back((uint8_t)MIB.AllocType); - Record.push_back(MIB.StackIdIndices.size()); - for (auto Id : MIB.StackIdIndices) - Record.push_back(GetStackIndex(Id)); + // Record the index into the radix tree array for this context. 
+ assert(CallStackCount <= CallStackPos.size()); + Record.push_back(CallStackPos[CallStackCount++]); } if (!PerModule) { for (auto V : AI.Versions) @@ -4282,7 +4331,9 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsRelBFAbbrev, unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev, - unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) { + unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F, + DenseMap &CallStackPos, + CallStackId &CallStackCount) { NameVals.push_back(ValueID); FunctionSummary *FS = cast(Summary); @@ -4297,7 +4348,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( /*PerModule*/ true, /*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); }, /*GetStackIndex*/ [&](unsigned I) { return I; }, - /*WriteContextSizeInfoIndex*/ true); + /*WriteContextSizeInfoIndex*/ true, CallStackPos, CallStackCount); auto SpecialRefCnts = FS->specialRefCounts(); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); @@ -4530,12 +4581,54 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib - // n x (alloc type, numstackids, numstackids x stackidindex) + // n x (alloc type, context radix tree index) // optional: nummib x (numcontext x total size) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY)); + // n x entry + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + // First walk through all the functions and collect the 
allocation contexts in + // their associated summaries, for use in constructing a radix tree of + // contexts. Note that we need to do this in the same order as the functions + // are processed further below since the call stack positions in the resulting + // radix tree array are identified based on this order. + MapVector> CallStacks; + for (const Function &F : M) { + // Summary emission does not support anonymous functions, they have to be + // renamed using the anonymous function renaming pass. + if (!F.hasName()) + report_fatal_error("Unexpected anonymous function when writing summary"); + + ValueInfo VI = Index->getValueInfo(F.getGUID()); + if (!VI || VI.getSummaryList().empty()) { + // Only declarations should not have a summary (a declaration might + // however have a summary if the def was in module level asm). + assert(F.isDeclaration()); + continue; + } + auto *Summary = VI.getSummaryList()[0].get(); + FunctionSummary *FS = cast(Summary); + collectMemProfCallStacks( + FS, /*GetStackIndex*/ [](unsigned I) { return I; }, CallStacks); + } + // Finalize the radix tree, write it out, and get the map of positions in the + // linearized tree array. + DenseMap CallStackPos; + if (!CallStacks.empty()) { + CallStackPos = + writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev); + } + + // Keep track of the current index into the CallStackPos map. + CallStackId CallStackCount = 0; + SmallVector NameVals; // Iterate over the list of functions instead of the Index to // ensure the ordering is stable. 
@@ -4555,7 +4648,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { auto *Summary = VI.getSummaryList()[0].get(); writePerModuleFunctionSummaryRecord( NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev, - FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F); + FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F, + CallStackPos, CallStackCount); } // Capture references from GlobalVariable initializers, which are outside @@ -4692,13 +4786,20 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver - // nummib x (alloc type, numstackids, numstackids x stackidindex), + // nummib x (alloc type, context radix tree index), // numver x version // optional: nummib x total size Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY)); + // n x entry + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool { if (DecSummaries == nullptr) return false; @@ -4735,6 +4836,41 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.clear(); }; + // First walk through all the functions and collect the allocation contexts in + // their associated summaries, for use in constructing a radix tree of + // contexts. Note that we need to do this in the same order as the functions + // are processed further below since the call stack positions in the resulting + // radix tree array are identified based on this order. 
+ MapVector> CallStacks; + forEachSummary([&](GVInfo I, bool IsAliasee) { + GlobalValueSummary *S = I.second; + assert(S); + auto *FS = dyn_cast(S); + if (!FS) + return; + collectMemProfCallStacks( + FS, + /*GetStackIndex*/ + [&](unsigned I) { + // Get the corresponding index into the list of StackIds actually + // being written for this combined index (which may be a subset in + // the case of distributed indexes). + assert(StackIdIndicesToIndex.contains(I)); + return StackIdIndicesToIndex[I]; + }, + CallStacks); + }); + // Finalize the radix tree, write it out, and get the map of positions in the + // linearized tree array. + DenseMap CallStackPos; + if (!CallStacks.empty()) { + CallStackPos = + writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev); + } + + // Keep track of the current index into the CallStackPos map. + CallStackId CallStackCount = 0; + DenseSet DefOrUseGUIDs; forEachSummary([&](GVInfo I, bool IsAliasee) { GlobalValueSummary *S = I.second; @@ -4813,7 +4949,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { assert(StackIdIndicesToIndex.contains(I)); return StackIdIndicesToIndex[I]; }, - /*WriteContextSizeInfoIndex*/ false); + /*WriteContextSizeInfoIndex*/ false, CallStackPos, CallStackCount); NameVals.push_back(*ValueId); assert(ModuleIdMap.count(FS->modulePath())); diff --git a/llvm/lib/Bitcode/Writer/CMakeLists.txt b/llvm/lib/Bitcode/Writer/CMakeLists.txt index 1cc1802bc9aaf0..2c508ca9fae95e 100644 --- a/llvm/lib/Bitcode/Writer/CMakeLists.txt +++ b/llvm/lib/Bitcode/Writer/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_component_library(LLVMBitWriter Core MC Object + ProfileData Support TargetParser ) diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index f77b733c6c8f69..e7b9417de8c9f7 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -596,10 +596,10 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, // Following attributes are 
completely benign as far as calling convention // goes, they shouldn't affect whether the call is a tail call. - for (const auto &Attr : - {Attribute::Alignment, Attribute::Dereferenceable, - Attribute::DereferenceableOrNull, Attribute::NoAlias, - Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) { + for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, + Attribute::DereferenceableOrNull, Attribute::NoAlias, + Attribute::NonNull, Attribute::NoUndef, + Attribute::Range, Attribute::NoFPClass}) { CallerAttrs.removeAttribute(Attr); CalleeAttrs.removeAttribute(Attr); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index ff9be3dc24ce59..3072edc5088e2a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -161,6 +161,13 @@ static cl::bits PgoAnalysisMapFeatures( "Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is " "extracted from PGO related analysis.")); +static cl::opt BBAddrMapSkipEmitBBEntries( + "basic-block-address-map-skip-bb-entries", + cl::desc("Skip emitting basic block entries in the SHT_LLVM_BB_ADDR_MAP " + "section. 
It's used to save binary size when BB entries are " + "unnecessary for some PGOAnalysisMap features."), + cl::Hidden, cl::init(false)); + static cl::opt EmitJumpTableSizesSection( "emit-jump-table-sizes-section", cl::desc("Emit a section containing jump table addresses and sizes"), @@ -1411,8 +1418,15 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) { bool BrProbEnabled = AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb)); + + if ((BBFreqEnabled || BrProbEnabled) && BBAddrMapSkipEmitBBEntries) { + MF.getFunction().getContext().emitError( + "BB entries info is required for BBFreq and BrProb " + "features"); + } return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled, - MF.hasBBSections() && NumMBBSectionRanges > 1}; + MF.hasBBSections() && NumMBBSectionRanges > 1, + static_cast(BBAddrMapSkipEmitBBEntries)}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { @@ -1469,24 +1483,28 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->emitULEB128IntValue(MBBSectionNumBlocks[MBB.getSectionID()]); PrevMBBEndSymbol = MBBSymbol; } - // TODO: Remove this check when version 1 is deprecated. - if (BBAddrMapVersion > 1) { - OutStreamer->AddComment("BB id"); - // Emit the BB ID for this basic block. - // We only emit BaseID since CloneID is unset for - // -basic-block-adress-map. - // TODO: Emit the full BBID when labels and sections can be mixed - // together. - OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID); + + if (!Features.OmitBBEntries) { + // TODO: Remove this check when version 1 is deprecated. + if (BBAddrMapVersion > 1) { + OutStreamer->AddComment("BB id"); + // Emit the BB ID for this basic block. + // We only emit BaseID since CloneID is unset for + // -basic-block-adress-map. + // TODO: Emit the full BBID when labels and sections can be mixed + // together. 
+ OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID); + } + // Emit the basic block offset relative to the end of the previous block. + // This is zero unless the block is padded due to alignment. + emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol); + // Emit the basic block size. When BBs have alignments, their size cannot + // always be computed from their offsets. + emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol); + // Emit the Metadata. + OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); } - // Emit the basic block offset relative to the end of the previous block. - // This is zero unless the block is padded due to alignment. - emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol); - // Emit the basic block size. When BBs have alignments, their size cannot - // always be computed from their offsets. - emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol); - // Emit the Metadata. - OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); + PrevMBBEndSymbol = MBB.getEndSymbol(); } diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 4274c1347d648a..5ca223852cbde3 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -175,8 +175,7 @@ struct CachingVPExpander { VPIntrinsic &PI); /// Lower this VP int call to a unpredicated int call. - Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI, - unsigned UnpredicatedIntrinsicID); + Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI); /// Lower this VP fp call to a unpredicated fp call. 
Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI, @@ -287,33 +286,19 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, return NewBinOp; } -Value *CachingVPExpander::expandPredicationToIntCall( - IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) { - switch (UnpredicatedIntrinsicID) { - case Intrinsic::abs: - case Intrinsic::smax: - case Intrinsic::smin: - case Intrinsic::umax: - case Intrinsic::umin: { - Value *Op0 = VPI.getOperand(0); - Value *Op1 = VPI.getOperand(1); - Value *NewOp = Builder.CreateIntrinsic( - UnpredicatedIntrinsicID, {VPI.getType()}, {Op0, Op1}, - /*FMFSource=*/nullptr, VPI.getName()); - replaceOperation(*NewOp, VPI); - return NewOp; - } - case Intrinsic::bswap: - case Intrinsic::bitreverse: { - Value *Op = VPI.getOperand(0); - Value *NewOp = - Builder.CreateIntrinsic(UnpredicatedIntrinsicID, {VPI.getType()}, {Op}, - /*FMFSource=*/nullptr, VPI.getName()); - replaceOperation(*NewOp, VPI); - return NewOp; - } +Value *CachingVPExpander::expandPredicationToIntCall(IRBuilder<> &Builder, + VPIntrinsic &VPI) { + std::optional FID = VPI.getFunctionalIntrinsicID(); + if (!FID) + return nullptr; + SmallVector Argument; + for (unsigned i = 0; i < VPI.getNumOperands() - 3; i++) { + Argument.push_back(VPI.getOperand(i)); } - return nullptr; + Value *NewOp = Builder.CreateIntrinsic(FID.value(), {VPI.getType()}, Argument, + /*FMFSource=*/nullptr, VPI.getName()); + replaceOperation(*NewOp, VPI); + return NewOp; } Value *CachingVPExpander::expandPredicationToFPCall( @@ -323,20 +308,15 @@ Value *CachingVPExpander::expandPredicationToFPCall( switch (UnpredicatedIntrinsicID) { case Intrinsic::fabs: - case Intrinsic::sqrt: { - Value *Op0 = VPI.getOperand(0); - Value *NewOp = - Builder.CreateIntrinsic(UnpredicatedIntrinsicID, {VPI.getType()}, {Op0}, - /*FMFSource=*/nullptr, VPI.getName()); - replaceOperation(*NewOp, VPI); - return NewOp; - } + case Intrinsic::sqrt: case Intrinsic::maxnum: case 
Intrinsic::minnum: { - Value *Op0 = VPI.getOperand(0); - Value *Op1 = VPI.getOperand(1); + SmallVector Argument; + for (unsigned i = 0; i < VPI.getNumOperands() - 3; i++) { + Argument.push_back(VPI.getOperand(i)); + } Value *NewOp = Builder.CreateIntrinsic( - UnpredicatedIntrinsicID, {VPI.getType()}, {Op0, Op1}, + UnpredicatedIntrinsicID, {VPI.getType()}, Argument, /*FMFSource=*/nullptr, VPI.getName()); replaceOperation(*NewOp, VPI); return NewOp; @@ -438,56 +418,13 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder, VPIntrinsic &VPI) { - Value *CastOp = nullptr; - switch (VPI.getIntrinsicID()) { - default: - llvm_unreachable("Not a VP cast intrinsic"); - case Intrinsic::vp_sext: - CastOp = - Builder.CreateSExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_zext: - CastOp = - Builder.CreateZExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_trunc: - CastOp = - Builder.CreateTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_inttoptr: - CastOp = - Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_ptrtoint: - CastOp = - Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_fptosi: - CastOp = - Builder.CreateFPToSI(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; + Intrinsic::ID VPID = VPI.getIntrinsicID(); + unsigned CastOpcode = VPIntrinsic::getFunctionalOpcodeForVP(VPID).value(); + assert(Instruction::isCast(CastOpcode)); + Value *CastOp = + Builder.CreateCast(Instruction::CastOps(CastOpcode), VPI.getOperand(0), + VPI.getType(), VPI.getName()); - case Intrinsic::vp_fptoui: - CastOp = - Builder.CreateFPToUI(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_sitofp: - CastOp = - Builder.CreateSIToFP(VPI.getOperand(0), 
VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_uitofp: - CastOp = - Builder.CreateUIToFP(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_fptrunc: - CastOp = - Builder.CreateFPTrunc(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - case Intrinsic::vp_fpext: - CastOp = - Builder.CreateFPExt(VPI.getOperand(0), VPI.getType(), VPI.getName()); - break; - } replaceOperation(*CastOp, VPI); return CastOp; } @@ -672,8 +609,7 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { case Intrinsic::vp_umin: case Intrinsic::vp_bswap: case Intrinsic::vp_bitreverse: - return expandPredicationToIntCall(Builder, VPI, - VPI.getFunctionalIntrinsicID().value()); + return expandPredicationToIntCall(Builder, VPI); case Intrinsic::vp_fabs: case Intrinsic::vp_sqrt: case Intrinsic::vp_maxnum: diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index b7541effafe5ce..30c2d089c31214 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -101,6 +101,12 @@ LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx, }; } +LegalityPredicate LegalityPredicates::isPointerVector(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].isPointerVector(); + }; +} + LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx, LLT EltTy) { return [=](const LegalityQuery &Query) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 062dbbe904de33..321760ef822bc2 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3697,6 +3697,41 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +// This bitcasts a shuffle vector to a different type currently of the same +// element size. 
Mostly used to legalize ptr vectors, where ptrtoint/inttoptr +// will be used instead. +// +// <16 x p0> = G_CONCAT_VECTORS <4 x p0>, <4 x p0>, mask +// ===> +// <4 x s64> = G_PTRTOINT <4 x p0> +// <4 x s64> = G_PTRTOINT <4 x p0> +// <16 x s64> = G_CONCAT_VECTORS <4 x s64>, <4 x s64>, mask +// <16 x p0> = G_INTTOPTR <16 x s64> +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + auto ShuffleMI = cast(&MI); + LLT DstTy = MRI.getType(ShuffleMI->getReg(0)); + LLT SrcTy = MRI.getType(ShuffleMI->getReg(1)); + + // We currently only handle vectors of the same size. + if (TypeIdx != 0 || + CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() || + CastTy.getElementCount() != DstTy.getElementCount()) + return UnableToLegalize; + + LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType()); + + auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1)); + auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2)); + auto Shuf = + MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask()); + MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf); + + MI.eraseFromParent(); + return Legalized; +} + /// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy. 
/// /// = G_EXTRACT_SUBVECTOR , N @@ -4133,6 +4168,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { return bitcastInsertVectorElt(MI, TypeIdx, CastTy); case TargetOpcode::G_CONCAT_VECTORS: return bitcastConcatVector(MI, TypeIdx, CastTy); + case TargetOpcode::G_SHUFFLE_VECTOR: + return bitcastShuffleVector(MI, TypeIdx, CastTy); case TargetOpcode::G_EXTRACT_SUBVECTOR: return bitcastExtractSubvector(MI, TypeIdx, CastTy); case TargetOpcode::G_INSERT_SUBVECTOR: diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index d910e33ac40f65..be347006a81f92 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -600,12 +600,13 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst, return buildCopy(Dst, Src); unsigned Opcode; - if (SrcTy.isPointer() && DstTy.isScalar()) + if (SrcTy.isPointerOrPointerVector()) Opcode = TargetOpcode::G_PTRTOINT; - else if (DstTy.isPointer() && SrcTy.isScalar()) + else if (DstTy.isPointerOrPointerVector()) Opcode = TargetOpcode::G_INTTOPTR; else { - assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet"); + assert(!SrcTy.isPointerOrPointerVector() && + !DstTy.isPointerOrPointerVector() && "no G_ADDRCAST yet"); Opcode = TargetOpcode::G_BITCAST; } diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index d8b3190f31003e..d4ac131a32a959 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -164,14 +164,6 @@ bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI, TRI->getCoveringSubRegIndexes(*MRI, TargetRegClass, NeedDef, SubRegIndexNeedInsert); - // It's not possible to create the INIT_UNDEF when there is no register - // class associated for the subreg. This may happen for artificial subregs - // that are not directly addressable. 
- if (any_of(SubRegIndexNeedInsert, [&](unsigned Ind) -> bool { - return !TRI->getSubRegisterClass(TargetRegClass, Ind); - })) - continue; - Register LatestReg = Reg; for (auto ind : SubRegIndexNeedInsert) { Changed = true; diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index ba0015d3ddacb6..c31454a8affda5 100644 --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/EHUtils.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -128,6 +129,9 @@ static bool isColdBlock(const MachineBasicBlock &MBB, } bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { + // Do not split functions when -basic-block-sections=all is specified. + if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All) + return false; // We target functions with profile data. Static information in the form // of exception handling code may be split to cold if user passes the // mfs-split-ehcode flag. @@ -139,6 +143,14 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { if (!TII.isFunctionSafeToSplit(MF)) return false; + // Do not split functions with BasicBlockSections profiles as they will + // be split by the BasicBlockSections pass. + auto BBSectionsProfile = + getAnalysisIfAvailable(); + if (BBSectionsProfile != nullptr && + BBSectionsProfile->getBBSPR().isFunctionHot(MF.getName())) + return false; + // Renumbering blocks here preserves the order of the blocks as // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort // blocks. 
Preserving the order of blocks is essential to retaining decisions @@ -201,6 +213,7 @@ void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addUsedIfAvailable(); } char MachineFunctionSplitter::ID = 0; diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 3910046a1652b1..b08a93ae9a6d58 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -3033,7 +3033,11 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!MOP.getReg().isPhysical()) continue; - if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg)) + if (MOP.getReg() != Reg && + all_of(TRI->regunits(Reg), [&](const MCRegUnit RegUnit) { + return llvm::is_contained(TRI->regunits(MOP.getReg()), + RegUnit); + })) Bad = false; } } diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 073ce367af1b85..2e1f498c090d1a 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1374,27 +1374,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr &NewMI = *std::prev(MII); NewMI.setDebugLoc(DL); - // In a situation like the following: - // - // undef %2.subreg:reg = INST %1:reg ; DefMI (rematerializable), - // ; DefSubIdx = subreg - // %3:reg = COPY %2 ; SrcIdx = DstIdx = 0 - // .... = SOMEINSTR %3:reg - // - // there are no subranges for %3 so after rematerialization we need - // to explicitly create them. Undefined subranges are removed later on. 
- if (DstReg.isVirtual() && DefSubIdx && !CP.getSrcIdx() && !CP.getDstIdx() && - MRI->shouldTrackSubRegLiveness(DstReg)) { - LiveInterval &DstInt = LIS->getInterval(DstReg); - if (!DstInt.hasSubRanges()) { - LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg); - LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(DefSubIdx); - LaneBitmask UnusedLanes = FullMask & ~UsedLanes; - DstInt.createSubRangeFrom(LIS->getVNInfoAllocator(), UsedLanes, DstInt); - DstInt.createSubRangeFrom(LIS->getVNInfoAllocator(), UnusedLanes, DstInt); - } - } - // In a situation like the following: // %0:subreg = instr ; DefMI, subreg = DstIdx // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0 @@ -1507,7 +1486,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, NewRC = TRI->getCommonSubClass(NewRC, DefRC); assert(NewRC && "subreg chosen for remat incompatible with instruction"); } - // Remap subranges to new lanemask and change register class. LiveInterval &DstInt = LIS->getInterval(DstReg); for (LiveInterval::SubRange &SR : DstInt.subranges()) { diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 81796fcf2842a8..6b6d590a7ff8b2 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -127,77 +127,26 @@ class SelectOptimizeImpl { /// act like selects. For example Or(Zext(icmp), X) can be treated like /// select(icmp, X|1, X). class SelectLike { - SelectLike(Instruction *I) : I(I) {} - /// The select (/or) instruction. Instruction *I; /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as /// opposed to the original condition. bool Inverted = false; - public: - /// Match a select or select-like instruction, returning a SelectLike. - static SelectLike match(Instruction *I) { - // Select instruction are what we are usually looking for. - if (isa(I)) - return SelectLike(I); - - // An Or(zext(i1 X), Y) can also be treated like a select, with condition - // C and values Y|1 and Y. 
- Value *X; - if (PatternMatch::match( - I, m_c_Or(m_OneUse(m_ZExt(m_Value(X))), m_Value())) && - X->getType()->isIntegerTy(1)) - return SelectLike(I); - - return SelectLike(nullptr); - } + /// The index of the operand that depends on condition. Only for select-like + /// instruction such as Or/Add. + unsigned CondIdx; - bool isValid() { return I; } - operator bool() { return isValid(); } - - /// Invert the select by inverting the condition and switching the operands. - void setInverted() { - assert(!Inverted && "Trying to invert an inverted SelectLike"); - assert(isa(getCondition()) && - cast(getCondition())->getOpcode() == - Instruction::Xor); - Inverted = true; - } - bool isInverted() const { return Inverted; } + public: + SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0) + : I(I), Inverted(Inverted), CondIdx(CondIdx) {} Instruction *getI() { return I; } const Instruction *getI() const { return I; } Type *getType() const { return I->getType(); } - Value *getNonInvertedCondition() const { - if (auto *Sel = dyn_cast(I)) - return Sel->getCondition(); - // Or(zext) case - if (auto *BO = dyn_cast(I)) { - Value *X; - if (PatternMatch::match(BO->getOperand(0), - m_OneUse(m_ZExt(m_Value(X))))) - return X; - if (PatternMatch::match(BO->getOperand(1), - m_OneUse(m_ZExt(m_Value(X))))) - return X; - } - - llvm_unreachable("Unhandled case in getCondition"); - } - - /// Return the condition for the SelectLike instruction. For example the - /// condition of a select or c in `or(zext(c), x)` - Value *getCondition() const { - Value *CC = getNonInvertedCondition(); - // For inverted conditions the CC is checked when created to be a not - // (xor) instruction. - if (Inverted) - return cast(CC)->getOperand(0); - return CC; - } + unsigned getConditionOpIndex() { return CondIdx; }; /// Return the true value for the SelectLike instruction. Note this may not /// exist for all SelectLike instructions. 
For example, for `or(zext(c), x)` @@ -224,74 +173,56 @@ class SelectOptimizeImpl { return getTrueValue(/*HonorInverts=*/false); if (auto *Sel = dyn_cast(I)) return Sel->getFalseValue(); - // Or(zext) case - return the operand which is not the zext. - if (auto *BO = dyn_cast(I)) { - Value *X; - if (PatternMatch::match(BO->getOperand(0), - m_OneUse(m_ZExt(m_Value(X))))) - return BO->getOperand(1); - if (PatternMatch::match(BO->getOperand(1), - m_OneUse(m_ZExt(m_Value(X))))) - return BO->getOperand(0); - } + // We are on the branch where the condition is zero, which means BinOp + // does not perform any computation, and we can simply return the operand + // that is not related to the condition + if (auto *BO = dyn_cast(I)) + return BO->getOperand(1 - CondIdx); llvm_unreachable("Unhandled case in getFalseValue"); } - /// Return the NonPredCost cost of the true op, given the costs in - /// InstCostMap. This may need to be generated for select-like instructions. - Scaled64 getTrueOpCost(DenseMap &InstCostMap, - const TargetTransformInfo *TTI) { - if (isa(I)) - if (auto *I = dyn_cast(getTrueValue())) { - auto It = InstCostMap.find(I); - return It != InstCostMap.end() ? It->second.NonPredCost - : Scaled64::getZero(); - } - - // Or case - add the cost of an extra Or to the cost of the False case. - if (isa(I)) - if (auto I = dyn_cast(getFalseValue())) { - auto It = InstCostMap.find(I); - if (It != InstCostMap.end()) { - InstructionCost OrCost = TTI->getArithmeticInstrCost( - Instruction::Or, I->getType(), TargetTransformInfo::TCK_Latency, - {TargetTransformInfo::OK_AnyValue, - TargetTransformInfo::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2}); - return It->second.NonPredCost + Scaled64::get(*OrCost.getValue()); - } - } - - return Scaled64::getZero(); - } - - /// Return the NonPredCost cost of the false op, given the costs in - /// InstCostMap. This may need to be generated for select-like instructions. 
- Scaled64 - getFalseOpCost(DenseMap &InstCostMap, - const TargetTransformInfo *TTI) { - if (isa(I)) - if (auto *I = dyn_cast(getFalseValue())) { - auto It = InstCostMap.find(I); + /// Return the NonPredCost cost of the op on \p isTrue branch, given the + /// costs in \p InstCostMap. This may need to be generated for select-like + /// instructions. + Scaled64 getOpCostOnBranch( + bool IsTrue, const DenseMap &InstCostMap, + const TargetTransformInfo *TTI) { + auto *V = IsTrue ? getTrueValue() : getFalseValue(); + if (V) { + if (auto *IV = dyn_cast(V)) { + auto It = InstCostMap.find(IV); return It != InstCostMap.end() ? It->second.NonPredCost : Scaled64::getZero(); } - - // Or case - return the cost of the false case - if (isa(I)) - if (auto I = dyn_cast(getFalseValue())) - if (auto It = InstCostMap.find(I); It != InstCostMap.end()) - return It->second.NonPredCost; - - return Scaled64::getZero(); + return Scaled64::getZero(); + } + // If getTrue(False)Value() return nullptr, it means we are dealing with + // select-like instructions on the branch where the actual computation is + // happening. In that case the cost is equal to the cost of computation + + // cost of non-dependant on condition operand + InstructionCost Cost = TTI->getArithmeticInstrCost( + getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency, + {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2}); + auto TotalCost = Scaled64::get(*Cost.getValue()); + if (auto *OpI = dyn_cast(I->getOperand(1 - CondIdx))) { + auto It = InstCostMap.find(OpI); + if (It != InstCostMap.end()) + TotalCost += It->second.NonPredCost; + } + return TotalCost; } }; private: - // Select groups consist of consecutive select instructions with the same - // condition. - using SelectGroup = SmallVector; + // Select groups consist of consecutive select-like instructions with the same + // condition. 
Between select-likes could be any number of auxiliary + // instructions related to the condition like not, zext + struct SelectGroup { + Value *Condition; + SmallVector Selects; + }; using SelectGroups = SmallVector; // Converts select instructions of a function to conditional jumps when deemed @@ -351,6 +282,11 @@ class SelectOptimizeImpl { SmallDenseMap getSImap(const SelectGroups &SIGroups); + // Returns a map from select-like instructions to the corresponding select + // group. + SmallDenseMap + getSGmap(const SelectGroups &SIGroups); + // Returns the latency cost of a given instruction. std::optional computeInstCost(const Instruction *I); @@ -529,34 +465,45 @@ void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F, } } -/// If \p isTrue is true, return the true value of \p SI, otherwise return -/// false value of \p SI. If the true/false value of \p SI is defined by any -/// select instructions in \p Selects, look through the defining select -/// instruction until the true/false value is not defined in \p Selects. -static Value * -getTrueOrFalseValue(SelectOptimizeImpl::SelectLike SI, bool isTrue, - const SmallPtrSet &Selects, - IRBuilder<> &IB) { - Value *V = nullptr; - for (SelectInst *DefSI = dyn_cast(SI.getI()); - DefSI != nullptr && Selects.count(DefSI); - DefSI = dyn_cast(V)) { - if (DefSI->getCondition() == SI.getCondition()) - V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); - else // Handle inverted SI - V = (!isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); +/// Returns optimised value on \p IsTrue branch. For SelectInst that would be +/// either True or False value. For (BinaryOperator) instructions, where the +/// condition may be skipped, the operation will use a non-conditional operand. +/// For example, for `or(V,zext(cond))` this function would return V. 
+/// However, if the conditional operand on \p IsTrue branch matters, we create a +/// clone of instruction at the end of that branch \p B and replace the +/// condition operand with a constant. +/// +/// Also /p OptSelects contains previously optimised select-like instructions. +/// If the current value uses one of the optimised values, we can optimise it +/// further by replacing it with the corresponding value on the given branch +static Value *getTrueOrFalseValue( + SelectOptimizeImpl::SelectLike &SI, bool isTrue, + SmallDenseMap, 2> &OptSelects, + BasicBlock *B) { + Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue(); + if (V) { + auto *IV = dyn_cast(V); + if (IV && OptSelects.count(IV)) + return isTrue ? OptSelects[IV].first : OptSelects[IV].second; + return V; } - if (isa(SI.getI())) { - assert(SI.getI()->getOpcode() == Instruction::Or && - "Only currently handling Or instructions."); - V = SI.getFalseValue(); - if (isTrue) - V = IB.CreateOr(V, ConstantInt::get(V->getType(), 1)); - } + auto *BO = cast(SI.getI()); + assert(BO->getOpcode() == Instruction::Or && + "Only currently handling Or instructions."); + + auto *CBO = BO->clone(); + auto CondIdx = SI.getConditionOpIndex(); + CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1)); - assert(V && "Failed to get select true/false value"); - return V; + unsigned OtherIdx = 1 - CondIdx; + if (auto *IV = dyn_cast(CBO->getOperand(OtherIdx))) { + if (OptSelects.count(IV)) + CBO->setOperand(OtherIdx, + isTrue ? 
OptSelects[IV].first : OptSelects[IV].second); + } + CBO->insertBefore(B->getTerminator()); + return CBO; } void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { @@ -602,7 +549,9 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { SmallVector, 2> TrueSlices, FalseSlices; typedef std::stack::size_type StackSizeType; StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0; - for (SelectLike SI : ASI) { + for (SelectLike &SI : ASI.Selects) { + if (!isa(SI.getI())) + continue; // For each select, compute the sinkable dependence chains of the true and // false operands. if (auto *TI = dyn_cast_or_null(SI.getTrueValue())) { @@ -649,8 +598,8 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { } // We split the block containing the select(s) into two blocks. - SelectLike SI = ASI.front(); - SelectLike LastSI = ASI.back(); + SelectLike &SI = ASI.Selects.front(); + SelectLike &LastSI = ASI.Selects.back(); BasicBlock *StartBlock = SI.getI()->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI())); // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With @@ -664,19 +613,21 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // Delete the unconditional branch that was just created by the split. StartBlock->getTerminator()->eraseFromParent(); - // Move any debug/pseudo instructions and not's that were in-between the + // Move any debug/pseudo and auxiliary instructions that were in-between the // select group to the newly-created end block. 
SmallVector SinkInstrs; auto DIt = SI.getI()->getIterator(); + auto NIt = ASI.Selects.begin(); while (&*DIt != LastSI.getI()) { - if (DIt->isDebugOrPseudoInst()) - SinkInstrs.push_back(&*DIt); - if (match(&*DIt, m_Not(m_Specific(SI.getCondition())))) + if (NIt != ASI.Selects.end() && &*DIt == NIt->getI()) + ++NIt; + else SinkInstrs.push_back(&*DIt); DIt++; } + auto InsertionPoint = EndBlock->getFirstInsertionPt(); for (auto *DI : SinkInstrs) - DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt()); + DI->moveBeforePreserving(&*InsertionPoint); // Duplicate implementation for DbgRecords, the non-instruction debug-info // format. Helper lambda for moving DbgRecords to the end block. @@ -700,7 +651,15 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // At least one will become an actual new basic block. BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr; - if (!TrueSlicesInterleaved.empty()) { + // Checks if select-like instruction would materialise on the given branch + auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) { + for (auto &SL : SG.Selects) { + if ((IsTrue ? 
SL.getTrueValue() : SL.getFalseValue()) == nullptr) + return true; + } + return false; + }; + if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) { TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink", EndBlock->getParent(), EndBlock); TrueBranch = BranchInst::Create(EndBlock, TrueBlock); @@ -708,7 +667,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { for (Instruction *TrueInst : TrueSlicesInterleaved) TrueInst->moveBefore(TrueBranch); } - if (!FalseSlicesInterleaved.empty()) { + if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) { FalseBlock = BasicBlock::Create(EndBlock->getContext(), "select.false.sink", EndBlock->getParent(), EndBlock); @@ -748,93 +707,166 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { FT = FalseBlock; } IRBuilder<> IB(SI.getI()); - auto *CondFr = IB.CreateFreeze(SI.getCondition(), - SI.getCondition()->getName() + ".frozen"); + auto *CondFr = + IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen"); - SmallPtrSet INS; - for (auto SI : ASI) - INS.insert(SI.getI()); + SmallDenseMap, 2> INS; // Use reverse iterator because later select may use the value of the // earlier select, and we need to propagate value through earlier select // to get the PHI operand. - for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { - SelectLike SI = *It; + InsertionPoint = EndBlock->begin(); + for (SelectLike &SI : ASI.Selects) { // The select itself is replaced with a PHI Node. 
PHINode *PN = PHINode::Create(SI.getType(), 2, ""); - PN->insertBefore(EndBlock->begin()); + PN->insertBefore(InsertionPoint); PN->takeName(SI.getI()); - PN->addIncoming(getTrueOrFalseValue(SI, true, INS, IB), TrueBlock); - PN->addIncoming(getTrueOrFalseValue(SI, false, INS, IB), FalseBlock); - PN->setDebugLoc(SI.getI()->getDebugLoc()); + // Current instruction might be a condition of some other group, so we + // need to replace it there to avoid dangling pointer + if (PN->getType()->isIntegerTy(1)) { + for (auto &SG : ProfSIGroups) { + if (SG.Condition == SI.getI()) + SG.Condition = PN; + } + } SI.getI()->replaceAllUsesWith(PN); - INS.erase(SI.getI()); + auto *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock); + auto *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock); + INS[PN] = {TV, FV}; + PN->addIncoming(TV, TrueBlock); + PN->addIncoming(FV, FalseBlock); + PN->setDebugLoc(SI.getI()->getDebugLoc()); ++NumSelectsConverted; } IB.CreateCondBr(CondFr, TT, FT, SI.getI()); // Remove the old select instructions, now that they are not longer used. - for (auto SI : ASI) + for (SelectLike &SI : ASI.Selects) SI.getI()->eraseFromParent(); } } void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups) { + // Represents something that can be considered as select instruction. + // Auxiliary instruction are instructions that depends on a condition and have + // zero or some constant value on True/False branch, such as: + // * ZExt(1bit) + // * Not(1bit) + struct SelectLikeInfo { + Value *Cond; + bool IsAuxiliary; + bool IsInverted; + unsigned ConditionIdx; + }; + + std::map SelectInfo; + + // Check if the instruction is SelectLike or might be part of SelectLike + // expression, put information into SelectInfo and return the iterator to the + // inserted position. 
+ auto ProcessSelectInfo = [&SelectInfo](Instruction *I) { + Value *Cond; + if (match(I, m_OneUse(m_ZExt(m_Value(Cond)))) && + Cond->getType()->isIntegerTy(1)) { + bool Inverted = match(Cond, m_Not(m_Value(Cond))); + return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first; + } + + if (match(I, m_Not(m_Value(Cond)))) { + return SelectInfo.insert({I, {Cond, true, true, 0}}).first; + } + + // Select instruction are what we are usually looking for. + if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) { + bool Inverted = match(Cond, m_Not(m_Value(Cond))); + return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first; + } + + // An Or(zext(i1 X), Y) can also be treated like a select, with condition X + // and values Y|1 and Y. + if (auto *BO = dyn_cast(I)) { + if (BO->getType()->isIntegerTy(1) || BO->getOpcode() != Instruction::Or) + return SelectInfo.end(); + + for (unsigned Idx = 0; Idx < 2; Idx++) { + auto *Op = BO->getOperand(Idx); + auto It = SelectInfo.find(Op); + if (It != SelectInfo.end() && It->second.IsAuxiliary) + return SelectInfo + .insert({I, {It->second.Cond, false, It->second.IsInverted, Idx}}) + .first; + } + } + return SelectInfo.end(); + }; + + bool AlreadyProcessed = false; BasicBlock::iterator BBIt = BB.begin(); + std::map::iterator It; while (BBIt != BB.end()) { Instruction *I = &*BBIt++; - if (SelectLike SI = SelectLike::match(I)) { - if (!TTI->shouldTreatInstructionLikeSelect(I)) - continue; + if (I->isDebugOrPseudoInst()) + continue; - SelectGroup SIGroup; - SIGroup.push_back(SI); - while (BBIt != BB.end()) { - Instruction *NI = &*BBIt; - // Debug/pseudo instructions should be skipped and not prevent the - // formation of a select group. 
- if (NI->isDebugOrPseudoInst()) { - ++BBIt; - continue; - } + if (!AlreadyProcessed) + It = ProcessSelectInfo(I); + else + AlreadyProcessed = false; - // Skip not(select(..)), if the not is part of the same select group - if (match(NI, m_Not(m_Specific(SI.getCondition())))) { - ++BBIt; - continue; - } + if (It == SelectInfo.end() || It->second.IsAuxiliary) + continue; + + if (!TTI->shouldTreatInstructionLikeSelect(I)) + continue; + + Value *Cond = It->second.Cond; + // Vector conditions are not supported. + if (!Cond->getType()->isIntegerTy(1)) + continue; + + SelectGroup SIGroup{Cond}; + SIGroup.Selects.emplace_back(I, It->second.IsInverted, + It->second.ConditionIdx); - // We only allow selects in the same group, not other select-like - // instructions. - if (!isa(NI)) - break; - - SelectLike NSI = SelectLike::match(NI); - if (NSI && SI.getCondition() == NSI.getCondition()) { - SIGroup.push_back(NSI); - } else if (NSI && match(NSI.getCondition(), - m_Not(m_Specific(SI.getCondition())))) { - NSI.setInverted(); - SIGroup.push_back(NSI); - } else - break; + // If the select type is not supported, no point optimizing it. + // Instruction selection will take care of it. + if (!isSelectKindSupported(SIGroup.Selects.front())) + continue; + + while (BBIt != BB.end()) { + Instruction *NI = &*BBIt; + // Debug/pseudo instructions should be skipped and not prevent the + // formation of a select group. + if (NI->isDebugOrPseudoInst()) { ++BBIt; + continue; } - // If the select type is not supported, no point optimizing it. - // Instruction selection will take care of it. 
- if (!isSelectKindSupported(SI)) - continue; + It = ProcessSelectInfo(NI); + if (It == SelectInfo.end()) { + AlreadyProcessed = true; + break; + } - LLVM_DEBUG({ - dbgs() << "New Select group with\n"; - for (auto SI : SIGroup) - dbgs() << " " << *SI.getI() << "\n"; - }); + // Auxiliary with same condition + auto [CurrCond, IsAux, IsRev, CondIdx] = It->second; + if (Cond != CurrCond) { + AlreadyProcessed = true; + break; + } - SIGroups.push_back(SIGroup); + if (!IsAux) + SIGroup.Selects.emplace_back(NI, IsRev, CondIdx); + ++BBIt; } + LLVM_DEBUG({ + dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n"; + for (auto &SI : SIGroup.Selects) + dbgs() << " " << *SI.getI() << "\n"; + }); + + SIGroups.push_back(SIGroup); } } @@ -878,12 +910,13 @@ void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops( // Assuming infinite resources, the cost of a group of instructions is the // cost of the most expensive instruction of the group. Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero(); - for (SelectLike SI : ASI) { + for (SelectLike &SI : ASI.Selects) { SelectCost = std::max(SelectCost, InstCostMap[SI.getI()].PredCost); BranchCost = std::max(BranchCost, InstCostMap[SI.getI()].NonPredCost); } if (BranchCost < SelectCost) { - OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front().getI()); + OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", + ASI.Selects.front().getI()); OR << "Profitable to convert to branch (loop analysis). BranchCost=" << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() << ". "; @@ -892,7 +925,7 @@ void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops( ProfSIGroups.push_back(ASI); } else { OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", - ASI.front().getI()); + ASI.Selects.front().getI()); ORmiss << "Select is more profitable (loop analysis). BranchCost=" << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() << ". 
"; @@ -903,7 +936,7 @@ void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops( bool SelectOptimizeImpl::isConvertToBranchProfitableBase( const SelectGroup &ASI) { - SelectLike SI = ASI.front(); + const SelectLike &SI = ASI.Selects.front(); LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI() << "\n"); OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI()); @@ -963,14 +996,14 @@ static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI, bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) { bool ColdOperand = false; uint64_t TrueWeight, FalseWeight, TotalWeight; - if (extractBranchWeights(ASI.front(), TrueWeight, FalseWeight)) { + if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) { uint64_t MinWeight = std::min(TrueWeight, FalseWeight); TotalWeight = TrueWeight + FalseWeight; // Is there a path with frequency 100 * MinWeight; } else if (PSI->hasProfileSummary()) { OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", - ASI.front().getI()); + ASI.Selects.front().getI()); ORmiss << "Profile data available but missing branch-weights metadata for " "select instruction. "; EmitAndPrintRemark(ORE, ORmiss); @@ -979,7 +1012,7 @@ bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) { return false; // Check if the cold path's dependence slice is expensive for any of the // selects of the group. 
- for (SelectLike SI : ASI) { + for (SelectLike SI : ASI.Selects) { Instruction *ColdI = nullptr; uint64_t HotWeight; if (TrueWeight < FalseWeight) { @@ -1169,7 +1202,8 @@ bool SelectOptimizeImpl::computeLoopCosts( DenseMap &InstCostMap, CostInfo *LoopCost) { LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop " << L->getHeader()->getName() << "\n"); - const auto &SImap = getSImap(SIGroups); + const auto SImap = getSImap(SIGroups); + const auto SGmap = getSGmap(SIGroups); // Compute instruction and loop-critical-path costs across two iterations for // both predicated and non-predicated version. const unsigned Iterations = 2; @@ -1216,13 +1250,14 @@ bool SelectOptimizeImpl::computeLoopCosts( // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate if (SImap.contains(&I)) { auto SI = SImap.at(&I); - Scaled64 TrueOpCost = SI.getTrueOpCost(InstCostMap, TTI); - Scaled64 FalseOpCost = SI.getFalseOpCost(InstCostMap, TTI); + const auto *SG = SGmap.at(&I); + Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI); + Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI); Scaled64 PredictedPathCost = getPredictedPathCost(TrueOpCost, FalseOpCost, SI); Scaled64 CondCost = Scaled64::getZero(); - if (auto *CI = dyn_cast(SI.getCondition())) + if (auto *CI = dyn_cast(SG->Condition)) if (InstCostMap.count(CI)) CondCost = InstCostMap[CI].NonPredCost; Scaled64 MispredictCost = getMispredictionCost(SI, CondCost); @@ -1248,11 +1283,20 @@ SmallDenseMap SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) { SmallDenseMap SImap; for (const SelectGroup &ASI : SIGroups) - for (SelectLike SI : ASI) + for (const SelectLike &SI : ASI.Selects) SImap.try_emplace(SI.getI(), SI); return SImap; } +SmallDenseMap +SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) { + SmallDenseMap SImap; + for (const SelectGroup &ASI : SIGroups) + for (const SelectLike &SI : ASI.Selects) + SImap.try_emplace(SI.getI(), &ASI); + return SImap; 
+} + std::optional SelectOptimizeImpl::computeInstCost(const Instruction *I) { InstructionCost ICost = @@ -1311,9 +1355,6 @@ SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, } bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) { - bool VectorCond = !SI.getCondition()->getType()->isIntegerTy(1); - if (VectorCond) - return false; TargetLowering::SelectSupportKind SelectKind; if (SI.getType()->isVectorTy()) SelectKind = TargetLowering::ScalarCondVectorVal; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 521829675ae7c3..57cce9f7852636 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22746,16 +22746,21 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, /// Transform a vector binary operation into a scalar binary operation by moving /// the math/logic after an extract element of a vector. -static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, - const SDLoc &DL, bool LegalOperations) { +static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, + const SDLoc &DL) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Vec = ExtElt->getOperand(0); SDValue Index = ExtElt->getOperand(1); auto *IndexC = dyn_cast(Index); - if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() || + unsigned Opc = Vec.getOpcode(); + if (!IndexC || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) || !Vec.hasOneUse() || Vec->getNumValues() != 1) return SDValue(); + EVT ResVT = ExtElt->getValueType(0); + if (Opc == ISD::SETCC && ResVT != Vec.getValueType().getVectorElementType()) + return SDValue(); + // Targets may want to avoid this to prevent an expensive register transfer. 
if (!TLI.shouldScalarizeBinop(Vec)) return SDValue(); @@ -22766,19 +22771,23 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, SDValue Op0 = Vec.getOperand(0); SDValue Op1 = Vec.getOperand(1); APInt SplatVal; - if (isAnyConstantBuildVector(Op0, true) || - ISD::isConstantSplatVector(Op0.getNode(), SplatVal) || - isAnyConstantBuildVector(Op1, true) || - ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) { - // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C' - // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC) - EVT VT = ExtElt->getValueType(0); - SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index); - SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index); - return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1); - } + if (!isAnyConstantBuildVector(Op0, true) && + !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) && + !isAnyConstantBuildVector(Op1, true) && + !ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) + return SDValue(); - return SDValue(); + // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C' + // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC) + EVT OpVT = Op0->getValueType(0).getVectorElementType(); + Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index); + Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index); + + if (Opc == ISD::SETCC) + return DAG.getSetCC(DL, ResVT, Op0, Op1, + cast(Vec->getOperand(2))->get()); + else + return DAG.getNode(Opc, DL, ResVT, Op0, Op1); } // Given a ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract, @@ -23011,7 +23020,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } - if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations)) + if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL)) return BO; if (VecVT.isScalableVector()) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 1480bd98c685e1..fbc96bade15f5a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2330,10 +2330,10 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem); const int Precision = APFloat::semanticsPrecision(FltSem); - const SDValue MaxExp = DAG.getConstant(MaxExpVal, dl, ExpVT); - const SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT); + const SDValue MaxExp = DAG.getSignedConstant(MaxExpVal, dl, ExpVT); + const SDValue MinExp = DAG.getSignedConstant(MinExpVal, dl, ExpVT); - const SDValue DoubleMaxExp = DAG.getConstant(2 * MaxExpVal, dl, ExpVT); + const SDValue DoubleMaxExp = DAG.getSignedConstant(2 * MaxExpVal, dl, ExpVT); const APFloat One(FltSem, "1.0"); APFloat ScaleUpK = scalbn(One, MaxExpVal, APFloat::rmNearestTiesToEven); @@ -2375,7 +2375,7 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { SDValue IncN0 = DAG.getNode(ISD::ADD, dl, ExpVT, N, Increment0, NUW_NSW); SDValue ClampMinVal = - DAG.getConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT); + DAG.getSignedConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT); SDValue ClampN_Small = DAG.getNode(ISD::SMAX, dl, ExpVT, N, ClampMinVal); SDValue IncN1 = DAG.getNode(ISD::ADD, dl, ExpVT, ClampN_Small, Increment1, NSW); @@ -2385,8 +2385,8 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { SDValue ScaleDown1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleDown0, ScaleDownVal); SDValue ScaleDownTwice = DAG.getSetCC( - dl, SetCCVT, N, DAG.getConstant(2 * MinExpVal + Precision, dl, ExpVT), - ISD::SETULT); + dl, SetCCVT, N, + DAG.getSignedConstant(2 * MinExpVal + Precision, dl, ExpVT), ISD::SETULT); SDValue SelectN_Small = DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleDownTwice, IncN1, IncN0); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8fbab337cab6f0..bd4bcadb57d7a9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -61,10 +61,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, // the return. Ignore following attributes because they don't affect the // call sequence. AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs()); - for (const auto &Attr : - {Attribute::Alignment, Attribute::Dereferenceable, - Attribute::DereferenceableOrNull, Attribute::NoAlias, - Attribute::NonNull, Attribute::NoUndef, Attribute::Range}) + for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable, + Attribute::DereferenceableOrNull, Attribute::NoAlias, + Attribute::NonNull, Attribute::NoUndef, + Attribute::Range, Attribute::NoFPClass}) CallerAttrs.removeAttribute(Attr); if (CallerAttrs.hasAttributes()) diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index a6159a38753cf5..d407e9f0871d4c 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1235,13 +1235,13 @@ void TargetPassConfig::addMachinePasses() { addPass(createMIRAddFSDiscriminatorsPass( sampleprof::FSDiscriminatorPass::PassLast)); - bool NeedsBBSections = - TM->getBBSectionsType() != llvm::BasicBlockSection::None; - // Machine function splitter uses the basic block sections feature. Both - // cannot be enabled at the same time. We do not apply machine function - // splitter if -basic-block-sections is requested. - if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter || - EnableMachineFunctionSplitter)) { + // Machine function splitter uses the basic block sections feature. + // When used along with `-basic-block-sections=`, the basic-block-sections + // feature takes precedence. 
This means functions eligible for + // basic-block-sections optimizations (`=all`, or `=list=` with function + // included in the list profile) will get that optimization instead. + if (TM->Options.EnableMachineFunctionSplitter || + EnableMachineFunctionSplitter) { const std::string ProfileFile = getFSProfileFile(TM); if (!ProfileFile.empty()) { if (EnableFSDiscriminator) { @@ -1260,7 +1260,8 @@ void TargetPassConfig::addMachinePasses() { } // We run the BasicBlockSections pass if either we need BB sections or BB // address map (or both). - if (NeedsBBSections || TM->Options.BBAddrMap) { + if (TM->getBBSectionsType() != llvm::BasicBlockSection::None || + TM->Options.BBAddrMap) { if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index 3af397bfc9ad19..b240a2a39de362 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -139,7 +139,7 @@ DICompileUnit *DIBuilder::createCompileUnit( DICompileUnit::DebugNameTableKind NameTableKind, bool RangesBaseAddress, StringRef SysRoot, StringRef SDK) { - assert(((Lang <= dwarf::DW_LANG_Mojo && Lang >= dwarf::DW_LANG_C89) || + assert(((Lang <= dwarf::DW_LANG_Metal && Lang >= dwarf::DW_LANG_C89) || (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && "Invalid Language tag"); diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 7f5c0159e1764c..c7b9f8744d8d35 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -77,6 +77,41 @@ Module::Module(StringRef MID, LLVMContext &C) Context.addModule(this); } +Module &Module::operator=(Module &&Other) { + assert(&Context == &Other.Context && "Module must be in the same Context"); + + dropAllReferences(); + + ModuleID = std::move(Other.ModuleID); + SourceFileName = std::move(Other.SourceFileName); + IsNewDbgInfoFormat = std::move(Other.IsNewDbgInfoFormat); 
+ + GlobalList.clear(); + GlobalList.splice(GlobalList.begin(), Other.GlobalList); + + FunctionList.clear(); + FunctionList.splice(FunctionList.begin(), Other.FunctionList); + + AliasList.clear(); + AliasList.splice(AliasList.begin(), Other.AliasList); + + IFuncList.clear(); + IFuncList.splice(IFuncList.begin(), Other.IFuncList); + + NamedMDList.clear(); + NamedMDList.splice(NamedMDList.begin(), Other.NamedMDList); + GlobalScopeAsm = std::move(Other.GlobalScopeAsm); + OwnedMemoryBuffer = std::move(Other.OwnedMemoryBuffer); + Materializer = std::move(Other.Materializer); + TargetTriple = std::move(Other.TargetTriple); + DL = std::move(Other.DL); + CurrentIntrinsicIds = std::move(Other.CurrentIntrinsicIds); + UniquedIntrinsicNames = std::move(Other.UniquedIntrinsicNames); + ModuleFlags = std::move(Other.ModuleFlags); + Context.addModule(this); + return *this; +} + Module::~Module() { Context.removeModule(this); dropAllReferences(); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 67834631b3c50d..55de486e90e190 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6390,6 +6390,55 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "llvm.amdgcn.s.prefetch.data only supports global or constant memory"); break; } + case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: + case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { + Value *Src0 = Call.getArgOperand(0); + Value *Src1 = Call.getArgOperand(1); + + uint64_t CBSZ = cast(Call.getArgOperand(3))->getZExtValue(); + uint64_t BLGP = cast(Call.getArgOperand(4))->getZExtValue(); + Check(CBSZ <= 4, "invalid value for cbsz format", Call, + Call.getArgOperand(3)); + Check(BLGP <= 4, "invalid value for blgp format", Call, + Call.getArgOperand(4)); + + // AMDGPU::MFMAScaleFormats values + auto getFormatNumRegs = [](unsigned FormatVal) { + switch (FormatVal) { + case 0: + case 1: + return 8u; + case 2: + case 3: + return 6u; + case 4: + return 4u; + default: + 
llvm_unreachable("invalid format value"); + } + }; + + auto isValidSrcASrcBVector = [](FixedVectorType *Ty) { + if (!Ty || !Ty->getElementType()->isIntegerTy(32)) + return false; + unsigned NumElts = Ty->getNumElements(); + return NumElts == 4 || NumElts == 6 || NumElts == 8; + }; + + auto *Src0Ty = dyn_cast(Src0->getType()); + auto *Src1Ty = dyn_cast(Src1->getType()); + Check(isValidSrcASrcBVector(Src0Ty), + "operand 0 must be 4, 6 or 8 element i32 vector", &Call, Src0); + Check(isValidSrcASrcBVector(Src1Ty), + "operand 1 must be 4, 6 or 8 element i32 vector", &Call, Src1); + + // Permit excess registers for the format. + Check(Src0Ty->getNumElements() >= getFormatNumRegs(CBSZ), + "invalid vector type for format", &Call, Src0, Call.getArgOperand(3)); + Check(Src1Ty->getNumElements() >= getFormatNumRegs(BLGP), + "invalid vector type for format", &Call, Src1, Call.getArgOperand(5)); + break; + } case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32: case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: { Value *V = Call.getArgOperand(0); diff --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp index cfab40a1c1595b..3c1c35adf88977 100644 --- a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp +++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -24,11 +24,6 @@ namespace mca { const unsigned WriteRef::INVALID_IID = std::numeric_limits::max(); -static std::function -isNonArtificial(const MCRegisterInfo &MRI) { - return [&MRI](MCPhysReg R) { return !MRI.isArtificial(R); }; -} - WriteRef::WriteRef(unsigned SourceIndex, WriteState *WS) : IID(SourceIndex), WriteBackCycle(), WriteResID(), RegisterID(), Write(WS) {} @@ -287,8 +282,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write, MCPhysReg ZeroRegisterID = WS.clearsSuperRegisters() ? 
RegID : WS.getRegisterID(); ZeroRegisters.setBitVal(ZeroRegisterID, IsWriteZero); - for (MCPhysReg I : - make_filter_range(MRI.subregs(ZeroRegisterID), isNonArtificial(MRI))) + for (MCPhysReg I : MRI.subregs(ZeroRegisterID)) ZeroRegisters.setBitVal(I, IsWriteZero); // If this move has been eliminated, then method tryEliminateMoveOrSwap should @@ -310,8 +304,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write, // Update the mapping for register RegID including its sub-registers. RegisterMappings[RegID].first = Write; RegisterMappings[RegID].second.AliasRegID = 0U; - for (MCPhysReg I : - make_filter_range(MRI.subregs(RegID), isNonArtificial(MRI))) { + for (MCPhysReg I : MRI.subregs(RegID)) { RegisterMappings[I].first = Write; RegisterMappings[I].second.AliasRegID = 0U; } @@ -479,8 +472,7 @@ bool RegisterFile::tryEliminateMoveOrSwap(MutableArrayRef Writes, AliasedReg = RMAlias.AliasRegID; RegisterMappings[AliasReg].second.AliasRegID = AliasedReg; - for (MCPhysReg I : - make_filter_range(MRI.subregs(AliasReg), isNonArtificial(MRI))) + for (MCPhysReg I : MRI.subregs(AliasReg)) RegisterMappings[I].second.AliasRegID = AliasedReg; if (ZeroRegisters[RS.getRegisterID()]) { diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 545a672c05c8a3..b6d0699ee4fe08 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -851,29 +851,31 @@ decodeBBAddrMapImpl(const ELFFile &EF, NumBlocksInBBRange = readULEB128As(Data, Cur, ULEBSizeErr); } std::vector BBEntries; - for (uint32_t BlockIndex = 0; !MetadataDecodeErr && !ULEBSizeErr && Cur && - (BlockIndex < NumBlocksInBBRange); - ++BlockIndex) { - uint32_t ID = Version >= 2 - ? readULEB128As(Data, Cur, ULEBSizeErr) - : BlockIndex; - uint32_t Offset = readULEB128As(Data, Cur, ULEBSizeErr); - uint32_t Size = readULEB128As(Data, Cur, ULEBSizeErr); - uint32_t MD = readULEB128As(Data, Cur, ULEBSizeErr); - if (Version >= 1) { - // Offset is calculated relative to the end of the previous BB. 
- Offset += PrevBBEndOffset; - PrevBBEndOffset = Offset + Size; - } - Expected MetadataOrErr = - BBAddrMap::BBEntry::Metadata::decode(MD); - if (!MetadataOrErr) { - MetadataDecodeErr = MetadataOrErr.takeError(); - break; + if (!FeatEnable.OmitBBEntries) { + for (uint32_t BlockIndex = 0; !MetadataDecodeErr && !ULEBSizeErr && + Cur && (BlockIndex < NumBlocksInBBRange); + ++BlockIndex) { + uint32_t ID = Version >= 2 + ? readULEB128As(Data, Cur, ULEBSizeErr) + : BlockIndex; + uint32_t Offset = readULEB128As(Data, Cur, ULEBSizeErr); + uint32_t Size = readULEB128As(Data, Cur, ULEBSizeErr); + uint32_t MD = readULEB128As(Data, Cur, ULEBSizeErr); + if (Version >= 1) { + // Offset is calculated relative to the end of the previous BB. + Offset += PrevBBEndOffset; + PrevBBEndOffset = Offset + Size; + } + Expected MetadataOrErr = + BBAddrMap::BBEntry::Metadata::decode(MD); + if (!MetadataOrErr) { + MetadataDecodeErr = MetadataOrErr.takeError(); + break; + } + BBEntries.push_back({ID, Offset, Size, *MetadataOrErr}); } - BBEntries.push_back({ID, Offset, Size, *MetadataOrErr}); + TotalNumBlocks += BBEntries.size(); } - TotalNumBlocks += BBEntries.size(); BBRangeEntries.push_back({RangeBaseAddress, std::move(BBEntries)}); } FunctionEntries.push_back({std::move(BBRangeEntries)}); diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 3214adf744bd9f..476334024151a9 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1497,7 +1497,7 @@ void ELFState::writeSectionContent( BBR.NumBlocks.value_or(BBR.BBEntries ? BBR.BBEntries->size() : 0); SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks); // Write all BBEntries in this BBRange. 
- if (!BBR.BBEntries) + if (!BBR.BBEntries || FeatureOrErr->OmitBBEntries) continue; for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *BBR.BBEntries) { ++TotalNumBlocks; diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index e43f3ac9f08d49..76638522365942 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1225,8 +1225,8 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, } } -Error IndexedMemProfReader::deserializeV12(const unsigned char *Start, - const unsigned char *Ptr) { +Error IndexedMemProfReader::deserializeV2(const unsigned char *Start, + const unsigned char *Ptr) { // The value returned from RecordTableGenerator.Emit. const uint64_t RecordTableOffset = support::endian::readNext(Ptr); @@ -1322,8 +1322,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start, const uint64_t FirstWord = support::endian::readNext(Ptr); - if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2 || - FirstWord == memprof::Version3) { + if (FirstWord == memprof::Version2 || FirstWord == memprof::Version3) { // Everything is good. We can proceed to deserialize the rest. Version = static_cast(FirstWord); } else { @@ -1336,9 +1335,8 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start, } switch (Version) { - case memprof::Version1: case memprof::Version2: - if (Error E = deserializeV12(Start, Ptr)) + if (Error E = deserializeV2(Start, Ptr)) return E; break; case memprof::Version3: @@ -1347,18 +1345,6 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start, break; } -#ifdef EXPENSIVE_CHECKS - // Go through all the records and verify that CSId has been correctly - // populated. Do this only under EXPENSIVE_CHECKS. Otherwise, we - // would defeat the purpose of OnDiskIterableChainedHashTable. 
- // Note that we can compare CSId against actual call stacks only for - // Version0 and Version1 because IndexedAllocationInfo::CallStack and - // IndexedMemProfRecord::CallSites are not populated in Version2. - if (Version <= memprof::Version1) - for (const auto &Record : MemProfRecordTable->data()) - verifyIndexedMemProfRecord(Record); -#endif - return Error::success(); } @@ -1558,25 +1544,6 @@ Expected IndexedInstrProfReader::getInstrProfRecord( return error(instrprof_error::unknown_function); } -static Expected -getMemProfRecordV0(const memprof::IndexedMemProfRecord &IndexedRecord, - MemProfFrameHashTable &MemProfFrameTable) { - memprof::FrameIdConverter FrameIdConv( - MemProfFrameTable); - - memprof::MemProfRecord Record = - memprof::MemProfRecord(IndexedRecord, FrameIdConv); - - // Check that all frame ids were successfully converted to frames. - if (FrameIdConv.LastUnmappedId) { - return make_error(instrprof_error::hash_mismatch, - "memprof frame not found for frame id " + - Twine(*FrameIdConv.LastUnmappedId)); - } - - return Record; -} - static Expected getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord, MemProfFrameHashTable &MemProfFrameTable, @@ -1631,11 +1598,6 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const { const memprof::IndexedMemProfRecord &IndexedRecord = *Iter; switch (Version) { - case memprof::Version1: - assert(MemProfFrameTable && "MemProfFrameTable must be available"); - assert(!MemProfCallStackTable && - "MemProfCallStackTable must not be available"); - return getMemProfRecordV0(IndexedRecord, *MemProfFrameTable); case memprof::Version2: assert(MemProfFrameTable && "MemProfFrameTable must be available"); assert(MemProfCallStackTable && "MemProfCallStackTable must be available"); diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index d90629ad57f5b9..d8ab18d213e3d7 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ 
b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -351,9 +351,14 @@ bool InstrProfWriter::addMemProfCallStack( bool InstrProfWriter::addMemProfData(memprof::IndexedMemProfData Incoming, function_ref Warn) { - // TODO: Once we remove support for MemProf format Version V1, assert that - // the three components (frames, call stacks, and records) are either all - // empty or populated. + // Return immediately if everything is empty. + if (Incoming.Frames.empty() && Incoming.CallStacks.empty() && + Incoming.Records.empty()) + return true; + + // Otherwise, every component must be non-empty. + assert(!Incoming.Frames.empty() && !Incoming.CallStacks.empty() && + !Incoming.Records.empty()); if (MemProfData.Frames.empty()) MemProfData.Frames = std::move(Incoming.Frames); @@ -636,7 +641,7 @@ writeMemProfCallStackArray( MemProfCallStackIndexes; memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); for (auto I : Builder.getRadixArray()) OS.write32(I); @@ -649,41 +654,6 @@ writeMemProfCallStackArray( return MemProfCallStackIndexes; } -// Write out MemProf Version1 as follows: -// uint64_t Version (NEW in V1) -// uint64_t RecordTableOffset = RecordTableGenerator.Emit -// uint64_t FramePayloadOffset = Offset for the frame payload -// uint64_t FrameTableOffset = FrameTableGenerator.Emit -// uint64_t Num schema entries -// uint64_t Schema entry 0 -// uint64_t Schema entry 1 -// .... -// uint64_t Schema entry N - 1 -// OnDiskChainedHashTable MemProfRecordData -// OnDiskChainedHashTable MemProfFrameData -static Error writeMemProfV1(ProfOStream &OS, - memprof::IndexedMemProfData &MemProfData) { - OS.write(memprof::Version1); - uint64_t HeaderUpdatePos = OS.tell(); - OS.write(0ULL); // Reserve space for the memprof record table offset. - OS.write(0ULL); // Reserve space for the memprof frame payload offset. 
- OS.write(0ULL); // Reserve space for the memprof frame table offset. - - auto Schema = memprof::getFullSchema(); - writeMemProfSchema(OS, Schema); - - uint64_t RecordTableOffset = - writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version1); - - uint64_t FramePayloadOffset = OS.tell(); - uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames); - - uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset}; - OS.patch({{HeaderUpdatePos, Header}}); - - return Error::success(); -} - // Write out MemProf Version2 as follows: // uint64_t Version // uint64_t RecordTableOffset = RecordTableGenerator.Emit @@ -805,8 +775,6 @@ static Error writeMemProf(ProfOStream &OS, memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema) { switch (MemProfVersionRequested) { - case memprof::Version1: - return writeMemProfV1(OS, MemProfData); case memprof::Version2: return writeMemProfV2(OS, MemProfData, MemProfFullSchema); case memprof::Version3: diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 9d5ac748d7975d..1c240c3858cc76 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -23,18 +23,6 @@ MemProfSchema getHotColdSchema() { Meta::TotalLifetimeAccessDensity}; } -static size_t serializedSizeV0(const IndexedAllocationInfo &IAI, - const MemProfSchema &Schema) { - size_t Size = 0; - // The number of frames to serialize. - Size += sizeof(uint64_t); - // The callstack frame ids. - Size += sizeof(FrameId) * IAI.CallStack.size(); - // The size of the payload. 
- Size += PortableMemInfoBlock::serializedSize(Schema); - return Size; -} - static size_t serializedSizeV2(const IndexedAllocationInfo &IAI, const MemProfSchema &Schema) { size_t Size = 0; @@ -58,8 +46,6 @@ static size_t serializedSizeV3(const IndexedAllocationInfo &IAI, size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema, IndexedVersion Version) const { switch (Version) { - case Version1: - return serializedSizeV0(*this, Schema); case Version2: return serializedSizeV2(*this, Schema); case Version3: @@ -68,23 +54,6 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema, llvm_unreachable("unsupported MemProf version"); } -static size_t serializedSizeV1(const IndexedMemProfRecord &Record, - const MemProfSchema &Schema) { - // The number of alloc sites to serialize. - size_t Result = sizeof(uint64_t); - for (const IndexedAllocationInfo &N : Record.AllocSites) - Result += N.serializedSize(Schema, Version1); - - // The number of callsites we have information for. - Result += sizeof(uint64_t); - for (const auto &Frames : Record.CallSites) { - // The number of frame ids to serialize. - Result += sizeof(uint64_t); - Result += Frames.size() * sizeof(FrameId); - } - return Result; -} - static size_t serializedSizeV2(const IndexedMemProfRecord &Record, const MemProfSchema &Schema) { // The number of alloc sites to serialize. 
@@ -116,8 +85,6 @@ static size_t serializedSizeV3(const IndexedMemProfRecord &Record, size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema, IndexedVersion Version) const { switch (Version) { - case Version1: - return serializedSizeV1(*this, Schema); case Version2: return serializedSizeV2(*this, Schema); case Version3: @@ -126,29 +93,6 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema, llvm_unreachable("unsupported MemProf version"); } -static void serializeV1(const IndexedMemProfRecord &Record, - const MemProfSchema &Schema, raw_ostream &OS) { - using namespace support; - - endian::Writer LE(OS, llvm::endianness::little); - - LE.write(Record.AllocSites.size()); - for (const IndexedAllocationInfo &N : Record.AllocSites) { - LE.write(N.CallStack.size()); - for (const FrameId &Id : N.CallStack) - LE.write(Id); - N.Info.serialize(Schema, OS); - } - - // Related contexts. - LE.write(Record.CallSites.size()); - for (const auto &Frames : Record.CallSites) { - LE.write(Frames.size()); - for (const FrameId &Id : Frames) - LE.write(Id); - } -} - static void serializeV2(const IndexedMemProfRecord &Record, const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; @@ -195,9 +139,6 @@ void IndexedMemProfRecord::serialize( llvm::DenseMap *MemProfCallStackIndexes) const { switch (Version) { - case Version1: - serializeV1(*this, Schema, OS); - return; case Version2: serializeV2(*this, Schema, OS); return; @@ -208,50 +149,6 @@ void IndexedMemProfRecord::serialize( llvm_unreachable("unsupported MemProf version"); } -static IndexedMemProfRecord deserializeV1(const MemProfSchema &Schema, - const unsigned char *Ptr) { - using namespace support; - - IndexedMemProfRecord Record; - - // Read the meminfo nodes. 
- const uint64_t NumNodes = - endian::readNext(Ptr); - for (uint64_t I = 0; I < NumNodes; I++) { - IndexedAllocationInfo Node; - const uint64_t NumFrames = - endian::readNext(Ptr); - for (uint64_t J = 0; J < NumFrames; J++) { - const FrameId Id = - endian::readNext(Ptr); - Node.CallStack.push_back(Id); - } - Node.CSId = hashCallStack(Node.CallStack); - Node.Info.deserialize(Schema, Ptr); - Ptr += PortableMemInfoBlock::serializedSize(Schema); - Record.AllocSites.push_back(Node); - } - - // Read the callsite information. - const uint64_t NumCtxs = - endian::readNext(Ptr); - for (uint64_t J = 0; J < NumCtxs; J++) { - const uint64_t NumFrames = - endian::readNext(Ptr); - llvm::SmallVector Frames; - Frames.reserve(NumFrames); - for (uint64_t K = 0; K < NumFrames; K++) { - const FrameId Id = - endian::readNext(Ptr); - Frames.push_back(Id); - } - Record.CallSites.push_back(Frames); - Record.CallSiteIds.push_back(hashCallStack(Frames)); - } - - return Record; -} - static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, const unsigned char *Ptr) { using namespace support; @@ -324,8 +221,6 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, const unsigned char *Ptr, IndexedVersion Version) { switch (Version) { - case Version1: - return deserializeV1(Schema, Ptr); case Version2: return deserializeV2(Schema, Ptr); case Version3: @@ -440,8 +335,7 @@ template LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack( const llvm::SmallVector *CallStack, const llvm::SmallVector *Prev, - std::optional> - MemProfFrameIndexes) { + const llvm::DenseMap *MemProfFrameIndexes) { // Compute the length of the common root prefix between Prev and CallStack. 
uint32_t CommonLen = 0; if (Prev) { @@ -486,8 +380,7 @@ template void CallStackRadixTreeBuilder::build( llvm::MapVector> &&MemProfCallStackData, - std::optional> - MemProfFrameIndexes, + const llvm::DenseMap *MemProfFrameIndexes, llvm::DenseMap &FrameHistogram) { // Take the vector portion of MemProfCallStackData. The vector is exactly // what we need to sort. Also, we no longer need its lookup capability. @@ -615,6 +508,7 @@ void CallStackRadixTreeBuilder::build( // Explicitly instantiate class with the utilized FrameIdTy. template class CallStackRadixTreeBuilder; +template class CallStackRadixTreeBuilder; template llvm::DenseMap @@ -637,22 +531,9 @@ computeFrameHistogram(llvm::MapVector> template llvm::DenseMap computeFrameHistogram( llvm::MapVector> &MemProfCallStackData); - -void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) { - for (const auto &AS : Record.AllocSites) { - assert(AS.CSId == hashCallStack(AS.CallStack)); - (void)AS; - } -} - -void verifyFunctionProfileData( - const llvm::MapVector - &FunctionProfileData) { - for (const auto &[GUID, Record] : FunctionProfileData) { - (void)GUID; - verifyIndexedMemProfRecord(Record); - } -} - +template llvm::DenseMap +computeFrameHistogram( + llvm::MapVector> + &MemProfCallStackData); } // namespace memprof } // namespace llvm diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index de5b4c23c58a08..7f88b318f6f18f 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -549,8 +549,6 @@ Error RawMemProfReader::mapRawProfileToRecords() { } } - verifyFunctionProfileData(FunctionProfileData); - return Error::success(); } diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index badaf68ab59cd0..3979ca6acaf74e 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -206,12 +206,13 @@ Error zstd::decompress(ArrayRef Input, uint8_t *Output, const size_t Res = 
::ZSTD_decompress( Output, UncompressedSize, (const uint8_t *)Input.data(), Input.size()); UncompressedSize = Res; + if (ZSTD_isError(Res)) + return make_error(ZSTD_getErrorName(Res), + inconvertibleErrorCode()); // Tell MemorySanitizer that zstd output buffer is fully initialized. // This avoids a false report when running LLVM with uninstrumented ZLib. __msan_unpoison(Output, UncompressedSize); - return ZSTD_isError(Res) ? make_error(ZSTD_getErrorName(Res), - inconvertibleErrorCode()) - : Error::success(); + return Error::success(); } Error zstd::decompress(ArrayRef Input, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index cb0b9e965277aa..d51b36f7e49946 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1348,6 +1348,10 @@ class AArch64TargetLowering : public TargetLowering { unsigned getMinimumJumpTableEntries() const override; bool softPromoteHalfType() const override { return true; } + + bool shouldScalarizeBinop(SDValue VecOp) const override { + return VecOp.getOpcode() == ISD::SETCC; + } }; namespace AArch64 { diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 4f46ca1538db2b..ab00da51cf4fa4 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1540,10 +1540,7 @@ static bool canRenameMOP(const MachineOperand &MOP, // Note that this relies on the structure of the AArch64 register file. In // particular, a subregister cannot be written without overwriting the // whole register. 
- if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs && - (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) || - TRI->getSubRegisterClass(RegClass, AArch64::qsub0) || - TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) { + if (RegClass->HasDisjunctSubRegs) { LLVM_DEBUG( dbgs() << " Cannot rename operands with multiple disjunct subregisters (" diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index f13c162776a9b1..380f37df0bc2b9 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -423,57 +423,6 @@ AArch64RegisterInfo::explainReservedReg(const MachineFunction &MF, return {}; } -static const MCPhysReg ReservedHi[] = { - AArch64::B0_HI, AArch64::B1_HI, AArch64::B2_HI, AArch64::B3_HI, - AArch64::B4_HI, AArch64::B5_HI, AArch64::B6_HI, AArch64::B7_HI, - AArch64::B8_HI, AArch64::B9_HI, AArch64::B10_HI, AArch64::B11_HI, - AArch64::B12_HI, AArch64::B13_HI, AArch64::B14_HI, AArch64::B15_HI, - AArch64::B16_HI, AArch64::B17_HI, AArch64::B18_HI, AArch64::B19_HI, - AArch64::B20_HI, AArch64::B21_HI, AArch64::B22_HI, AArch64::B23_HI, - AArch64::B24_HI, AArch64::B25_HI, AArch64::B26_HI, AArch64::B27_HI, - AArch64::B28_HI, AArch64::B29_HI, AArch64::B30_HI, AArch64::B31_HI, - AArch64::H0_HI, AArch64::H1_HI, AArch64::H2_HI, AArch64::H3_HI, - AArch64::H4_HI, AArch64::H5_HI, AArch64::H6_HI, AArch64::H7_HI, - AArch64::H8_HI, AArch64::H9_HI, AArch64::H10_HI, AArch64::H11_HI, - AArch64::H12_HI, AArch64::H13_HI, AArch64::H14_HI, AArch64::H15_HI, - AArch64::H16_HI, AArch64::H17_HI, AArch64::H18_HI, AArch64::H19_HI, - AArch64::H20_HI, AArch64::H21_HI, AArch64::H22_HI, AArch64::H23_HI, - AArch64::H24_HI, AArch64::H25_HI, AArch64::H26_HI, AArch64::H27_HI, - AArch64::H28_HI, AArch64::H29_HI, AArch64::H30_HI, AArch64::H31_HI, - AArch64::S0_HI, AArch64::S1_HI, AArch64::S2_HI, AArch64::S3_HI, - AArch64::S4_HI, AArch64::S5_HI, AArch64::S6_HI, AArch64::S7_HI, 
- AArch64::S8_HI, AArch64::S9_HI, AArch64::S10_HI, AArch64::S11_HI, - AArch64::S12_HI, AArch64::S13_HI, AArch64::S14_HI, AArch64::S15_HI, - AArch64::S16_HI, AArch64::S17_HI, AArch64::S18_HI, AArch64::S19_HI, - AArch64::S20_HI, AArch64::S21_HI, AArch64::S22_HI, AArch64::S23_HI, - AArch64::S24_HI, AArch64::S25_HI, AArch64::S26_HI, AArch64::S27_HI, - AArch64::S28_HI, AArch64::S29_HI, AArch64::S30_HI, AArch64::S31_HI, - AArch64::D0_HI, AArch64::D1_HI, AArch64::D2_HI, AArch64::D3_HI, - AArch64::D4_HI, AArch64::D5_HI, AArch64::D6_HI, AArch64::D7_HI, - AArch64::D8_HI, AArch64::D9_HI, AArch64::D10_HI, AArch64::D11_HI, - AArch64::D12_HI, AArch64::D13_HI, AArch64::D14_HI, AArch64::D15_HI, - AArch64::D16_HI, AArch64::D17_HI, AArch64::D18_HI, AArch64::D19_HI, - AArch64::D20_HI, AArch64::D21_HI, AArch64::D22_HI, AArch64::D23_HI, - AArch64::D24_HI, AArch64::D25_HI, AArch64::D26_HI, AArch64::D27_HI, - AArch64::D28_HI, AArch64::D29_HI, AArch64::D30_HI, AArch64::D31_HI, - AArch64::Q0_HI, AArch64::Q1_HI, AArch64::Q2_HI, AArch64::Q3_HI, - AArch64::Q4_HI, AArch64::Q5_HI, AArch64::Q6_HI, AArch64::Q7_HI, - AArch64::Q8_HI, AArch64::Q9_HI, AArch64::Q10_HI, AArch64::Q11_HI, - AArch64::Q12_HI, AArch64::Q13_HI, AArch64::Q14_HI, AArch64::Q15_HI, - AArch64::Q16_HI, AArch64::Q17_HI, AArch64::Q18_HI, AArch64::Q19_HI, - AArch64::Q20_HI, AArch64::Q21_HI, AArch64::Q22_HI, AArch64::Q23_HI, - AArch64::Q24_HI, AArch64::Q25_HI, AArch64::Q26_HI, AArch64::Q27_HI, - AArch64::Q28_HI, AArch64::Q29_HI, AArch64::Q30_HI, AArch64::Q31_HI, - AArch64::W0_HI, AArch64::W1_HI, AArch64::W2_HI, AArch64::W3_HI, - AArch64::W4_HI, AArch64::W5_HI, AArch64::W6_HI, AArch64::W7_HI, - AArch64::W8_HI, AArch64::W9_HI, AArch64::W10_HI, AArch64::W11_HI, - AArch64::W12_HI, AArch64::W13_HI, AArch64::W14_HI, AArch64::W15_HI, - AArch64::W16_HI, AArch64::W17_HI, AArch64::W18_HI, AArch64::W19_HI, - AArch64::W20_HI, AArch64::W21_HI, AArch64::W22_HI, AArch64::W23_HI, - AArch64::W24_HI, AArch64::W25_HI, AArch64::W26_HI, AArch64::W27_HI, - 
AArch64::W28_HI, AArch64::W29_HI, AArch64::W30_HI, AArch64::WSP_HI, - AArch64::WZR_HI}; - BitVector AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const { const AArch64FrameLowering *TFI = getFrameLowering(MF); @@ -540,10 +489,7 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, AArch64::W28); } - for (Register R : ReservedHi) - Reserved.set(R); - - assert(checkAllSuperRegsMarked(Reserved, ReservedHi)); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } @@ -567,7 +513,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, AArch64::LR); } - assert(checkAllSuperRegsMarked(Reserved, ReservedHi)); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 4fec120391f016..ed16a007b49cf5 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -20,49 +20,33 @@ class AArch64Reg enc, string n, list subregs = [], let Namespace = "AArch64" in { // SubRegIndexes for GPR registers - def sub_32 : SubRegIndex<32>; - def sube64 : SubRegIndex<64>; - def subo64 : SubRegIndex<64>; - def sube32 : SubRegIndex<32>; - def subo32 : SubRegIndex<32>; + def sub_32 : SubRegIndex<32>; + def sube64 : SubRegIndex<64>; + def subo64 : SubRegIndex<64>; + def sube32 : SubRegIndex<32>; + def subo32 : SubRegIndex<32>; // SubRegIndexes for FPR/Vector registers - def bsub : SubRegIndex<8, 0>; - def hsub : SubRegIndex<16, 0>; - def ssub : SubRegIndex<32, 0>; - def dsub : SubRegIndex<64, 0>; - def zsub : SubRegIndex<128, 0>; - - // The _hi SubRegIndexes describe the high bits of a register which are not - // separately addressable. They need to be described so that partially - // overlapping registers end up with a different lane mask. 
This is required - // to enable subreg liveness tracking. - // - // For example: 8-bit B0 is a sub-register of 16-bit H0. - // * B0 is described with 'bsub'. - // * H0 is described with 'bsub + bsub_hi' == 'hsub'. - def bsub_hi : SubRegIndex<8, 8>; - def hsub_hi : SubRegIndex<16, 16>; - def ssub_hi : SubRegIndex<32, 32>; - def dsub_hi : SubRegIndex<64, 64>; - def zsub_hi : SubRegIndex<-1, 128>; - // sub_32_hi describes the top 32 bits in e.g. X0 - def sub_32_hi : SubRegIndex<32, 32>; + def bsub : SubRegIndex<8>; + def hsub : SubRegIndex<16>; + def ssub : SubRegIndex<32>; + def dsub : SubRegIndex<64>; + def zsub : SubRegIndex<128>; // Note: Code depends on these having consecutive numbers - def zsub0 : SubRegIndex<-1>; - def zsub1 : SubRegIndex<-1>; - def zsub2 : SubRegIndex<-1>; - def zsub3 : SubRegIndex<-1>; - // Note: Code depends on these having consecutive numbers - def qsub0 : SubRegIndex<128>; - def qsub1 : ComposedSubRegIndex; - def qsub2 : ComposedSubRegIndex; - def qsub3 : ComposedSubRegIndex; + def zsub0 : SubRegIndex<128, -1>; + def zsub1 : SubRegIndex<128, -1>; + def zsub2 : SubRegIndex<128, -1>; + def zsub3 : SubRegIndex<128, -1>; // Note: Code depends on these having consecutive numbers def dsub0 : SubRegIndex<64>; - def dsub1 : ComposedSubRegIndex; - def dsub2 : ComposedSubRegIndex; - def dsub3 : ComposedSubRegIndex; + def dsub1 : SubRegIndex<64>; + def dsub2 : SubRegIndex<64>; + def dsub3 : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def qsub0 : SubRegIndex<128>; + def qsub1 : SubRegIndex<128>; + def qsub2 : SubRegIndex<128>; + def qsub3 : SubRegIndex<128>; // SubRegIndexes for SME Matrix tiles def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits @@ -76,10 +60,10 @@ let Namespace = "AArch64" in { def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits // SubRegIndexes for SVE Predicates - def psub : SubRegIndex<-1>; + def psub : SubRegIndex<16>; // Note: Code depends on these having consecutive 
numbers - def psub0 : SubRegIndex<-1>; - def psub1 : SubRegIndex<-1>; + def psub0 : SubRegIndex<16, -1>; + def psub1 : SubRegIndex<16, -1>; } let Namespace = "AArch64" in { @@ -90,14 +74,6 @@ let Namespace = "AArch64" in { //===----------------------------------------------------------------------===// // Registers //===----------------------------------------------------------------------===// - -foreach i = 0-30 in { - // Define W0_HI, W1_HI, .. W30_HI - def W#i#_HI : AArch64Reg<-1, "w"#i#"_hi"> { let isArtificial = 1; } -} -def WSP_HI : AArch64Reg<-1, "wsp_hi"> { let isArtificial = 1; } -def WZR_HI : AArch64Reg<-1, "wzr_hi"> { let isArtificial = 1; } - def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>; def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>; def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>; @@ -130,42 +106,44 @@ def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>; def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>; def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>; def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias { let isConstant = true; } - -let SubRegIndices = [sub_32, sub_32_hi], CoveredBySubRegs = 1 in { -def X0 : AArch64Reg<0, "x0", [W0, W0_HI]>, DwarfRegAlias; -def X1 : AArch64Reg<1, "x1", [W1, W1_HI]>, DwarfRegAlias; -def X2 : AArch64Reg<2, "x2", [W2, W2_HI]>, DwarfRegAlias; -def X3 : AArch64Reg<3, "x3", [W3, W3_HI]>, DwarfRegAlias; -def X4 : AArch64Reg<4, "x4", [W4, W4_HI]>, DwarfRegAlias; -def X5 : AArch64Reg<5, "x5", [W5, W5_HI]>, DwarfRegAlias; -def X6 : AArch64Reg<6, "x6", [W6, W6_HI]>, DwarfRegAlias; -def X7 : AArch64Reg<7, "x7", [W7, W7_HI]>, DwarfRegAlias; -def X8 : AArch64Reg<8, "x8", [W8, W8_HI]>, DwarfRegAlias; -def X9 : AArch64Reg<9, "x9", [W9, W9_HI]>, DwarfRegAlias; -def X10 : AArch64Reg<10, "x10", [W10, W10_HI]>, DwarfRegAlias; -def X11 : AArch64Reg<11, "x11", [W11, W11_HI]>, DwarfRegAlias; -def X12 : AArch64Reg<12, "x12", [W12, W12_HI]>, DwarfRegAlias; -def X13 : AArch64Reg<13, 
"x13", [W13, W13_HI]>, DwarfRegAlias; -def X14 : AArch64Reg<14, "x14", [W14, W14_HI]>, DwarfRegAlias; -def X15 : AArch64Reg<15, "x15", [W15, W15_HI]>, DwarfRegAlias; -def X16 : AArch64Reg<16, "x16", [W16, W16_HI]>, DwarfRegAlias; -def X17 : AArch64Reg<17, "x17", [W17, W17_HI]>, DwarfRegAlias; -def X18 : AArch64Reg<18, "x18", [W18, W18_HI]>, DwarfRegAlias; -def X19 : AArch64Reg<19, "x19", [W19, W19_HI]>, DwarfRegAlias; -def X20 : AArch64Reg<20, "x20", [W20, W20_HI]>, DwarfRegAlias; -def X21 : AArch64Reg<21, "x21", [W21, W21_HI]>, DwarfRegAlias; -def X22 : AArch64Reg<22, "x22", [W22, W22_HI]>, DwarfRegAlias; -def X23 : AArch64Reg<23, "x23", [W23, W23_HI]>, DwarfRegAlias; -def X24 : AArch64Reg<24, "x24", [W24, W24_HI]>, DwarfRegAlias; -def X25 : AArch64Reg<25, "x25", [W25, W25_HI]>, DwarfRegAlias; -def X26 : AArch64Reg<26, "x26", [W26, W26_HI]>, DwarfRegAlias; -def X27 : AArch64Reg<27, "x27", [W27, W27_HI]>, DwarfRegAlias; -def X28 : AArch64Reg<28, "x28", [W28, W28_HI]>, DwarfRegAlias; -def FP : AArch64Reg<29, "x29", [W29, W29_HI]>, DwarfRegAlias; -def LR : AArch64Reg<30, "x30", [W30, W30_HI]>, DwarfRegAlias; -def SP : AArch64Reg<31, "sp", [WSP, WSP_HI]>, DwarfRegAlias; -def XZR : AArch64Reg<31, "xzr", [WZR, WZR_HI]>, DwarfRegAlias { let isConstant = true; } +let isConstant = true in +def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias; + +let SubRegIndices = [sub_32] in { +def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias; +def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias; +def X2 : AArch64Reg<2, "x2", [W2]>, DwarfRegAlias; +def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias; +def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias; +def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias; +def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias; +def X7 : AArch64Reg<7, "x7", [W7]>, DwarfRegAlias; +def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias; +def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias; +def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias; +def X11 : AArch64Reg<11, "x11", 
[W11]>, DwarfRegAlias; +def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias; +def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias; +def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias; +def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias; +def X16 : AArch64Reg<16, "x16", [W16]>, DwarfRegAlias; +def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias; +def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias; +def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias; +def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias; +def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias; +def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias; +def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias; +def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias; +def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias; +def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias; +def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias; +def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias; +def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias; +def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias; +def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias; +let isConstant = true in +def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias; } // Condition code register. 
@@ -316,14 +294,6 @@ def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { // Floating Point Scalar Registers //===----------------------------------------------------------------------===// -foreach i = 0-31 in { - def B#i#_HI : AArch64Reg<-1, "b"#i#"_hi"> { let isArtificial = 1; } - def H#i#_HI : AArch64Reg<-1, "h"#i#"_hi"> { let isArtificial = 1; } - def S#i#_HI : AArch64Reg<-1, "s"#i#"_hi"> { let isArtificial = 1; } - def D#i#_HI : AArch64Reg<-1, "d"#i#"_hi"> { let isArtificial = 1; } - def Q#i#_HI : AArch64Reg<-1, "q"#i#"_hi"> { let isArtificial = 1; } -} - def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>; def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>; def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>; @@ -357,144 +327,144 @@ def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>; def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>; def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>; -let SubRegIndices = [bsub, bsub_hi] in { -def H0 : AArch64Reg<0, "h0", [B0, B0_HI]>, DwarfRegAlias; -def H1 : AArch64Reg<1, "h1", [B1, B1_HI]>, DwarfRegAlias; -def H2 : AArch64Reg<2, "h2", [B2, B2_HI]>, DwarfRegAlias; -def H3 : AArch64Reg<3, "h3", [B3, B3_HI]>, DwarfRegAlias; -def H4 : AArch64Reg<4, "h4", [B4, B4_HI]>, DwarfRegAlias; -def H5 : AArch64Reg<5, "h5", [B5, B5_HI]>, DwarfRegAlias; -def H6 : AArch64Reg<6, "h6", [B6, B6_HI]>, DwarfRegAlias; -def H7 : AArch64Reg<7, "h7", [B7, B7_HI]>, DwarfRegAlias; -def H8 : AArch64Reg<8, "h8", [B8, B8_HI]>, DwarfRegAlias; -def H9 : AArch64Reg<9, "h9", [B9, B9_HI]>, DwarfRegAlias; -def H10 : AArch64Reg<10, "h10", [B10, B10_HI]>, DwarfRegAlias; -def H11 : AArch64Reg<11, "h11", [B11, B11_HI]>, DwarfRegAlias; -def H12 : AArch64Reg<12, "h12", [B12, B12_HI]>, DwarfRegAlias; -def H13 : AArch64Reg<13, "h13", [B13, B13_HI]>, DwarfRegAlias; -def H14 : AArch64Reg<14, "h14", [B14, B14_HI]>, DwarfRegAlias; -def H15 : AArch64Reg<15, "h15", [B15, B15_HI]>, DwarfRegAlias; -def H16 : AArch64Reg<16, "h16", [B16, B16_HI]>, DwarfRegAlias; -def H17 : 
AArch64Reg<17, "h17", [B17, B17_HI]>, DwarfRegAlias; -def H18 : AArch64Reg<18, "h18", [B18, B18_HI]>, DwarfRegAlias; -def H19 : AArch64Reg<19, "h19", [B19, B19_HI]>, DwarfRegAlias; -def H20 : AArch64Reg<20, "h20", [B20, B20_HI]>, DwarfRegAlias; -def H21 : AArch64Reg<21, "h21", [B21, B21_HI]>, DwarfRegAlias; -def H22 : AArch64Reg<22, "h22", [B22, B22_HI]>, DwarfRegAlias; -def H23 : AArch64Reg<23, "h23", [B23, B23_HI]>, DwarfRegAlias; -def H24 : AArch64Reg<24, "h24", [B24, B24_HI]>, DwarfRegAlias; -def H25 : AArch64Reg<25, "h25", [B25, B25_HI]>, DwarfRegAlias; -def H26 : AArch64Reg<26, "h26", [B26, B26_HI]>, DwarfRegAlias; -def H27 : AArch64Reg<27, "h27", [B27, B27_HI]>, DwarfRegAlias; -def H28 : AArch64Reg<28, "h28", [B28, B28_HI]>, DwarfRegAlias; -def H29 : AArch64Reg<29, "h29", [B29, B29_HI]>, DwarfRegAlias; -def H30 : AArch64Reg<30, "h30", [B30, B30_HI]>, DwarfRegAlias; -def H31 : AArch64Reg<31, "h31", [B31, B31_HI]>, DwarfRegAlias; -} - -let SubRegIndices = [hsub, hsub_hi] in { -def S0 : AArch64Reg<0, "s0", [H0, H0_HI]>, DwarfRegAlias; -def S1 : AArch64Reg<1, "s1", [H1, H1_HI]>, DwarfRegAlias; -def S2 : AArch64Reg<2, "s2", [H2, H2_HI]>, DwarfRegAlias; -def S3 : AArch64Reg<3, "s3", [H3, H3_HI]>, DwarfRegAlias; -def S4 : AArch64Reg<4, "s4", [H4, H4_HI]>, DwarfRegAlias; -def S5 : AArch64Reg<5, "s5", [H5, H5_HI]>, DwarfRegAlias; -def S6 : AArch64Reg<6, "s6", [H6, H6_HI]>, DwarfRegAlias; -def S7 : AArch64Reg<7, "s7", [H7, H7_HI]>, DwarfRegAlias; -def S8 : AArch64Reg<8, "s8", [H8, H8_HI]>, DwarfRegAlias; -def S9 : AArch64Reg<9, "s9", [H9, H9_HI]>, DwarfRegAlias; -def S10 : AArch64Reg<10, "s10", [H10, H10_HI]>, DwarfRegAlias; -def S11 : AArch64Reg<11, "s11", [H11, H11_HI]>, DwarfRegAlias; -def S12 : AArch64Reg<12, "s12", [H12, H12_HI]>, DwarfRegAlias; -def S13 : AArch64Reg<13, "s13", [H13, H13_HI]>, DwarfRegAlias; -def S14 : AArch64Reg<14, "s14", [H14, H14_HI]>, DwarfRegAlias; -def S15 : AArch64Reg<15, "s15", [H15, H15_HI]>, DwarfRegAlias; -def S16 : AArch64Reg<16, 
"s16", [H16, H16_HI]>, DwarfRegAlias; -def S17 : AArch64Reg<17, "s17", [H17, H17_HI]>, DwarfRegAlias; -def S18 : AArch64Reg<18, "s18", [H18, H18_HI]>, DwarfRegAlias; -def S19 : AArch64Reg<19, "s19", [H19, H19_HI]>, DwarfRegAlias; -def S20 : AArch64Reg<20, "s20", [H20, H20_HI]>, DwarfRegAlias; -def S21 : AArch64Reg<21, "s21", [H21, H21_HI]>, DwarfRegAlias; -def S22 : AArch64Reg<22, "s22", [H22, H22_HI]>, DwarfRegAlias; -def S23 : AArch64Reg<23, "s23", [H23, H23_HI]>, DwarfRegAlias; -def S24 : AArch64Reg<24, "s24", [H24, H24_HI]>, DwarfRegAlias; -def S25 : AArch64Reg<25, "s25", [H25, H25_HI]>, DwarfRegAlias; -def S26 : AArch64Reg<26, "s26", [H26, H26_HI]>, DwarfRegAlias; -def S27 : AArch64Reg<27, "s27", [H27, H27_HI]>, DwarfRegAlias; -def S28 : AArch64Reg<28, "s28", [H28, H28_HI]>, DwarfRegAlias; -def S29 : AArch64Reg<29, "s29", [H29, H29_HI]>, DwarfRegAlias; -def S30 : AArch64Reg<30, "s30", [H30, H30_HI]>, DwarfRegAlias; -def S31 : AArch64Reg<31, "s31", [H31, H31_HI]>, DwarfRegAlias; -} - -let SubRegIndices = [ssub, ssub_hi], RegAltNameIndices = [vreg, vlist1] in { -def D0 : AArch64Reg<0, "d0", [S0, S0_HI], ["v0", ""]>, DwarfRegAlias; -def D1 : AArch64Reg<1, "d1", [S1, S1_HI], ["v1", ""]>, DwarfRegAlias; -def D2 : AArch64Reg<2, "d2", [S2, S2_HI], ["v2", ""]>, DwarfRegAlias; -def D3 : AArch64Reg<3, "d3", [S3, S3_HI], ["v3", ""]>, DwarfRegAlias; -def D4 : AArch64Reg<4, "d4", [S4, S4_HI], ["v4", ""]>, DwarfRegAlias; -def D5 : AArch64Reg<5, "d5", [S5, S5_HI], ["v5", ""]>, DwarfRegAlias; -def D6 : AArch64Reg<6, "d6", [S6, S6_HI], ["v6", ""]>, DwarfRegAlias; -def D7 : AArch64Reg<7, "d7", [S7, S7_HI], ["v7", ""]>, DwarfRegAlias; -def D8 : AArch64Reg<8, "d8", [S8, S8_HI], ["v8", ""]>, DwarfRegAlias; -def D9 : AArch64Reg<9, "d9", [S9, S9_HI], ["v9", ""]>, DwarfRegAlias; -def D10 : AArch64Reg<10, "d10", [S10, S10_HI], ["v10", ""]>, DwarfRegAlias; -def D11 : AArch64Reg<11, "d11", [S11, S11_HI], ["v11", ""]>, DwarfRegAlias; -def D12 : AArch64Reg<12, "d12", [S12, S12_HI], 
["v12", ""]>, DwarfRegAlias; -def D13 : AArch64Reg<13, "d13", [S13, S13_HI], ["v13", ""]>, DwarfRegAlias; -def D14 : AArch64Reg<14, "d14", [S14, S14_HI], ["v14", ""]>, DwarfRegAlias; -def D15 : AArch64Reg<15, "d15", [S15, S15_HI], ["v15", ""]>, DwarfRegAlias; -def D16 : AArch64Reg<16, "d16", [S16, S16_HI], ["v16", ""]>, DwarfRegAlias; -def D17 : AArch64Reg<17, "d17", [S17, S17_HI], ["v17", ""]>, DwarfRegAlias; -def D18 : AArch64Reg<18, "d18", [S18, S18_HI], ["v18", ""]>, DwarfRegAlias; -def D19 : AArch64Reg<19, "d19", [S19, S19_HI], ["v19", ""]>, DwarfRegAlias; -def D20 : AArch64Reg<20, "d20", [S20, S20_HI], ["v20", ""]>, DwarfRegAlias; -def D21 : AArch64Reg<21, "d21", [S21, S21_HI], ["v21", ""]>, DwarfRegAlias; -def D22 : AArch64Reg<22, "d22", [S22, S22_HI], ["v22", ""]>, DwarfRegAlias; -def D23 : AArch64Reg<23, "d23", [S23, S23_HI], ["v23", ""]>, DwarfRegAlias; -def D24 : AArch64Reg<24, "d24", [S24, S24_HI], ["v24", ""]>, DwarfRegAlias; -def D25 : AArch64Reg<25, "d25", [S25, S25_HI], ["v25", ""]>, DwarfRegAlias; -def D26 : AArch64Reg<26, "d26", [S26, S26_HI], ["v26", ""]>, DwarfRegAlias; -def D27 : AArch64Reg<27, "d27", [S27, S27_HI], ["v27", ""]>, DwarfRegAlias; -def D28 : AArch64Reg<28, "d28", [S28, S28_HI], ["v28", ""]>, DwarfRegAlias; -def D29 : AArch64Reg<29, "d29", [S29, S29_HI], ["v29", ""]>, DwarfRegAlias; -def D30 : AArch64Reg<30, "d30", [S30, S30_HI], ["v30", ""]>, DwarfRegAlias; -def D31 : AArch64Reg<31, "d31", [S31, S31_HI], ["v31", ""]>, DwarfRegAlias; -} - -let SubRegIndices = [dsub, dsub_hi], RegAltNameIndices = [vreg, vlist1] in { -def Q0 : AArch64Reg<0, "q0", [D0, D0_HI], ["v0", ""]>, DwarfRegAlias; -def Q1 : AArch64Reg<1, "q1", [D1, D1_HI], ["v1", ""]>, DwarfRegAlias; -def Q2 : AArch64Reg<2, "q2", [D2, D2_HI], ["v2", ""]>, DwarfRegAlias; -def Q3 : AArch64Reg<3, "q3", [D3, D3_HI], ["v3", ""]>, DwarfRegAlias; -def Q4 : AArch64Reg<4, "q4", [D4, D4_HI], ["v4", ""]>, DwarfRegAlias; -def Q5 : AArch64Reg<5, "q5", [D5, D5_HI], ["v5", ""]>, 
DwarfRegAlias; -def Q6 : AArch64Reg<6, "q6", [D6, D6_HI], ["v6", ""]>, DwarfRegAlias; -def Q7 : AArch64Reg<7, "q7", [D7, D7_HI], ["v7", ""]>, DwarfRegAlias; -def Q8 : AArch64Reg<8, "q8", [D8, D8_HI], ["v8", ""]>, DwarfRegAlias; -def Q9 : AArch64Reg<9, "q9", [D9, D9_HI], ["v9", ""]>, DwarfRegAlias; -def Q10 : AArch64Reg<10, "q10", [D10, D10_HI], ["v10", ""]>, DwarfRegAlias; -def Q11 : AArch64Reg<11, "q11", [D11, D11_HI], ["v11", ""]>, DwarfRegAlias; -def Q12 : AArch64Reg<12, "q12", [D12, D12_HI], ["v12", ""]>, DwarfRegAlias; -def Q13 : AArch64Reg<13, "q13", [D13, D13_HI], ["v13", ""]>, DwarfRegAlias; -def Q14 : AArch64Reg<14, "q14", [D14, D14_HI], ["v14", ""]>, DwarfRegAlias; -def Q15 : AArch64Reg<15, "q15", [D15, D15_HI], ["v15", ""]>, DwarfRegAlias; -def Q16 : AArch64Reg<16, "q16", [D16, D16_HI], ["v16", ""]>, DwarfRegAlias; -def Q17 : AArch64Reg<17, "q17", [D17, D17_HI], ["v17", ""]>, DwarfRegAlias; -def Q18 : AArch64Reg<18, "q18", [D18, D18_HI], ["v18", ""]>, DwarfRegAlias; -def Q19 : AArch64Reg<19, "q19", [D19, D19_HI], ["v19", ""]>, DwarfRegAlias; -def Q20 : AArch64Reg<20, "q20", [D20, D20_HI], ["v20", ""]>, DwarfRegAlias; -def Q21 : AArch64Reg<21, "q21", [D21, D21_HI], ["v21", ""]>, DwarfRegAlias; -def Q22 : AArch64Reg<22, "q22", [D22, D22_HI], ["v22", ""]>, DwarfRegAlias; -def Q23 : AArch64Reg<23, "q23", [D23, D23_HI], ["v23", ""]>, DwarfRegAlias; -def Q24 : AArch64Reg<24, "q24", [D24, D24_HI], ["v24", ""]>, DwarfRegAlias; -def Q25 : AArch64Reg<25, "q25", [D25, D25_HI], ["v25", ""]>, DwarfRegAlias; -def Q26 : AArch64Reg<26, "q26", [D26, D26_HI], ["v26", ""]>, DwarfRegAlias; -def Q27 : AArch64Reg<27, "q27", [D27, D27_HI], ["v27", ""]>, DwarfRegAlias; -def Q28 : AArch64Reg<28, "q28", [D28, D28_HI], ["v28", ""]>, DwarfRegAlias; -def Q29 : AArch64Reg<29, "q29", [D29, D29_HI], ["v29", ""]>, DwarfRegAlias; -def Q30 : AArch64Reg<30, "q30", [D30, D30_HI], ["v30", ""]>, DwarfRegAlias; -def Q31 : AArch64Reg<31, "q31", [D31, D31_HI], ["v31", ""]>, DwarfRegAlias; +let 
SubRegIndices = [bsub] in { +def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias; +def H1 : AArch64Reg<1, "h1", [B1]>, DwarfRegAlias; +def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias; +def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias; +def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias; +def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias; +def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias; +def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias; +def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias; +def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias; +def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias; +def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias; +def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias; +def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias; +def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias; +def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias; +def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias; +def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias; +def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias; +def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias; +def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias; +def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias; +def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias; +def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias; +def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias; +def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias; +def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias; +def H27 : AArch64Reg<27, "h27", [B27]>, DwarfRegAlias; +def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias; +def H29 : AArch64Reg<29, "h29", [B29]>, DwarfRegAlias; +def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias; +def H31 : AArch64Reg<31, "h31", [B31]>, DwarfRegAlias; +} + +let SubRegIndices = [hsub] in { +def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias; +def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias; +def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias; +def S3 : AArch64Reg<3, 
"s3", [H3]>, DwarfRegAlias; +def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias; +def S5 : AArch64Reg<5, "s5", [H5]>, DwarfRegAlias; +def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias; +def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias; +def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias; +def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias; +def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias; +def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias; +def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias; +def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias; +def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias; +def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias; +def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias; +def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias; +def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias; +def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias; +def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias; +def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias; +def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias; +def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias; +def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias; +def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias; +def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias; +def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias; +def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias; +def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias; +def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias; +def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias; +} + +let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { +def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; +def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; +def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; +def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; +def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; +def D5 : AArch64Reg<5, "d5", 
[S5], ["v5", ""]>, DwarfRegAlias; +def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias; +def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; +def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; +def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; +def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; +def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias; +def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; +def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; +def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; +def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; +def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; +def D17 : AArch64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; +def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; +def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; +def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; +def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; +def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; +def D23 : AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; +def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; +def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; +def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias; +def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; +def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; +def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; +def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; +def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; +} + +let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { +def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; +def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, 
DwarfRegAlias; +def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; +def Q3 : AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; +def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; +def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; +def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; +def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; +def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; +def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; +def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; +def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; +def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; +def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; +def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; +def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; +def Q16 : AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; +def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; +def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; +def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; +def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; +def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; +def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; +def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; +def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; +def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; +def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; +def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; +def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; +def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; +def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; +def Q31 : 
AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; } def FPR8 : RegisterClass<"AArch64", [i8], 8, (sequence "B%u", 0, 31)> { @@ -902,39 +872,39 @@ let SubRegIndices = [psub] in { } // SVE variable-size vector registers -let SubRegIndices = [zsub, zsub_hi] in { -def Z0 : AArch64Reg<0, "z0", [Q0, Q0_HI]>, DwarfRegNum<[96]>; -def Z1 : AArch64Reg<1, "z1", [Q1, Q1_HI]>, DwarfRegNum<[97]>; -def Z2 : AArch64Reg<2, "z2", [Q2, Q2_HI]>, DwarfRegNum<[98]>; -def Z3 : AArch64Reg<3, "z3", [Q3, Q3_HI]>, DwarfRegNum<[99]>; -def Z4 : AArch64Reg<4, "z4", [Q4, Q4_HI]>, DwarfRegNum<[100]>; -def Z5 : AArch64Reg<5, "z5", [Q5, Q5_HI]>, DwarfRegNum<[101]>; -def Z6 : AArch64Reg<6, "z6", [Q6, Q6_HI]>, DwarfRegNum<[102]>; -def Z7 : AArch64Reg<7, "z7", [Q7, Q7_HI]>, DwarfRegNum<[103]>; -def Z8 : AArch64Reg<8, "z8", [Q8, Q8_HI]>, DwarfRegNum<[104]>; -def Z9 : AArch64Reg<9, "z9", [Q9, Q9_HI]>, DwarfRegNum<[105]>; -def Z10 : AArch64Reg<10, "z10", [Q10, Q10_HI]>, DwarfRegNum<[106]>; -def Z11 : AArch64Reg<11, "z11", [Q11, Q11_HI]>, DwarfRegNum<[107]>; -def Z12 : AArch64Reg<12, "z12", [Q12, Q12_HI]>, DwarfRegNum<[108]>; -def Z13 : AArch64Reg<13, "z13", [Q13, Q13_HI]>, DwarfRegNum<[109]>; -def Z14 : AArch64Reg<14, "z14", [Q14, Q14_HI]>, DwarfRegNum<[110]>; -def Z15 : AArch64Reg<15, "z15", [Q15, Q15_HI]>, DwarfRegNum<[111]>; -def Z16 : AArch64Reg<16, "z16", [Q16, Q16_HI]>, DwarfRegNum<[112]>; -def Z17 : AArch64Reg<17, "z17", [Q17, Q17_HI]>, DwarfRegNum<[113]>; -def Z18 : AArch64Reg<18, "z18", [Q18, Q18_HI]>, DwarfRegNum<[114]>; -def Z19 : AArch64Reg<19, "z19", [Q19, Q19_HI]>, DwarfRegNum<[115]>; -def Z20 : AArch64Reg<20, "z20", [Q20, Q20_HI]>, DwarfRegNum<[116]>; -def Z21 : AArch64Reg<21, "z21", [Q21, Q21_HI]>, DwarfRegNum<[117]>; -def Z22 : AArch64Reg<22, "z22", [Q22, Q22_HI]>, DwarfRegNum<[118]>; -def Z23 : AArch64Reg<23, "z23", [Q23, Q23_HI]>, DwarfRegNum<[119]>; -def Z24 : AArch64Reg<24, "z24", [Q24, Q24_HI]>, DwarfRegNum<[120]>; -def Z25 : AArch64Reg<25, "z25", [Q25, Q25_HI]>, 
DwarfRegNum<[121]>; -def Z26 : AArch64Reg<26, "z26", [Q26, Q26_HI]>, DwarfRegNum<[122]>; -def Z27 : AArch64Reg<27, "z27", [Q27, Q27_HI]>, DwarfRegNum<[123]>; -def Z28 : AArch64Reg<28, "z28", [Q28, Q28_HI]>, DwarfRegNum<[124]>; -def Z29 : AArch64Reg<29, "z29", [Q29, Q29_HI]>, DwarfRegNum<[125]>; -def Z30 : AArch64Reg<30, "z30", [Q30, Q30_HI]>, DwarfRegNum<[126]>; -def Z31 : AArch64Reg<31, "z31", [Q31, Q31_HI]>, DwarfRegNum<[127]>; +let SubRegIndices = [zsub] in { +def Z0 : AArch64Reg<0, "z0", [Q0]>, DwarfRegNum<[96]>; +def Z1 : AArch64Reg<1, "z1", [Q1]>, DwarfRegNum<[97]>; +def Z2 : AArch64Reg<2, "z2", [Q2]>, DwarfRegNum<[98]>; +def Z3 : AArch64Reg<3, "z3", [Q3]>, DwarfRegNum<[99]>; +def Z4 : AArch64Reg<4, "z4", [Q4]>, DwarfRegNum<[100]>; +def Z5 : AArch64Reg<5, "z5", [Q5]>, DwarfRegNum<[101]>; +def Z6 : AArch64Reg<6, "z6", [Q6]>, DwarfRegNum<[102]>; +def Z7 : AArch64Reg<7, "z7", [Q7]>, DwarfRegNum<[103]>; +def Z8 : AArch64Reg<8, "z8", [Q8]>, DwarfRegNum<[104]>; +def Z9 : AArch64Reg<9, "z9", [Q9]>, DwarfRegNum<[105]>; +def Z10 : AArch64Reg<10, "z10", [Q10]>, DwarfRegNum<[106]>; +def Z11 : AArch64Reg<11, "z11", [Q11]>, DwarfRegNum<[107]>; +def Z12 : AArch64Reg<12, "z12", [Q12]>, DwarfRegNum<[108]>; +def Z13 : AArch64Reg<13, "z13", [Q13]>, DwarfRegNum<[109]>; +def Z14 : AArch64Reg<14, "z14", [Q14]>, DwarfRegNum<[110]>; +def Z15 : AArch64Reg<15, "z15", [Q15]>, DwarfRegNum<[111]>; +def Z16 : AArch64Reg<16, "z16", [Q16]>, DwarfRegNum<[112]>; +def Z17 : AArch64Reg<17, "z17", [Q17]>, DwarfRegNum<[113]>; +def Z18 : AArch64Reg<18, "z18", [Q18]>, DwarfRegNum<[114]>; +def Z19 : AArch64Reg<19, "z19", [Q19]>, DwarfRegNum<[115]>; +def Z20 : AArch64Reg<20, "z20", [Q20]>, DwarfRegNum<[116]>; +def Z21 : AArch64Reg<21, "z21", [Q21]>, DwarfRegNum<[117]>; +def Z22 : AArch64Reg<22, "z22", [Q22]>, DwarfRegNum<[118]>; +def Z23 : AArch64Reg<23, "z23", [Q23]>, DwarfRegNum<[119]>; +def Z24 : AArch64Reg<24, "z24", [Q24]>, DwarfRegNum<[120]>; +def Z25 : AArch64Reg<25, "z25", [Q25]>, 
DwarfRegNum<[121]>; +def Z26 : AArch64Reg<26, "z26", [Q26]>, DwarfRegNum<[122]>; +def Z27 : AArch64Reg<27, "z27", [Q27]>, DwarfRegNum<[123]>; +def Z28 : AArch64Reg<28, "z28", [Q28]>, DwarfRegNum<[124]>; +def Z29 : AArch64Reg<29, "z29", [Q29]>, DwarfRegNum<[125]>; +def Z30 : AArch64Reg<30, "z30", [Q30]>, DwarfRegNum<[126]>; +def Z31 : AArch64Reg<31, "z31", [Q31]>, DwarfRegNum<[127]>; } // Enum describing the element size for destructive @@ -1979,15 +1949,6 @@ def svcr_op : Operand, TImmLeaf; - def B_HI_DummyRC : RegisterClass<"AArch64", [untyped], 0, (sequence "B%u_HI", 0, 31)>; - def H_HI_DummyRC : RegisterClass<"AArch64", [untyped], 0, (sequence "H%u_HI", 0, 31)>; - def S_HI_DummyRC : RegisterClass<"AArch64", [untyped], 0, (sequence "S%u_HI", 0, 31)>; - def D_HI_DummyRC : RegisterClass<"AArch64", [untyped], 0, (sequence "D%u_HI", 0, 31)>; - def Q_HI_DummyRC : RegisterClass<"AArch64", [untyped], 0, (sequence "Q%u_HI", 0, 31)>; -} - //===----------------------------------------------------------------------===// // Register categories. // diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ec7bb71fd111ff..7a1e401bca18cb 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -266,16 +266,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return false; } - const TargetMachine &TM = getTLI()->getTargetMachine(); - - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); - - // Inline a callee if its target-features are a subset of the callers - // target-features. 
- return (CallerBits & CalleeBits) == CalleeBits; + return BaseT::areInlineCompatible(Caller, Callee); } bool AArch64TTIImpl::areTypesABICompatible( diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index c8f01068f72189..ad31f29c045990 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -840,13 +840,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_PTRTOINT) .legalFor({{s64, p0}, {v2s64, v2p0}}) .widenScalarToNextPow2(0, 64) - .clampScalar(0, s64, s64); + .clampScalar(0, s64, s64) + .clampMaxNumElements(0, s64, 2); getActionDefinitionsBuilder(G_INTTOPTR) .unsupportedIf([&](const LegalityQuery &Query) { return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits(); }) - .legalFor({{p0, s64}, {v2p0, v2s64}}); + .legalFor({{p0, s64}, {v2p0, v2s64}}) + .clampMaxNumElements(1, s64, 2); // Casts for 32 and 64-bit width type are just copies. // Same for 128-bit width type, except they are on the FPR bank. @@ -1053,7 +1055,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) if (DstTy != SrcTy) return false; return llvm::is_contained( - {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy); + {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy); }) // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we // just want those lowered into G_BUILD_VECTOR @@ -1079,7 +1081,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v8s8, v16s8) .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v4s32, v4s32) - .clampNumElements(0, v2s64, v2s64); + .clampNumElements(0, v2s64, v2s64) + .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) { + // Bitcast pointers vector to i64. 
+ const LLT DstTy = Query.Types[0]; + return std::pair(0, LLT::vector(DstTy.getElementCount(), 64)); + }); getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}}) @@ -1296,6 +1303,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v2s32, v4s32) .clampMaxNumElements(0, s64, 2) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) .moreElementsToNextPow2(0) .lower(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index d3543015d667f9..60b1e58832dad4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -372,10 +372,23 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts", "Additional instructions for GFX940+" >; +def FeaturePermlane16Swap : SubtargetFeature<"permlane16-swap", + "HasPermlane16Swap", + "true", + "Has v_permlane16_swap_b32 instructions" +>; + +def FeaturePermlane32Swap : SubtargetFeature<"permlane32-swap", + "HasPermlane32Swap", + "true", + "Has v_permlane32_swap_b32 instructions" +>; + def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts", "GFX950Insts", "true", - "Additional instructions for GFX950+" + "Additional instructions for GFX950+", + [FeaturePermlane16Swap, FeaturePermlane32Swap] >; def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts", @@ -1987,6 +2000,14 @@ def HasGFX950Insts : Predicate<"Subtarget->hasGFX950Insts()">, AssemblerPredicate<(all_of FeatureGFX950Insts)>; +def HasPermlane16Swap : + Predicate<"Subtarget->hasPermlane16Swap()">, + AssemblerPredicate<(all_of FeaturePermlane16Swap)>; + +def HasPermlane32Swap : + Predicate<"Subtarget->hasPermlane32Swap()">, + AssemblerPredicate<(all_of FeaturePermlane32Swap)>; + def isGFX8GFX9NotGFX940 : Predicate<"!Subtarget->hasGFX940Insts() &&" "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 88fa96bd049f29..1b909568fc555c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -395,6 +395,9 @@ def gi_as_i8timm : GICustomOperandRenderer<"renderTruncTImm">, def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm">, GISDNodeXFormEquiv; +def gi_as_i1timm_zext : GICustomOperandRenderer<"renderZextBoolTImm">, + GISDNodeXFormEquiv; + def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">, GISDNodeXFormEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 151d56292b53d6..7d78e9cd7eab6f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -408,7 +408,8 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { unsigned AS = cast(N)->getAddressSpace(); if (AS == AMDGPUAS::LOCAL_ADDRESS) { if (Subtarget->ldsRequiresM0Init()) - return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); + return glueCopyToM0( + N, CurDAG->getSignedTargetConstant(-1, SDLoc(N), MVT::i32)); } else if (AS == AMDGPUAS::REGION_ADDRESS) { MachineFunction &MF = CurDAG->getMachineFunction(); unsigned Value = MF.getInfo()->getGDSSize(); @@ -1724,7 +1725,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr, } VAddr = Addr; - Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); + Offset = CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32); return true; } @@ -1832,7 +1833,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, } if (SAddr) { - Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); + Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32); return true; } } @@ -1848,7 +1849,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32, CurDAG->getTargetConstant(0, SDLoc(), MVT::i32)); VOffset = 
SDValue(VMov, 0); - Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); + Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32); return true; } @@ -1903,13 +1904,13 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr, SDValue AddOffset = SAddr.getOpcode() == ISD::TargetFrameIndex ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL) - : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32); + : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32); SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32, SAddr, AddOffset), 0); } - Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32); + Offset = CurDAG->getSignedTargetConstant(COffsetVal, DL, MVT::i32); return true; } @@ -2058,7 +2059,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, std::optional EncodedOffset = AMDGPU::getSMRDEncodedOffset( *Subtarget, ByteOffset, IsBuffer, HasSOffset); if (EncodedOffset && Offset && !Imm32Only) { - *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + *Offset = CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32); return true; } @@ -2777,6 +2778,31 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { case Intrinsic::amdgcn_interp_p1_f16: SelectInterpP1F16(N); return; + case Intrinsic::amdgcn_permlane16_swap: + case Intrinsic::amdgcn_permlane32_swap: { + if ((IntrID == Intrinsic::amdgcn_permlane16_swap && + !Subtarget->hasPermlane16Swap()) || + (IntrID == Intrinsic::amdgcn_permlane32_swap && + !Subtarget->hasPermlane32Swap())) { + SelectCode(N); // Hit the default error + return; + } + + Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap + ? AMDGPU::V_PERMLANE16_SWAP_B32_e64 + : AMDGPU::V_PERMLANE32_SWAP_B32_e64; + + SmallVector NewOps(N->op_begin() + 1, N->op_end()); + if (ConvGlueNode) + NewOps.push_back(SDValue(ConvGlueNode, 0)); + + bool FI = N->getConstantOperandVal(3); + NewOps[2] = CurDAG->getTargetConstant( + FI ? 
AMDGPU::DPP::DPP_FI_1 : AMDGPU::DPP::DPP_FI_0, SDLoc(), MVT::i32); + + CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), NewOps); + return; + } default: SelectCode(N); break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3cc4bd92f6471a..d77508227b076b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2333,7 +2333,7 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, SDValue RHS = Op.getOperand(1); SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue NegOne = DAG.getConstant(-1, DL, VT); + SDValue NegOne = DAG.getAllOnesConstant(DL, VT); if (VT == MVT::i32) { if (SDValue Res = LowerDIVREM24(Op, DAG, true)) @@ -3794,7 +3794,11 @@ static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, if (Width + Offset < 32) { uint32_t Shl = static_cast(Src0) << (32 - Offset - Width); IntTy Result = static_cast(Shl) >> (32 - Width); - return DAG.getConstant(Result, DL, MVT::i32); + if constexpr (std::is_signed_v) { + return DAG.getSignedConstant(Result, DL, MVT::i32); + } else { + return DAG.getConstant(Result, DL, MVT::i32); + } } return DAG.getConstant(Src0 >> Offset, DL, MVT::i32); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 087de1bed86f76..18a09c39a06387 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1024,6 +1024,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { } break; } + case Intrinsic::amdgcn_wavefrontsize: { + if (ST->isWaveSizeKnown()) + return IC.replaceInstUsesWith( + II, ConstantInt::get(II.getType(), ST->getWavefrontSize())); + break; + } case Intrinsic::amdgcn_wqm_vote: { // wqm_vote is identity when the argument is constant. 
if (!isa(II.getArgOperand(0))) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 702f6e67c55271..bec294a945d2fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -100,8 +100,8 @@ def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN", >; def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP", - SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>, - [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue] + SDTypeProfile<0, 1, [SDTCisVT<0, i16>]>, + [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPOptInGlue] >; def AMDGPUconstdata_ptr : SDNode< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 4415d23f8b57f0..39bec6c7f2f56d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1100,7 +1100,14 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8: case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8: case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: return selectSMFMACIntrin(I); + case Intrinsic::amdgcn_permlane16_swap: + case Intrinsic::amdgcn_permlane32_swap: + return selectPermlaneSwapIntrin(I, IntrinsicID); default: return selectImpl(I, *CoverageInfo); } @@ -3551,6 +3558,18 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const { case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8: Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64; break; + case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8: + Opc = 
AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64; + break; default: llvm_unreachable("unhandled smfmac intrinsic"); } @@ -3565,6 +3584,29 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const { return true; } +bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin( + MachineInstr &MI, Intrinsic::ID IntrID) const { + if (IntrID == Intrinsic::amdgcn_permlane16_swap && + !Subtarget->hasPermlane16Swap()) + return false; + if (IntrID == Intrinsic::amdgcn_permlane32_swap && + !Subtarget->hasPermlane32Swap()) + return false; + + unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap + ? AMDGPU::V_PERMLANE16_SWAP_B32_e64 + : AMDGPU::V_PERMLANE32_SWAP_B32_e64; + + MI.removeOperand(2); + MI.setDesc(TII.get(Opcode)); + MI.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + + MachineOperand &FI = MI.getOperand(4); + FI.setImm(FI.getImm() ? 
AMDGPU::DPP::DPP_FI_1 : AMDGPU::DPP::DPP_FI_0); + + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -5753,6 +5795,12 @@ void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB, MIB.addImm(Op.getImm()); } +void AMDGPUInstructionSelector::renderZextBoolTImm(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + MIB.addImm(MI.getOperand(OpIdx).getImm() != 0); +} + void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 563e40267f04b1..5b31cb827c9715 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -145,6 +145,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector { bool selectGlobalLoadLds(MachineInstr &MI) const; bool selectBVHIntrinsic(MachineInstr &I) const; bool selectSMFMACIntrin(MachineInstr &I) const; + bool selectPermlaneSwapIntrin(MachineInstr &I, Intrinsic::ID IntrID) const; bool selectWaveAddress(MachineInstr &I) const; bool selectStackRestore(MachineInstr &MI) const; bool selectNamedBarrierInit(MachineInstr &I, Intrinsic::ID IID) const; @@ -328,6 +329,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector { void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; + void renderZextBoolTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; void renderOpSelTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index ea951e409a4b10..8c050348f753bb 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3146,6 +3146,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_interp_inreg_p2_f16: case Intrinsic::amdgcn_interp_p10_rtz_f16: case Intrinsic::amdgcn_interp_p2_rtz_f16: + case Intrinsic::amdgcn_permlane16_swap: + case Intrinsic::amdgcn_permlane32_swap: applyDefaultMapping(OpdMapper); return; case Intrinsic::amdgcn_permlane16: @@ -4814,7 +4816,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8: case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8: case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8: - case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8: { + case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8: { // vdst, srcA, srcB, srcC, idx OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); @@ -4856,6 +4862,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); break; } + case Intrinsic::amdgcn_permlane16_swap: + case Intrinsic::amdgcn_permlane32_swap: { + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = OpdsMapping[1] = OpdsMapping[3] = OpdsMapping[4] = + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); + break; + } case Intrinsic::amdgcn_ballot: { unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td 
index 2ea254e64b8cb8..bc8b373d06e01a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -326,6 +326,8 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; foreach intr = AMDGPUMFMAIntrinsics908 in def : SourceOfDivergence; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index f90121a86c846c..136fe2e3f90d02 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -45,26 +45,10 @@ using namespace llvm; using DecodeStatus = llvm::MCDisassembler::DecodeStatus; -static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI, - MCContext &Ctx) { - if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) && - !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) { - MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI); - // If there is no default wave size it must be a generation before gfx10, - // these have FeatureWavefrontSize64 in their definition already. For gfx10+ - // set wave32 as a default. - STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32); - return STICopy; - } - - return STI; -} - AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII) - : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII), - MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()), - TargetMaxInstBytes(MAI.getMaxInstLength(&STI)), + : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), + MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)), CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) { // ToDo: AMDGPUDisassembler supports only VI ISA. 
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus()) @@ -1842,28 +1826,28 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { STI.hasFeature(AMDGPU::FeatureGFX10)) && "SDWAVopcDst should be present only on GFX9+"); - bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64); + bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32); if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; int TTmpIdx = getTTmpIdx(Val); if (TTmpIdx >= 0) { - auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32); + auto TTmpClsId = getTtmpClassId(IsWave32 ? OPW32 : OPW64); return createSRegOperand(TTmpClsId, TTmpIdx); } if (Val > SGPR_MAX) { - return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val); + return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val); } - return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val); + return createSRegOperand(getSgprClassId(IsWave32 ? OPW32 : OPW64), Val); } - return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO); + return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC); } MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const { - return STI.hasFeature(AMDGPU::FeatureWavefrontSize64) - ? decodeSrcOp(OPW64, Val) - : decodeSrcOp(OPW32, Val); + return STI.hasFeature(AMDGPU::FeatureWavefrontSize32) + ? 
decodeSrcOp(OPW32, Val) + : decodeSrcOp(OPW64, Val); } MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const { diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 44afccb0690d0d..218f487f7e12ce 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2232,12 +2232,14 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) { } static int -GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) { - // 2 pass -> 3 - // 4 pass -> 5 - // 8 pass -> 9 - // 16 pass -> 17 - return NumPasses + 1; +GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses, + bool IsGFX950) { + // xdl def cycles | gfx940 | gfx950 + // 2 pass | 3 4 + // 4 pass | 5 6 + // 8 pass | 9 10 + // 16 pass | 17 18 + return NumPasses + 1 + IsGFX950; } static int @@ -2300,12 +2302,14 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9; const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17; const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9; + const int GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 17; const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4; const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5; const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11; const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19; const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6; const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; + const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; const int MaxWaitStates = 19; @@ -2357,7 +2361,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: case 
AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: if (!isXDL(ST, *MI)) - NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates; + NeedWaitStates = + ST.hasGFX950Insts() + ? GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates + : DMFMA16x16WritesVGPROverlappedSrcCWaitStates; break; case AMDGPU::V_MFMA_F64_4X4X4F64_e64: case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: @@ -2373,7 +2380,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { NeedWaitStates = isXDL(ST, *MI1) ? GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( - NumPasses) + NumPasses, ST.hasGFX950Insts()) : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates( NumPasses); break; @@ -2408,7 +2415,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64: case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64: - NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates; + NeedWaitStates = + ST.hasGFX950Insts() + ? GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates + : DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates; break; case AMDGPU::V_MFMA_F64_4X4X4F64_e64: case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64: @@ -2603,6 +2613,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18; const int DMFMA4x4WriteVgprVALUReadWaitStates = 6; const int DMFMA16x16WriteVgprVALUReadWaitStates = 11; + const int GFX950_DMFMA16x16WriteVgprVALUReadWaitStates = 19; const int DotWriteSameDotReadSrcAB = 3; const int DotWriteDifferentVALURead = 3; const int DMFMABetweenVALUWriteVMEMRead = 2; @@ -2663,9 +2674,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { break; case 8: case 16: - NeedWaitStates = IsMemOrExport - ? DMFMA16x16WriteVgprMemExpReadWaitStates - : DMFMA16x16WriteVgprVALUReadWaitStates; + NeedWaitStates = + IsMemOrExport + ? DMFMA16x16WriteVgprMemExpReadWaitStates + : (ST.hasGFX950Insts() + ? 
GFX950_DMFMA16x16WriteVgprVALUReadWaitStates + : DMFMA16x16WriteVgprVALUReadWaitStates); break; default: llvm_unreachable("unexpected dgemm"); diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index 3403cbab526d46..a86c76bb6075e8 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -9,11 +9,11 @@ // The code produced for "generic" is only useful for tests and cannot // reasonably be expected to execute on any particular target. def : ProcessorModel<"generic", NoSchedModel, - [FeatureWavefrontSize64, FeatureGDS, FeatureGWS] + [FeatureGDS, FeatureGWS] >; def : ProcessorModel<"generic-hsa", NoSchedModel, - [FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace] + [FeatureGDS, FeatureGWS, FeatureFlatAddressSpace] >; //===------------------------------------------------------------===// @@ -204,7 +204,7 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel, FeatureISAVersion9_4_2.Features >; -def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel, +def : ProcessorModel<"gfx950", SIDPGFX950FullSpeedModel, FeatureISAVersion9_5_0.Features >; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp index 6233ca2eb4f1dd..51361b75940560 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp @@ -100,14 +100,16 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, if (Gen == AMDGPUSubtarget::INVALID) { Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS : AMDGPUSubtarget::SOUTHERN_ISLANDS; - } - - if (!hasFeature(AMDGPU::FeatureWavefrontSize32) && - !hasFeature(AMDGPU::FeatureWavefrontSize64)) { + // Assume wave64 for the unknown target, if not explicitly set. 
+ if (getWavefrontSizeLog2() == 0) + WavefrontSizeLog2 = 6; + } else if (!hasFeature(AMDGPU::FeatureWavefrontSize32) && + !hasFeature(AMDGPU::FeatureWavefrontSize64)) { // If there is no default wave size it must be a generation before gfx10, // these have FeatureWavefrontSize64 in their definition already. For gfx10+ // set wave32 as a default. ToggleFeature(AMDGPU::FeatureWavefrontSize32); + WavefrontSizeLog2 = getGeneration() >= AMDGPUSubtarget::GFX10 ? 5 : 6; } // We don't support FP64 for EG/NI atm. @@ -147,10 +149,6 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, !getFeatureBits().test(AMDGPU::FeatureCuMode)) LocalMemorySize *= 2; - // Don't crash on invalid devices. - if (WavefrontSizeLog2 == 0) - WavefrontSizeLog2 = 5; - HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; HasSMulHi = getGeneration() >= AMDGPUSubtarget::GFX9; @@ -166,7 +164,7 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, void GCNSubtarget::checkSubtargetFeatures(const Function &F) const { LLVMContext &Ctx = F.getContext(); - if (hasFeature(AMDGPU::FeatureWavefrontSize32) == + if (hasFeature(AMDGPU::FeatureWavefrontSize32) && hasFeature(AMDGPU::FeatureWavefrontSize64)) { Ctx.diagnose(DiagnosticInfoUnsupported( F, "must specify exactly one of wavefrontsize32 and wavefrontsize64")); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f3f96940c1f44b..18219174b16b1e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -221,6 +221,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasPseudoScalarTrans = false; bool HasRestrictedSOffset = false; bool HasPrngInst = false; + bool HasPermlane16Swap = false; + bool HasPermlane32Swap = false; bool HasVcmpxPermlaneHazard = false; bool HasVMEMtoScalarWriteHazard = false; bool HasSMEMtoVectorWriteHazard = false; @@ -1319,6 +1321,9 @@ class GCNSubtarget final : 
public AMDGPUGenSubtargetInfo, /// \returns true if the target has instructions with xf32 format support. bool hasXF32Insts() const { return HasXF32Insts; } + bool hasPermlane16Swap() const { return HasPermlane16Swap; } + bool hasPermlane32Swap() const { return HasPermlane32Swap; } + bool hasMinimum3Maximum3F32() const { return HasMinimum3Maximum3F32; } @@ -1564,6 +1569,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return getWavefrontSize() == 64; } + /// Returns if the wavesize of this subtarget is known reliable. This is false + /// only for the a default target-cpu that does not have an explicit + /// +wavefrontsize target feature. + bool isWaveSizeKnown() const { + return hasFeature(AMDGPU::FeatureWavefrontSize32) || + hasFeature(AMDGPU::FeatureWavefrontSize64); + } + const TargetRegisterClass *getBoolRC() const { return getRegisterInfo()->getBoolRC(); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 344028c4b48689..e21aa70c9859a0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -649,9 +649,9 @@ void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand, raw_ostream &O) { if (!FirstOperand) O << ", "; - printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize64) - ? AMDGPU::VCC - : AMDGPU::VCC_LO, + printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize32) + ? 
AMDGPU::VCC_LO + : AMDGPU::VCC, O, MRI); if (FirstOperand) O << ", "; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 29be64625811f7..c692895d84c002 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -77,7 +77,22 @@ static MCSubtargetInfo * createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (TT.getArch() == Triple::r600) return createR600MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); - return createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + + MCSubtargetInfo *STI = + createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + + // FIXME: We should error for the default target. + if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) && + !STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) { + // If there is no default wave size it must be a generation before gfx10, + // these have FeatureWavefrontSize64 in their definition already. For gfx10+ + // set wave32 as a default. + STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI) + ? 
AMDGPU::FeatureWavefrontSize32 + : AMDGPU::FeatureWavefrontSize64); + } + + return STI; } static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 1b88fdd3ab2e1c..c2e952418f1be2 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -919,7 +919,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT); HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT); } else if (CompareVT == MVT::i32) { - HWTrue = DAG.getConstant(-1, DL, CompareVT); + HWTrue = DAG.getAllOnesConstant(DL, CompareVT); HWFalse = DAG.getConstant(0, DL, CompareVT); } else { @@ -949,7 +949,7 @@ SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op, unsigned DestAS = ASC->getDestAddressSpace(); if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS) - return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT); + return DAG.getSignedConstant(TM.getNullPointerValue(DestAS), SL, VT); return Op; } @@ -1750,11 +1750,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0), - SelectCC.getOperand(0), // LHS - SelectCC.getOperand(1), // RHS - DAG.getConstant(-1, DL, MVT::i32), // True - DAG.getConstant(0, DL, MVT::i32), // False - SelectCC.getOperand(4)); // CC + SelectCC.getOperand(0), // LHS + SelectCC.getOperand(1), // RHS + DAG.getAllOnesConstant(DL, MVT::i32), // True + DAG.getConstant(0, DL, MVT::i32), // False + SelectCC.getOperand(4)); // CC } // insert_vector_elt (build_vector elt0, ... 
, eltN), NewEltIdx, idx diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index e520dfff1016b2..f3b5e6985e8e0d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4019,10 +4019,11 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op, Align StackAlign = TFL->getStackAlign(); Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value if (Alignment && *Alignment > StackAlign) { - Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Alignment->value() - << Subtarget->getWavefrontSizeLog2(), - dl, VT)); + Tmp1 = DAG.getNode( + ISD::AND, dl, VT, Tmp1, + DAG.getSignedConstant(-(uint64_t)Alignment->value() + << Subtarget->getWavefrontSizeLog2(), + dl, VT)); } Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain @@ -6771,10 +6772,10 @@ SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const { // TODO: This should be a generic narrowing legalization, and can easily be // for GlobalISel. 
- SDValue MinExp = DAG.getConstant(minIntN(16), DL, ExpVT); + SDValue MinExp = DAG.getSignedConstant(minIntN(16), DL, ExpVT); SDValue ClampMin = DAG.getNode(ISD::SMAX, DL, ExpVT, Exp, MinExp); - SDValue MaxExp = DAG.getConstant(maxIntN(16), DL, ExpVT); + SDValue MaxExp = DAG.getSignedConstant(maxIntN(16), DL, ExpVT); SDValue Clamp = DAG.getNode(ISD::SMIN, DL, ExpVT, ClampMin, MaxExp); SDValue TruncExp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Clamp); @@ -7542,11 +7543,11 @@ SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, SDValue Vec0 = SVN->getOperand(VecIdx0); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec0, - DAG.getConstant(EltIdx0, SL, MVT::i32)); + DAG.getSignedConstant(EltIdx0, SL, MVT::i32)); SDValue Vec1 = SVN->getOperand(VecIdx1); SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec1, - DAG.getConstant(EltIdx1, SL, MVT::i32)); + DAG.getSignedConstant(EltIdx1, SL, MVT::i32)); Pieces.push_back(DAG.getBuildVector(PackVT, SL, {Elt0, Elt1})); } } @@ -9618,7 +9619,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait if (ST.hasSplitBarriers()) { SDValue K = - DAG.getTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32); + DAG.getSignedTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32); SDValue BarSignal = SDValue(DAG.getMachineNode(AMDGPU::S_BARRIER_SIGNAL_IMM, DL, MVT::Other, K, Op.getOperand(0)), @@ -11173,8 +11174,9 @@ SDValue SITargetLowering::lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const { SqrtS = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, SqrtID, SqrtX, Flags); SDValue SqrtSAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, SqrtS); - SDValue SqrtSNextDownInt = DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt, - DAG.getConstant(-1, DL, MVT::i32)); + SDValue SqrtSNextDownInt = + DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt, + DAG.getAllOnesConstant(DL, MVT::i32)); SDValue SqrtSNextDown = DAG.getNode(ISD::BITCAST, DL, VT, 
SqrtSNextDownInt); SDValue NegSqrtSNextDown = @@ -11296,7 +11298,7 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const { SDValue SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2); - SDValue ScaleDownFactor = DAG.getConstant(-128, DL, MVT::i32); + SDValue ScaleDownFactor = DAG.getSignedConstant(-128, DL, MVT::i32); SDValue ScaleDown = DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleDownFactor, ZeroInt); SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags); @@ -14689,7 +14691,7 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, (CRHS->isZero() && (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE))) return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0), - DAG.getConstant(-1, SL, MVT::i1)); + DAG.getAllOnesConstant(SL, MVT::i1)); if ((CRHS->isAllOnes() && (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) || (CRHS->isZero() && @@ -14715,7 +14717,7 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, if ((CF == CRHSVal && CC == ISD::SETEQ) || (CT == CRHSVal && CC == ISD::SETNE)) return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0), - DAG.getConstant(-1, SL, MVT::i1)); + DAG.getAllOnesConstant(SL, MVT::i1)); if ((CF == CRHSVal && CC == ISD::SETNE) || (CT == CRHSVal && CC == ISD::SETEQ)) return LHS.getOperand(0); @@ -16677,8 +16679,8 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false); const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); if (RC == &AMDGPU::VReg_1RegClass && !isDivergent) - return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass - : &AMDGPU::SReg_32RegClass; + return Subtarget->isWave64() ? 
&AMDGPU::SReg_64RegClass + : &AMDGPU::SReg_32RegClass; if (!TRI->isSGPRClass(RC) && !isDivergent) return TRI->getEquivalentSGPRClass(RC); if (TRI->isSGPRClass(RC) && isDivergent) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index b7c008235fb7ae..4a94d690297949 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4468,7 +4468,11 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI, // Check output modifiers return !hasModifiersSet(MI, AMDGPU::OpName::omod) && !hasModifiersSet(MI, AMDGPU::OpName::clamp) && - !hasModifiersSet(MI, AMDGPU::OpName::byte_sel); + !hasModifiersSet(MI, AMDGPU::OpName::byte_sel) && + // TODO: Can we avoid checking bound_ctrl/fi here? + // They are only used by permlane*_swap special case. + !hasModifiersSet(MI, AMDGPU::OpName::bound_ctrl) && + !hasModifiersSet(MI, AMDGPU::OpName::fi); } // Set VCC operand with all flags from \p Orig, except for setting it as @@ -7699,8 +7703,8 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, // Insert a trivial select instead of creating a copy, because a copy from // SCC would semantically mean just copying a single bit, but we may need // the result to be a vector condition mask that needs preserving. - unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64 - : AMDGPU::S_CSELECT_B32; + unsigned Opcode = + ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; auto NewSelect = BuildMI(MBB, MII, DL, get(Opcode), NewCondReg).addImm(-1).addImm(0); NewSelect->getOperand(3).setIsUndef(Cond.isUndef()); @@ -8712,7 +8716,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const { } // IndexStride = 64 / 32. - uint64_t IndexStride = ST.getWavefrontSize() == 64 ? 3 : 2; + uint64_t IndexStride = ST.isWave64() ? 3 : 2; Rsrc23 |= IndexStride << AMDGPU::RSRC_INDEX_STRIDE_SHIFT; // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index afb0b2cede045c..7f77270a931835 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">, +def isWave32 : Predicate<"Subtarget->isWave32()">, AssemblerPredicate <(all_of FeatureWavefrontSize32)>; -def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">, +def isWave64 : Predicate<"Subtarget->isWave64()">, AssemblerPredicate <(all_of FeatureWavefrontSize64)>; class AMDGPUMnemonicAlias @@ -807,32 +807,38 @@ def as_i1timm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); }]>; +def as_i1timm_zext : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + def as_i8imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8); }]>; def as_i8timm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); }]>; def as_i16imm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); }]>; def as_i16timm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); + // Explicit cast, as this is used with both signed and unsigned immediates. 
+ return CurDAG->getSignedTargetConstant(int16_t(N->getSExtValue()), SDLoc(N), + MVT::i16); }]>; def as_i32imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; def as_i32timm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; def as_i64imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); + return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); }]>; def cond_as_i32imm: SDNodeXForm; def VOP_V4I32_V4I32_V8I32_I32 : VOPProfile <[v4i32, v4i32, v8i32, i32]>; def VOP_V16I32_V4I32_V8I32_I32 : VOPProfile <[v16i32, v4i32, v8i32, i32]>; def VOP_V4F32_V4I32_V8I32_I32 : VOPProfile <[v4f32, v4i32, v8i32, i32]>; +def VOP_V16F32_V4I32_V8I32_I32 : VOPProfile <[v16f32, v4i32, v8i32, i32]>; def VOP_V4F32_V8F16_V8F16_V4F32 : VOPProfile <[v4f32, v8f16, v8f16, v4f32]>; def VOP_V16F32_V8F16_V8F16_V16F32 : VOPProfile <[v16f32, v8f16, v8f16, v16f32]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 3f211e7cbdde50..bc25d75131cc35 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3727,7 +3727,7 @@ def FPPow2ToExponentXForm : SDNodeXFormgetValueAPF(); int Log2 = APF.getExactLog2Abs(); assert(Log2 != INT_MIN); - return CurDAG->getTargetConstant(Log2, SDLoc(N), MVT::i32); + return CurDAG->getSignedTargetConstant(Log2, SDLoc(N), MVT::i32); }]>; // Check if a floating point value is a power of 2 floating-point diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index a60b1f28e9d34c..117add324db565 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -64,6 +64,7 @@ def Write8PassMAI : SchedWrite; def Write16PassMAI : SchedWrite; def Write4PassDGEMM : 
SchedWrite; def Write8PassDGEMM : SchedWrite; +def Write16PassDGEMM : SchedWrite; // Scalar float instructions def WriteSFPU : SchedWrite; @@ -94,6 +95,7 @@ def SIFullSpeedModel : SISchedMachineModel; def SIQuarterSpeedModel : SISchedMachineModel; def SIDPFullSpeedModel : SISchedMachineModel; def SIDPGFX940FullSpeedModel : SISchedMachineModel; +def SIDPGFX950FullSpeedModel : SISchedMachineModel; def GFX10SpeedModel : SISchedMachineModel; def GFX11SpeedModel : SISchedMachineModel; def GFX12SpeedModel : SISchedMachineModel; @@ -169,6 +171,8 @@ multiclass SICommonWriteRes { def : HWVALUWriteRes; let ReleaseAtCycles = [8] in def : HWVALUWriteRes; + let ReleaseAtCycles = [16] in + def : HWVALUWriteRes; let ReleaseAtCycles = [2] in def : HWWriteRes; @@ -201,6 +205,13 @@ def WriteCopy : SchedWriteVariant<[ SchedVar, SchedVar]>; +// Check if any matrix inputs are interpreted as f8 in an f8f6f4 mfma +// instruction. +def PredIsF8_MFMA_SCALE : SchedPredicate<[{ + TII->getNamedOperand(*MI, AMDGPU::OpName::cbsz)->getImm() <= AMDGPU::MFMAScaleFormats::FP8_E5M2 || + TII->getNamedOperand(*MI, AMDGPU::OpName::blgp)->getImm() <= AMDGPU::MFMAScaleFormats::FP8_E5M2 +}]>; + let SchedModel = SIFullSpeedModel in { defm : SICommonWriteRes; @@ -299,6 +310,58 @@ def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>; } // End SchedModel = SIDPGFX940FullSpeedModel + +let SchedModel = SIDPGFX950FullSpeedModel in { +defm : SICommonWriteRes; + +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; +def : HWVALUWriteRes; + +def : InstRW<[WriteCopy], (instrs COPY)>; +def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>; +def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>; + +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X8X")>; +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X16")>; +def : 
InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X32")>; +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X64")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X[14][FBI]")>; + +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X4XF")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X8")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X16")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X32_")>; +def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X[124][FBI]")>; + +def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>; +def : InstRW<[Write16PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>; + +def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>; +def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>; + + +// If either matrix format is f8, the instruction takes 2x as many +// cycles. TODO: This isn't reflected in MCA. +def WriteMFMAScale_16X16X128_F8F6F4 : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def WriteMFMAScale_32X32X64_F8F6F4 : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def : InstRW<[WriteMFMAScale_16X16X128_F8F6F4, MIMFMARead], + (instregex "^V_MFMA(_SCALE)?_.32_16X16X128_F8F6F4")>; +def : InstRW<[WriteMFMAScale_32X32X64_F8F6F4, MIMFMARead], + (instregex "^V_MFMA(_SCALE)?_.32_32X32X64_F8F6F4")>; + +} // End SchedModel = SIDPGFX950FullSpeedModel + + let SchedModel = GFX10SpeedModel in { // The latency values are 1 / (operations / cycle). 
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 3cda173207dfb1..1dd39be9e8d9c7 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -380,6 +380,24 @@ def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC64 = VRegSrc_32; } +def VOP_PERMLANE_SWAP : VOPProfile<[i32, i32, untyped, untyped]> { + let Outs32 = (outs DstRC:$vdst, VRegSrc_32:$src0_out); + let Outs64 = (outs DstRC64:$vdst, VRegSrc_32:$src0_out); + + let Src0RC32 = VRegSrc_32; + let Src0RC64 = VRegSrc_32; + let HasClamp = 0; + let HasExtVOP3DPP = 0; + let HasExtDPP = 0; + let HasExtSDWA = 0; + + let Ins32 = (ins Src0RC64:$vdst_in, Src0RC32:$src0); + let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl); + let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl); + let Asm64 = "$vdst, $src0$bound_ctrl$fi"; + let AsmVOP3OpSel = "$vdst, $src0$bound_ctrl$fi"; +} + // Special case because there are no true output operands. Hack vdst // to be a src operand. 
The custom inserter must add a tied implicit // def and use of the super register since there seems to be no way to @@ -767,6 +785,18 @@ let SubtargetPredicate = isGFX11Plus in { let SubtargetPredicate = HasPrngInst in defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>; +let Constraints = "$vdst = $vdst_in, $src0_out = $src0", + DisableEncoding="$vdst_in,$src0_out", + SchedRW = [Write32Bit, Write32Bit] in { +let SubtargetPredicate = HasPermlane16Swap in { +defm V_PERMLANE16_SWAP_B32 : VOP1Inst<"v_permlane16_swap_b32", VOP_PERMLANE_SWAP>; +} + +let SubtargetPredicate = HasPermlane32Swap in { +defm V_PERMLANE32_SWAP_B32 : VOP1Inst<"v_permlane32_swap_b32", VOP_PERMLANE_SWAP>; +} +} + foreach vt = Reg32Types.types in { def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)), (vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0))) @@ -1512,6 +1542,20 @@ let DecoderNamespace = "GFX9" in { } } +/// Special case of VOP1 instructions, with a VOP3 form where op_sel +/// is used for DPP operands. 
+multiclass VOP1_OpSel_Real_e32e64_gfx9 op> { + let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { + def _e32_gfx9 : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.GFX9>, + VOP1e(NAME#"_e32").Pfl>; + + def _e64_gfx9 : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX9>, + VOP3OpSelIsDPP_gfx9(NAME#"_e64").Pfl>; + } +} + defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let AssemblerPredicate = isGFX940Plus in @@ -1525,6 +1569,8 @@ defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>; defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>; +defm V_PERMLANE16_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x059>; +defm V_PERMLANE32_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x05a>; class MovDPP8Pattern : GCNPat < (vt (int_amdgcn_mov_dpp8 vt:$src, timm:$dpp8)), diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index a487c311fe13a3..5d8dc5ccd18e55 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -644,6 +644,7 @@ def VOPProfileSMFMAC_I32_16X16X128_I8 : VOPProfileSMFMAC; def VOPProfileSMFMAC_F32_16X16X128_F8 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_F32_32X32X64_F8 : VOPProfileSMFMAC; def VOPProfileMAI_F32_V8F16_X32 : VOPProfileMAI; def VOPProfileMAI_F32_V8F16_X32_VCD : VOPProfileMAI; @@ -1061,6 +1062,10 @@ defm V_SMFMAC_F32_16X16X128_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x128_bf8_bf8 defm V_SMFMAC_F32_16X16X128_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x128_bf8_fp8", "F32_16X16X128_F8", int_amdgcn_smfmac_f32_16x16x128_bf8_fp8>; defm V_SMFMAC_F32_16X16X128_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x128_fp8_bf8", "F32_16X16X128_F8", int_amdgcn_smfmac_f32_16x16x128_fp8_bf8>; defm V_SMFMAC_F32_16X16X128_FP8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x128_fp8_fp8", "F32_16X16X128_F8", int_amdgcn_smfmac_f32_16x16x128_fp8_fp8>; +defm V_SMFMAC_F32_32X32X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x64_bf8_bf8", 
"F32_32X32X64_F8", int_amdgcn_smfmac_f32_32x32x64_bf8_bf8>; +defm V_SMFMAC_F32_32X32X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x64_bf8_fp8", "F32_32X32X64_F8", int_amdgcn_smfmac_f32_32x32x64_bf8_fp8>; +defm V_SMFMAC_F32_32X32X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x64_fp8_bf8", "F32_32X32X64_F8", int_amdgcn_smfmac_f32_32x32x64_fp8_bf8>; +defm V_SMFMAC_F32_32X32X64_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x64_fp8_fp8", "F32_32X32X64_F8", int_amdgcn_smfmac_f32_32x32x64_fp8_fp8>; } def MAIInstInfoTable : GenericTable { @@ -2163,6 +2168,10 @@ defm V_SMFMAC_F32_16X16X128_BF8_BF8 : VOP3P_Real_SMFMAC <0x3b, "v_smfmac_f32_16x defm V_SMFMAC_F32_16X16X128_BF8_FP8 : VOP3P_Real_SMFMAC <0x3c, "v_smfmac_f32_16x16x128bf8fp8">; defm V_SMFMAC_F32_16X16X128_FP8_BF8 : VOP3P_Real_SMFMAC <0x3d, "v_smfmac_f32_16x16x128fp8bf8">; defm V_SMFMAC_F32_16X16X128_FP8_FP8 : VOP3P_Real_SMFMAC <0x43, "v_smfmac_f32_16x16x128fp8fp8">; +defm V_SMFMAC_F32_32X32X64_BF8_BF8 : VOP3P_Real_SMFMAC <0x4b, "v_smfmac_f32_32x32x64bf8bf8">; +defm V_SMFMAC_F32_32X32X64_BF8_FP8 : VOP3P_Real_SMFMAC <0x4e, "v_smfmac_f32_32x32x64bf8fp8">; +defm V_SMFMAC_F32_32X32X64_FP8_BF8 : VOP3P_Real_SMFMAC <0x4f, "v_smfmac_f32_32x32x64fp8bf8">; +defm V_SMFMAC_F32_32X32X64_FP8_FP8 : VOP3P_Real_SMFMAC <0x53, "v_smfmac_f32_32x32x64fp8fp8">; defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>; defm V_PK_MUL_F32 : VOP3P_Real_vi <0x31>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index eb9d00972468c2..34c7989b9d0b86 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -324,6 +324,18 @@ class VOP3OpSel_gfx9 op, VOPProfile P> : VOP3e_vi { let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0); } +// Special case for v_permlane16_swap_b32/v_permlane32_swap_b32 +// op_sel[0]/op_sel[1] are treated as bound_ctrl and fi dpp operands. 
+class VOP3OpSelIsDPP_gfx9 op, VOPProfile P> : VOP3e_vi { + bits<1> fi; + bits<1> bound_ctrl; + + // OPSEL[0] specifies FI + let Inst{11} = fi; + // OPSEL[1] specifies BOUND_CTRL + let Inst{12} = bound_ctrl; +} + class VOP3OpSel_gfx10 op, VOPProfile p> : VOP3e_gfx10 { let Inst{11} = !if(p.HasSrc0, src0_modifiers{2}, 0); let Inst{12} = !if(p.HasSrc1, src1_modifiers{2}, 0); diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.td b/llvm/lib/Target/ARC/ARCInstrInfo.td index 693bc8a78bc5b1..f26b49119cabaf 100644 --- a/llvm/lib/Target/ARC/ARCInstrInfo.td +++ b/llvm/lib/Target/ARC/ARCInstrInfo.td @@ -55,8 +55,7 @@ def ARCcmp : SDNode<"ARCISD::CMP", SDT_ARCcmptst, [SDNPOutGlue]>; def ARCcmov : SDNode<"ARCISD::CMOV", SDT_ARCcmov, [SDNPInGlue]>; // Conditional Branch -def ARCbrcc : SDNode<"ARCISD::BRcc", SDT_ARCbrcc, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; +def ARCbrcc : SDNode<"ARCISD::BRcc", SDT_ARCbrcc, [SDNPHasChain]>; // Direct Call def ARCBranchLink : SDNode<"ARCISD::BL",SDT_ARCBranchLink, diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index aad305cce03961..a1f068f0e049bd 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -299,6 +299,8 @@ const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) return &ARM::rGPRRegClass; // Can't copy CCR registers. 
+ if (RC == &ARM::cl_FPSCR_NZCVRegClass) + return &ARM::rGPRRegClass; return RC; } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 84b37ae6833aed..6b290135c5bcba 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -4971,14 +4971,14 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl, bool Signaling) const { assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); - SDValue Cmp; + SDValue Flags; if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, - dl, MVT::Glue, LHS, RHS); + Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT, + LHS, RHS); else - Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, - dl, MVT::Glue, LHS); - return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); + Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl, + FlagsVT, LHS); + return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Flags); } /// duplicateCmp - Glue values can have only one use, so this function @@ -4991,15 +4991,11 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); - Cmp = Cmp.getOperand(0); - Opc = Cmp.getOpcode(); - if (Opc == ARMISD::CMPFP) - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); - else { - assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); - } - return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); + SDValue Flags = Cmp.getOperand(0); + assert((Flags.getOpcode() == ARMISD::CMPFP || + Flags.getOpcode() == ARMISD::CMPFPw0) && + "unexpected operand of FMSTAT"); + return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Flags); } // This function returns 
three things: the arithmetic computation itself diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index 5b49f728ebb8d8..a29753909ea992 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -10,7 +10,17 @@ // //===----------------------------------------------------------------------===// -def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; +def SDT_CMPFP : SDTypeProfile<1, 2, [ + SDTCisVT<0, FlagsVT>, // out flags + SDTCisFP<1>, // lhs + SDTCisSameAs<2, 1> // rhs +]>; + +def SDT_CMPFP0 : SDTypeProfile<1, 1, [ + SDTCisVT<0, FlagsVT>, // out flags + SDTCisFP<1> // operand +]>; + def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, @@ -18,11 +28,18 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>; -def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; -def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; -def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; -def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_ARMCmp, [SDNPOutGlue]>; -def arm_cmpfpe0: SDNode<"ARMISD::CMPFPEw0",SDT_CMPFP0, [SDNPOutGlue]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_CMPFP>; +def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0>; +def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>; +def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>; + +def arm_fmstat : SDNode<"ARMISD::FMSTAT", + SDTypeProfile<0, 1, [ + SDTCisVT<0, FlagsVT> // in flags + ]>, + [SDNPOutGlue] // TODO: Change Glue to a normal result. 
+>; + def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>; @@ -606,12 +623,12 @@ let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "", - [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>; + [(set FPSCR_NZCV, (arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm)))]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "", - [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> { + [(set FPSCR_NZCV, (arm_cmpfpe SPR:$Sd, SPR:$Sm))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -620,17 +637,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", - [(arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm))]>; + [(set FPSCR_NZCV, (arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm)))]>; def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "", - [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; + [(set FPSCR_NZCV, (arm_cmpfp DPR:$Dd, (f64 DPR:$Dm)))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "", - [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { + [(set FPSCR_NZCV, (arm_cmpfp SPR:$Sd, SPR:$Sm))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -639,7 +656,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm", - [(arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm))]>; + [(set FPSCR_NZCV, (arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm)))]>; } // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// @@ -669,7 +686,7 @@ let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "", - [(arm_cmpfpe0 (f64 DPR:$Dd))]> { + [(set FPSCR_NZCV, (arm_cmpfpe0 (f64 DPR:$Dd)))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -677,7 +694,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "", - [(arm_cmpfpe0 SPR:$Sd)]> { + [(set FPSCR_NZCV, (arm_cmpfpe0 SPR:$Sd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -689,7 +706,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0", - [(arm_cmpfpe0 (f16 HPR:$Sd))]> { + [(set FPSCR_NZCV, (arm_cmpfpe0 (f16 HPR:$Sd)))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -697,7 +714,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "", - [(arm_cmpfp0 (f64 DPR:$Dd))]> { + [(set FPSCR_NZCV, (arm_cmpfp0 (f64 DPR:$Dd)))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -705,7 +722,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "", - [(arm_cmpfp0 SPR:$Sd)]> { + [(set FPSCR_NZCV, (arm_cmpfp0 SPR:$Sd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -717,7 +734,7 @@ def VCMPZS : 
ASuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0", - [(arm_cmpfp0 (f16 HPR:$Sd))]> { + [(set FPSCR_NZCV, (arm_cmpfp0 (f16 HPR:$Sd)))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -2492,7 +2509,8 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in { let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs], Rt = 0b1111 /* apsr_nzcv */ in def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins), - "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>; + "vmrs", "\tAPSR_nzcv, fpscr", + [(arm_fmstat FPSCR_NZCV)]>; // Application level FPSCR -> GPR let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td index f37d0fe542b4f7..f5a675e2976bb7 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -413,7 +413,9 @@ def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1, v2i1], 32, (add VPR)> { // FPSCR, when the flags at the top of it are used as the input or // output to an instruction such as MVE VADC. -def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)>; +def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)> { + let CopyCost = -1; +} // Scalar single precision floating point register class.. // FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 3551c927e989ee..07c79f6f227b02 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -412,24 +412,20 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 3) { // Optimize left rotation 3 bits to swap then right rotation 1 bit. 
Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim); - Victim = - DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT)); + Victim = DAG.getNode(AVRISD::ROR, dl, VT, Victim); ShiftAmount = 0; } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 3) { // Optimize right rotation 3 bits to swap then left rotation 1 bit. Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim); - Victim = - DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT)); + Victim = DAG.getNode(AVRISD::ROL, dl, VT, Victim); ShiftAmount = 0; } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 7) { // Optimize left rotation 7 bits to right rotation 1 bit. - Victim = - DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT)); + Victim = DAG.getNode(AVRISD::ROR, dl, VT, Victim); ShiftAmount = 0; } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 7) { // Optimize right rotation 7 bits to left rotation 1 bit. - Victim = - DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT)); + Victim = DAG.getNode(AVRISD::ROL, dl, VT, Victim); ShiftAmount = 0; } else if ((Op.getOpcode() == ISD::ROTR || Op.getOpcode() == ISD::ROTL) && ShiftAmount >= 4) { @@ -890,10 +886,9 @@ SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue TargetCC; SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl); - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp}; - return DAG.getNode(AVRISD::SELECT_CC, dl, VTs, Ops); + return DAG.getNode(AVRISD::SELECT_CC, dl, Op.getValueType(), Ops); } SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { @@ -907,10 +902,9 @@ SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue TrueV = DAG.getConstant(1, DL, Op.getValueType()); SDValue FalseV = DAG.getConstant(0, DL, Op.getValueType()); - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp}; 
- return DAG.getNode(AVRISD::SELECT_CC, DL, VTs, Ops); + return DAG.getNode(AVRISD::SELECT_CC, DL, Op.getValueType(), Ops); } SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index e912878e9b23cc..3973cd30de1ecb 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -69,9 +69,9 @@ def AVRasrbn : SDNode<"AVRISD::ASRBN", SDTIntBinOp>; def AVRlslwn : SDNode<"AVRISD::LSLWN", SDTIntBinOp>; def AVRlsrwn : SDNode<"AVRISD::LSRWN", SDTIntBinOp>; def AVRasrwn : SDNode<"AVRISD::ASRWN", SDTIntBinOp>; -def AVRlslw : SDNode<"AVRISD::LSLW", SDTIntShiftDOp>; -def AVRlsrw : SDNode<"AVRISD::LSRW", SDTIntShiftDOp>; -def AVRasrw : SDNode<"AVRISD::ASRW", SDTIntShiftDOp>; +def AVRlslw : SDNode<"AVRISD::LSLW", SDTIntShiftPairOp>; +def AVRlsrw : SDNode<"AVRISD::LSRW", SDTIntShiftPairOp>; +def AVRasrw : SDNode<"AVRISD::ASRW", SDTIntShiftPairOp>; // Pseudo shift nodes for non-constant shift amounts. 
def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index da639903764e7a..1f5cadf37ab581 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -696,10 +696,9 @@ SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { NegateCC(LHS, RHS, CC); SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType()); - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; - return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops); + return DAG.getNode(BPFISD::SELECT_CC, DL, Op.getValueType(), Ops); } const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const { diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 62d6e25f83b59f..86929a952d1ba4 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -44,7 +44,7 @@ def BPFcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_BPFCallSeqEnd, def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, [SDNPHasChain, SDNPOutGlue, SDNPInGlue]>; -def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC, [SDNPInGlue]>; +def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC>; def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>; def BPFmemcpy : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 36228a5e0dce18..7cc08b2fe7cc4b 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -818,6 +818,15 @@ def FlattenedThreadIdInGroup : DXILOp<96, flattenedThreadIdInGroup> { let attributes = [Attributes]; } +def MakeDouble : DXILOp<101, makeDouble> { + let Doc = "creates a double value"; + let LLVMIntrinsic = int_dx_asdouble; + let arguments = [Int32Ty, Int32Ty]; + let result = DoubleTy; + 
let stages = [Stages]; + let attributes = [Attributes]; +} + def SplitDouble : DXILOp<102, splitDouble> { let Doc = "Splits a double into 2 uints"; let arguments = [OverloadTy]; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 182cdaa4e9a7d7..2ca4e23594d56d 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -28,6 +28,8 @@ bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, bool DirectXTTIImpl::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int ScalarOpdIdx) { switch (ID) { + case Intrinsic::dx_asdouble: + return ScalarOpdIdx == 0; default: return ScalarOpdIdx == -1; } @@ -39,6 +41,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_frac: case Intrinsic::dx_rsqrt: case Intrinsic::dx_wave_readlane: + case Intrinsic::dx_asdouble: case Intrinsic::dx_splitdouble: case Intrinsic::dx_firstbituhigh: case Intrinsic::dx_firstbitshigh: diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index e8ec93dd5ee63c..d758260a8ab5db 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -59,7 +59,7 @@ add_llvm_target(HexagonCodeGen HexagonSelectionDAGInfo.cpp HexagonSplitConst32AndConst64.cpp HexagonSplitDouble.cpp - HexagonStoreWidening.cpp + HexagonLoadStoreWidening.cpp HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp diff --git a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp new file mode 100644 index 00000000000000..1a60d0e13057e9 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp @@ -0,0 +1,915 @@ +//===---HexagonLoadStoreWidening.cpp---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// HexagonStoreWidening: +// Replace sequences of "narrow" stores to adjacent memory locations with +// fewer "wide" stores that have the same effect. +// For example, replace: +// S4_storeirb_io %100, 0, 0 ; store-immediate-byte +// S4_storeirb_io %100, 1, 0 ; store-immediate-byte +// with +// S4_storeirh_io %100, 0, 0 ; store-immediate-halfword +// The above is the general idea. The actual cases handled by the code +// may be a bit more complex. +// The purpose of this pass is to reduce the number of outstanding stores, +// or as one could say, "reduce store queue pressure". Also, wide stores +// mean fewer stores, and since there are only two memory instructions allowed +// per packet, it also means fewer packets, and ultimately fewer cycles. +// +// HexagonLoadWidening does the same thing as HexagonStoreWidening but +// for Loads. Here, we try to replace 4-byte Loads with register-pair loads. 
+// For example: +// Replace +// %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8) +// %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2) +// with +// %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1) +// %2:intregs = COPY %4.isub_lo:doubleregs +// %3:intregs = COPY %4.isub_hi:doubleregs +// +// LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N +// insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts + +//===---------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-load-store-widening" + +static cl::opt MaxMBBSizeForLoadStoreWidening( + "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000), + cl::desc("Limit block size to analyze in load/store widening pass")); + +namespace llvm { + +FunctionPass *createHexagonStoreWidening(); +FunctionPass *createHexagonLoadWidening(); +void initializeHexagonStoreWideningPass(PassRegistry &); +void initializeHexagonLoadWideningPass(PassRegistry &); + +} // end 
namespace llvm + +namespace { + +struct HexagonLoadStoreWidening { + enum WideningMode { Store, Load }; + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + MachineRegisterInfo *MRI; + AliasAnalysis *AA; + MachineFunction *MF; + +public: + HexagonLoadStoreWidening(const HexagonInstrInfo *TII, + const HexagonRegisterInfo *TRI, + MachineRegisterInfo *MRI, AliasAnalysis *AA, + MachineFunction *MF, bool StoreMode) + : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF), + Mode(StoreMode ? WideningMode::Store : WideningMode::Load), + HII(MF->getSubtarget().getInstrInfo()) {} + + bool run(); + +private: + const bool Mode; + const unsigned MaxWideSize = 8; + const HexagonInstrInfo *HII = nullptr; + + using InstrSet = SmallPtrSet; + using InstrGroup = SmallVector; + using InstrGroupList = SmallVector; + + InstrSet ProcessedInsts; + + unsigned getBaseAddressRegister(const MachineInstr *MI); + int64_t getOffset(const MachineInstr *MI); + int64_t getPostIncrementValue(const MachineInstr *MI); + bool handledInstType(const MachineInstr *MI); + + void createGroup(MachineInstr *BaseInst, InstrGroup &Group); + void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups); + bool processBasicBlock(MachineBasicBlock &MBB); + bool processGroup(InstrGroup &Group); + bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); + bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool replaceInsts(InstrGroup &OG, InstrGroup &NG); + bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2); + bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B); +}; + +struct HexagonStoreWidening : public MachineFunctionPass { + static char ID; + + HexagonStoreWidening() : MachineFunctionPass(ID) { + 
initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Hexagon Store Widening"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MFn) override { + if (skipFunction(MFn.getFunction())) + return false; + + auto &ST = MFn.getSubtarget(); + const HexagonInstrInfo *TII = ST.getInstrInfo(); + const HexagonRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo *MRI = &MFn.getRegInfo(); + AliasAnalysis *AA = &getAnalysis().getAAResults(); + + return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, true).run(); + } +}; + +struct HexagonLoadWidening : public MachineFunctionPass { + static char ID; + + HexagonLoadWidening() : MachineFunctionPass(ID) { + initializeHexagonLoadWideningPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Hexagon Load Widening"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MFn) override { + if (skipFunction(MFn.getFunction())) + return false; + + auto &ST = MFn.getSubtarget(); + const HexagonInstrInfo *TII = ST.getInstrInfo(); + const HexagonRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo *MRI = &MFn.getRegInfo(); + AliasAnalysis *AA = &getAnalysis().getAAResults(); + return HexagonLoadStoreWidening(TII, TRI, MRI, AA, &MFn, false).run(); + } +}; + +char HexagonStoreWidening::ID = 0; +char HexagonLoadWidening::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", + "Hexagon Store Widening", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", + "Hexagon Store Widening", 
false, false) + +INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads", + "Hexagon Load Widening", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads", + "Hexagon Load Widening", false, false) + +static const MachineMemOperand &getMemTarget(const MachineInstr *MI) { + assert(!MI->memoperands_empty() && "Expecting memory operands"); + return **MI->memoperands_begin(); +} + +unsigned +HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) { + assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode"); + unsigned Base, Offset; + HII->getBaseAndOffsetPosition(*MI, Base, Offset); + const MachineOperand &MO = MI->getOperand(Base); + assert(MO.isReg() && "Expecting register operand"); + return MO.getReg(); +} + +int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) { + assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode"); + + // On Hexagon, post-incs always have an offset of 0 + // There is no Offset operand to post-incs + if (HII->isPostIncrement(*MI)) + return 0; + + unsigned Base, Offset; + + HII->getBaseAndOffsetPosition(*MI, Base, Offset); + const MachineOperand &MO = MI->getOperand(Offset); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + return MO.getImm(); + case MachineOperand::MO_GlobalAddress: + return MO.getOffset(); + default: + break; + } + llvm_unreachable("Expecting an immediate or global operand"); +} + +inline int64_t +HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) { + unsigned Base, PostIncIdx; + HII->getBaseAndOffsetPosition(*MI, Base, PostIncIdx); + const MachineOperand &MO = MI->getOperand(PostIncIdx); + return MO.getImm(); +} + +// Filtering function: any loads/stores whose opcodes are not "approved" of by +// this function will not be subjected to widening. 
+inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (Mode == WideningMode::Store) { + switch (Opc) { + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S2_storeri_io: + // Base address must be a register. (Implement FI later.) + return MI->getOperand(0).isReg(); + case Hexagon::S2_storeri_pi: + return MI->getOperand(1).isReg(); + } + } else { + // LoadWidening for 8 and 16 bit loads needs 2x instructions to replace x + // loads. So we only widen 32 bit loads as we don't need to select the + // right bits with AND & SHIFT ops. + switch (Opc) { + case Hexagon::L2_loadri_io: + // Base address must be a register and offset must be immediate. + return !MI->memoperands_empty() && MI->getOperand(1).isReg() && + MI->getOperand(2).isImm(); + case Hexagon::L2_loadri_pi: + return !MI->memoperands_empty() && MI->getOperand(2).isReg(); + } + } + return false; +} + +static void addDefsUsesToList(const MachineInstr *MI, + DenseSet &RegDefs, + DenseSet &RegUses) { + for (const auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + if (Op.isDef()) + RegDefs.insert(Op.getReg()); + if (Op.readsReg()) + RegUses.insert(Op.getReg()); + } +} + +bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A, + const MachineInstr *B) { + DenseSet ARegDefs; + DenseSet ARegUses; + addDefsUsesToList(A, ARegDefs, ARegUses); + if (A->mayLoadOrStore() && B->mayLoadOrStore() && + (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true)) + return false; + for (const auto &BOp : B->operands()) { + if (!BOp.isReg()) + continue; + if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg())) + return false; + if (BOp.isDef() && ARegUses.contains(BOp.getReg())) + return false; + } + return true; +} + +// Inspect a machine basic block, and generate groups out of loads/stores +// encountered in the block. 
+// +// A load/store group is a group of loads or stores that use the same base +// register, and which can be reordered within that group without altering the +// semantics of the program. A single group could be widened as +// a whole, if there existed a single load/store instruction with the same +// semantics as the entire group. In many cases, a single group may need more +// than one wide load or store. +void HexagonLoadStoreWidening::createGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups) { + // Traverse all instructions and if we encounter + // a load/store, then try to create a group starting at that instruction + // i.e. a sequence of independent loads/stores that can be widened. + for (auto I = MBB.begin(); I != MBB.end(); ++I) { + MachineInstr *MI = &(*I); + if (!handledInstType(MI)) + continue; + if (ProcessedInsts.count(MI)) + continue; + + // Found a store. Try to create a store group. + InstrGroup G; + createGroup(MI, G); + if (G.size() > 1) + StoreGroups.push_back(G); + } +} + +// Create a single load/store group. The insts need to be independent between +// themselves, and also there cannot be other instructions between them +// that could read or modify storage being read from or stored into. +void HexagonLoadStoreWidening::createGroup(MachineInstr *BaseInst, + InstrGroup &Group) { + assert(handledInstType(BaseInst) && "Unexpected instruction"); + unsigned BaseReg = getBaseAddressRegister(BaseInst); + InstrGroup Other; + + Group.push_back(BaseInst); + LLVM_DEBUG(dbgs() << "BaseInst: "; BaseInst->dump()); + auto End = BaseInst->getParent()->end(); + auto I = BaseInst->getIterator(); + + while (true) { + I = std::next(I); + if (I == End) + break; + MachineInstr *MI = &(*I); + + // Assume calls are aliased to everything. 
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() || + MI->hasOrderedMemoryRef()) + return; + + if (!handledInstType(MI)) { + if (MI->mayLoadOrStore()) + Other.push_back(MI); + continue; + } + + // We have a handledInstType instruction + // If this load/store instruction is aliased with anything already in the + // group, terminate the group now. + for (auto GI : Group) + if (GI->mayAlias(AA, *MI, true)) + return; + if (Mode == WideningMode::Load) { + // Check if current load MI can be moved to the first load instruction + // in Group. If any load instruction aliases with memory instructions in + // Other, terminate the group. + for (auto MemI : Other) + if (!canSwapInstructions(MI, MemI)) + return; + } else { + // Check if store instructions in the group can be moved to current + // store MI. If any store instruction aliases with memory instructions + // in Other, terminate the group. + for (auto MemI : Other) { + if (std::distance(Group.back()->getIterator(), MemI->getIterator()) <= + 0) + continue; + for (auto GI : Group) + if (!canSwapInstructions(MemI, GI)) + return; + } + } + + unsigned BR = getBaseAddressRegister(MI); + if (BR == BaseReg) { + LLVM_DEBUG(dbgs() << "Added MI to group: "; MI->dump()); + Group.push_back(MI); + ProcessedInsts.insert(MI); + } + } // while +} + +// Check if load/store instructions S1 and S2 are adjacent. More precisely, +// S2 has to access memory immediately following that accessed by S1. +bool HexagonLoadStoreWidening::areAdjacent(const MachineInstr *S1, + const MachineInstr *S2) { + if (!handledInstType(S1) || !handledInstType(S2)) + return false; + + const MachineMemOperand &S1MO = getMemTarget(S1); + + // Currently only handling immediate stores. + int Off1 = getOffset(S1); + int Off2 = getOffset(S2); + + return (Off1 >= 0) ? 
Off1 + S1MO.getSize().getValue() == unsigned(Off2) + : int(Off1 + S1MO.getSize().getValue()) == Off2; +} + +/// Given a sequence of adjacent loads/stores, and a maximum size of a single +/// wide inst, pick a group of insts that can be replaced by a single load/store +/// of size not exceeding MaxSize. The selected sequence will be recorded +/// in OG ("old group" of instructions). +/// OG should be empty on entry, and should be left empty if the function +/// fails. +bool HexagonLoadStoreWidening::selectInsts(InstrGroup::iterator Begin, + InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, + unsigned MaxSize) { + assert(Begin != End && "No instructions to analyze"); + assert(OG.empty() && "Old group not empty on entry"); + + if (std::distance(Begin, End) <= 1) + return false; + + MachineInstr *FirstMI = *Begin; + assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); + const MachineMemOperand &FirstMMO = getMemTarget(FirstMI); + if (!FirstMMO.getType().isValid()) + return false; + + unsigned Alignment = FirstMMO.getAlign().value(); + unsigned SizeAccum = FirstMMO.getSize().getValue(); + unsigned FirstOffset = getOffset(FirstMI); + + // The initial value of SizeAccum should always be a power of 2. + assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); + + // If the size of the first store equals to or exceeds the limit, do nothing. + if (SizeAccum >= MaxSize) + return false; + + // If the size of the first load/store is greater than or equal to the address + // stored to, then the inst cannot be made any wider. + if (SizeAccum >= Alignment) { + LLVM_DEBUG( + dbgs() << "Size of load/store greater than equal to its alignment\n"); + return false; + } + + // The offset of a load/store will put restrictions on how wide the inst can + // be. Offsets in loads/stores of size 2^n bytes need to have the n lowest + // bits be 0. If the first inst already exhausts the offset limits, quit. 
+ // Test this by checking if the next wider size would exceed the limit. + // For post-increment instructions, the increment amount needs to follow the + // same rule. + unsigned OffsetOrIncVal = 0; + if (HII->isPostIncrement(*FirstMI)) + OffsetOrIncVal = getPostIncrementValue(FirstMI); + else + OffsetOrIncVal = FirstOffset; + if ((2 * SizeAccum - 1) & OffsetOrIncVal) { + LLVM_DEBUG(dbgs() << "Instruction cannot be widened as the offset/postinc" + << " value: " << getPostIncrementValue(FirstMI) + << " is invalid in the widened version\n"); + return false; + } + + OG.push_back(FirstMI); + MachineInstr *S1 = FirstMI; + + // Pow2Num will be the largest number of elements in OG such that the sum + // of sizes of loads/stores 0...Pow2Num-1 will be a power of 2. + unsigned Pow2Num = 1; + unsigned Pow2Size = SizeAccum; + bool HavePostInc = HII->isPostIncrement(*S1); + + // Be greedy: keep accumulating insts as long as they are to adjacent + // memory locations, and as long as the total number of bytes stored + // does not exceed the limit (MaxSize). + // Keep track of when the total size covered is a power of 2, since + // this is a size a single load/store can cover. + for (InstrGroup::iterator I = Begin + 1; I != End; ++I) { + MachineInstr *S2 = *I; + // Insts are sorted, so if S1 and S2 are not adjacent, there won't be + // any other store to fill the "hole". + if (!areAdjacent(S1, S2)) + break; + + // Cannot widen two post increments, need to return two registers + // with incremented values + if (HavePostInc && HII->isPostIncrement(*S2)) + break; + + unsigned S2Size = getMemTarget(S2).getSize().getValue(); + if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) + break; + + OG.push_back(S2); + SizeAccum += S2Size; + if (isPowerOf2_32(SizeAccum)) { + Pow2Num = OG.size(); + Pow2Size = SizeAccum; + } + if ((2 * Pow2Size - 1) & FirstOffset) + break; + + S1 = S2; + } + + // The insts don't add up to anything that can be widened. Clean up. 
+ if (Pow2Num <= 1) { + OG.clear(); + return false; + } + + // Only leave the loads/stores being widened. + OG.resize(Pow2Num); + TotalSize = Pow2Size; + return true; +} + +/// Given an "old group" OG of insts, create a "new group" NG of instructions +/// to replace them. +bool HexagonLoadStoreWidening::createWideInsts(InstrGroup &OG, InstrGroup &NG, + unsigned TotalSize) { + if (Mode == WideningMode::Store) { + return createWideStores(OG, NG, TotalSize); + } + return createWideLoads(OG, NG, TotalSize); +} + +/// Given an "old group" OG of stores, create a "new group" NG of instructions +/// to replace them. Ideally, NG would only have a single instruction in it, +/// but that may only be possible for store-immediate. +bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, + unsigned TotalSize) { + // XXX Current limitations: + // - only handle a TotalSize of up to 8 + + LLVM_DEBUG(dbgs() << "Creating wide stores\n"); + if (TotalSize > MaxWideSize) + return false; + + uint64_t Acc = 0; // Value accumulator. + unsigned Shift = 0; + bool HaveImm = false; + bool HaveReg = false; + + for (MachineInstr *MI : OG) { + const MachineMemOperand &MMO = getMemTarget(MI); + MachineOperand &SO = HII->isPostIncrement(*MI) + ? MI->getOperand(3) + : MI->getOperand(2); // Source. 
+ unsigned NBits; + uint64_t Mask; + uint64_t Val; + + switch (SO.getType()) { + case MachineOperand::MO_Immediate: + LLVM_DEBUG(dbgs() << "Have store immediate\n"); + HaveImm = true; + + NBits = MMO.getSizeInBits().toRaw(); + Mask = (0xFFFFFFFFFFFFFFFFU >> (64 - NBits)); + Val = (SO.getImm() & Mask) << Shift; + Acc |= Val; + Shift += NBits; + break; + case MachineOperand::MO_Register: + HaveReg = true; + break; + default: + LLVM_DEBUG(dbgs() << "Unhandled store\n"); + return false; + } + } + + if (HaveImm && HaveReg) { + LLVM_DEBUG(dbgs() << "Cannot merge store register and store imm\n"); + return false; + } + + MachineInstr *FirstSt = OG.front(); + DebugLoc DL = OG.back()->getDebugLoc(); + const MachineMemOperand &OldM = getMemTarget(FirstSt); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlign(), OldM.getAAInfo()); + MachineInstr *StI; + MachineOperand &MR = + (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(1) + : FirstSt->getOperand(0)); + auto SecondSt = OG.back(); + if (HaveReg) { + MachineOperand FReg = + (HII->isPostIncrement(*FirstSt) ? FirstSt->getOperand(3) + : FirstSt->getOperand(2)); + // Post increments appear first in the sorted group. 
+ // Cannot have a post increment for the second instruction + assert(!HII->isPostIncrement(*SecondSt) && "Unexpected PostInc"); + MachineOperand SReg = SecondSt->getOperand(2); + assert(FReg.isReg() && SReg.isReg() && + "Cannot merge store register and store imm"); + const MCInstrDesc &CombD = TII->get(Hexagon::A2_combinew); + Register VReg = + MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass); + MachineInstr *CombI = BuildMI(*MF, DL, CombD, VReg).add(SReg).add(FReg); + NG.push_back(CombI); + + if (FirstSt->getOpcode() == Hexagon::S2_storeri_pi) { + const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_pi); + auto IncDestMO = FirstSt->getOperand(0); + auto IncMO = FirstSt->getOperand(2); + StI = + BuildMI(*MF, DL, StD).add(IncDestMO).add(MR).add(IncMO).addReg(VReg); + } else { + const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io); + auto OffMO = FirstSt->getOperand(1); + StI = BuildMI(*MF, DL, StD).add(MR).add(OffMO).addReg(VReg); + } + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + return true; + } + + // Handle store immediates + // There are no post increment store immediates on Hexagon + assert(!HII->isPostIncrement(*FirstSt) && "Unexpected PostInc"); + auto Off = FirstSt->getOperand(1).getImm(); + if (TotalSize == 8) { + // Create vreg = A2_tfrsi #Acc; nreg = combine(#s32, vreg); memd = nreg + uint64_t Mask = 0xFFFFFFFFU; + int LowerAcc = int(Mask & Acc); + int UpperAcc = Acc >> 32; + Register DReg = + MF->getRegInfo().createVirtualRegister(&Hexagon::DoubleRegsRegClass); + MachineInstr *CombI; + if (Acc != 0) { + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + Register VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc); + NG.push_back(TfrI); + const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineir); + CombI = BuildMI(*MF, DL, CombD, DReg) + .addImm(UpperAcc) + .addReg(VReg, 
RegState::Kill); + } + // If immediates are 0, we do not need A2_tfrsi + else { + const MCInstrDesc &CombD = TII->get(Hexagon::A4_combineii); + CombI = BuildMI(*MF, DL, CombD, DReg).addImm(0).addImm(0); + } + NG.push_back(CombI); + const MCInstrDesc &StD = TII->get(Hexagon::S2_storerd_io); + StI = + BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(DReg, RegState::Kill); + } else if (Acc < 0x10000) { + // Create mem[hw] = #Acc + unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io + : (TotalSize == 4) ? Hexagon::S4_storeiri_io + : 0; + assert(WOpc && "Unexpected size"); + + int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); + const MCInstrDesc &StD = TII->get(WOpc); + StI = BuildMI(*MF, DL, StD).add(MR).addImm(Off).addImm(Val); + } else { + // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + Register VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc)); + NG.push_back(TfrI); + + unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io + : (TotalSize == 4) ? Hexagon::S2_storeri_io + : 0; + assert(WOpc && "Unexpected size"); + + const MCInstrDesc &StD = TII->get(WOpc); + StI = + BuildMI(*MF, DL, StD).add(MR).addImm(Off).addReg(VReg, RegState::Kill); + } + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + + return true; +} + +/// Given an "old group" OG of loads, create a "new group" NG of instructions +/// to replace them. Ideally, NG would only have a single instruction in it, +/// but that may only be possible for double register loads. 
+bool HexagonLoadStoreWidening::createWideLoads(InstrGroup &OG, InstrGroup &NG, + unsigned TotalSize) { + LLVM_DEBUG(dbgs() << "Creating wide loads\n"); + // XXX Current limitations: + // - only expect stores of immediate values in OG, + // - only handle a TotalSize of up to 8 + if (TotalSize > MaxWideSize) + return false; + assert(OG.size() == 2 && "Expecting two elements in Instruction Group."); + + MachineInstr *FirstLd = OG.front(); + const MachineMemOperand &OldM = getMemTarget(FirstLd); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlign(), OldM.getAAInfo()); + + MachineOperand &MR = FirstLd->getOperand(0); + MachineOperand &MRBase = + (HII->isPostIncrement(*FirstLd) ? FirstLd->getOperand(2) + : FirstLd->getOperand(1)); + DebugLoc DL = OG.back()->getDebugLoc(); + + // Create the double register Load Instruction. + Register NewMR = MRI->createVirtualRegister(&Hexagon::DoubleRegsRegClass); + MachineInstr *LdI; + + // Post increments appear first in the sorted group + if (FirstLd->getOpcode() == Hexagon::L2_loadri_pi) { + auto IncDestMO = FirstLd->getOperand(1); + auto IncMO = FirstLd->getOperand(3); + LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_pi)) + .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg()) + .add(IncDestMO) + .add(MRBase) + .add(IncMO); + LdI->addMemOperand(*MF, NewM); + } else { + auto OffMO = FirstLd->getOperand(2); + LdI = BuildMI(*MF, DL, TII->get(Hexagon::L2_loadrd_io)) + .addDef(NewMR, getKillRegState(MR.isKill()), MR.getSubReg()) + .add(MRBase) + .add(OffMO); + LdI->addMemOperand(*MF, NewM); + } + NG.push_back(LdI); + + auto getHalfReg = [&](MachineInstr *DoubleReg, unsigned SubReg, + MachineInstr *DstReg) { + Register DestReg = DstReg->getOperand(0).getReg(); + return BuildMI(*MF, DL, TII->get(Hexagon::COPY), DestReg) + .addReg(NewMR, getKillRegState(LdI->isKill()), SubReg); + }; + + MachineInstr *LdI_lo = getHalfReg(LdI, Hexagon::isub_lo, FirstLd); + 
MachineInstr *LdI_hi = getHalfReg(LdI, Hexagon::isub_hi, OG.back()); + NG.push_back(LdI_lo); + NG.push_back(LdI_hi); + + return true; +} + +// Replace instructions from the old group OG with instructions from the +// new group NG. Conceptually, remove all instructions in OG, and then +// insert all instructions in NG, starting at where the first instruction +// from OG was (in the order in which they appeared in the basic block). +// (The ordering in OG does not have to match the order in the basic block.) +bool HexagonLoadStoreWidening::replaceInsts(InstrGroup &OG, InstrGroup &NG) { + LLVM_DEBUG({ + dbgs() << "Replacing:\n"; + for (auto I : OG) + dbgs() << " " << *I; + dbgs() << "with\n"; + for (auto I : NG) + dbgs() << " " << *I; + }); + + MachineBasicBlock *MBB = OG.back()->getParent(); + MachineBasicBlock::iterator InsertAt = MBB->end(); + + // Need to establish the insertion point. + // For loads the best one is right before the first load in the OG, + // but in the order in which the insts occur in the program list. + // For stores the best point is right after the last store in the OG. + // Since the ordering in OG does not correspond + // to the order in the program list, we need to do some work to find + // the insertion point. + + // Create a set of all instructions in OG (for quick lookup). + InstrSet OldMemInsts; + for (auto *I : OG) + OldMemInsts.insert(I); + + if (Mode == WideningMode::Load) { + // Find the first load instruction in the block that is present in OG. + for (auto &I : *MBB) { + if (OldMemInsts.count(&I)) { + InsertAt = I; + break; + } + } + + assert((InsertAt != MBB->end()) && "Cannot locate any load from the group"); + + for (auto *I : NG) + MBB->insert(InsertAt, I); + } else { + // Find the last store instruction in the block that is present in OG. 
+ auto I = MBB->rbegin(); + for (; I != MBB->rend(); ++I) { + if (OldMemInsts.count(&(*I))) { + InsertAt = (*I).getIterator(); + break; + } + } + + assert((I != MBB->rend()) && "Cannot locate any store from the group"); + + for (auto I = NG.rbegin(); I != NG.rend(); ++I) + MBB->insertAfter(InsertAt, *I); + } + + for (auto *I : OG) + I->eraseFromParent(); + + return true; +} + +// Break up the group into smaller groups, each of which can be replaced by +// a single wide load/store. Widen each such smaller group and replace the old +// instructions with the widened ones. +bool HexagonLoadStoreWidening::processGroup(InstrGroup &Group) { + bool Changed = false; + InstrGroup::iterator I = Group.begin(), E = Group.end(); + InstrGroup OG, NG; // Old and new groups. + unsigned CollectedSize; + + while (I != E) { + OG.clear(); + NG.clear(); + + bool Succ = selectInsts(I++, E, OG, CollectedSize, MaxWideSize) && + createWideInsts(OG, NG, CollectedSize) && replaceInsts(OG, NG); + if (!Succ) + continue; + + assert(OG.size() > 1 && "Created invalid group"); + assert(std::distance(I, E) + 1 >= int(OG.size()) && "Too many elements"); + I += OG.size() - 1; + + Changed = true; + } + + return Changed; +} + +// Process a single basic block: create the load/store groups, and replace them +// with the widened insts, if possible. Processing of each basic block +// is independent from processing of any other basic block. This transfor- +// mation could be stopped after having processed any basic block without +// any ill effects (other than not having performed widening in the unpro- +// cessed blocks). Also, the basic blocks can be processed in any order. +bool HexagonLoadStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { + InstrGroupList SGs; + bool Changed = false; + + // To prevent long compile time check for max BB size. 
+ if (MBB.size() > MaxMBBSizeForLoadStoreWidening) + return false; + + createGroups(MBB, SGs); + + auto Less = [this](const MachineInstr *A, const MachineInstr *B) -> bool { + return getOffset(A) < getOffset(B); + }; + for (auto &G : SGs) { + assert(G.size() > 1 && "Group with fewer than 2 elements"); + llvm::sort(G, Less); + + Changed |= processGroup(G); + } + + return Changed; +} + +bool HexagonLoadStoreWidening::run() { + bool Changed = false; + + for (auto &B : *MF) + Changed |= processBasicBlock(B); + + return Changed; +} + +FunctionPass *llvm::createHexagonStoreWidening() { + return new HexagonStoreWidening(); +} + +FunctionPass *llvm::createHexagonLoadWidening() { + return new HexagonLoadWidening(); +} diff --git a/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp deleted file mode 100644 index 9d8e5c53b8227a..00000000000000 --- a/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ /dev/null @@ -1,606 +0,0 @@ -//===- HexagonStoreWidening.cpp -------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Replace sequences of "narrow" stores to adjacent memory locations with -// a fewer "wide" stores that have the same effect. -// For example, replace: -// S4_storeirb_io %100, 0, 0 ; store-immediate-byte -// S4_storeirb_io %100, 1, 0 ; store-immediate-byte -// with -// S4_storeirh_io %100, 0, 0 ; store-immediate-halfword -// The above is the general idea. The actual cases handled by the code -// may be a bit more complex. -// The purpose of this pass is to reduce the number of outstanding stores, -// or as one could say, "reduce store queue pressure". 
Also, wide stores -// mean fewer stores, and since there are only two memory instructions allowed -// per packet, it also means fewer packets, and ultimately fewer cycles. -//===---------------------------------------------------------------------===// - -#include "HexagonInstrInfo.h" -#include "HexagonRegisterInfo.h" -#include "HexagonSubtarget.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/InitializePasses.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include - -#define DEBUG_TYPE "hexagon-widen-stores" - -using namespace llvm; - -namespace llvm { - -FunctionPass *createHexagonStoreWidening(); -void initializeHexagonStoreWideningPass(PassRegistry&); - -} // end namespace llvm - -namespace { - - struct HexagonStoreWidening : public MachineFunctionPass { - const HexagonInstrInfo *TII; - const HexagonRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - AliasAnalysis *AA; - MachineFunction *MF; - - public: - static char ID; - - HexagonStoreWidening() : MachineFunctionPass(ID) { - initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - StringRef getPassName() const override { return "Hexagon Store Widening"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); 
- MachineFunctionPass::getAnalysisUsage(AU); - } - - static bool handledStoreType(const MachineInstr *MI); - - private: - static const int MaxWideSize = 4; - - using InstrGroup = std::vector; - using InstrGroupList = std::vector; - - bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); - bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); - void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, - InstrGroup::iterator End, InstrGroup &Group); - void createStoreGroups(MachineBasicBlock &MBB, - InstrGroupList &StoreGroups); - bool processBasicBlock(MachineBasicBlock &MBB); - bool processStoreGroup(InstrGroup &Group); - bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, - InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); - bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); - bool replaceStores(InstrGroup &OG, InstrGroup &NG); - bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); - }; - -} // end anonymous namespace - -char HexagonStoreWidening::ID = 0; - -INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", - "Hexason Store Widening", false, false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", - "Hexagon Store Widening", false, false) - -// Some local helper functions... 
-static unsigned getBaseAddressRegister(const MachineInstr *MI) { - const MachineOperand &MO = MI->getOperand(0); - assert(MO.isReg() && "Expecting register operand"); - return MO.getReg(); -} - -static int64_t getStoreOffset(const MachineInstr *MI) { - unsigned OpC = MI->getOpcode(); - assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); - - switch (OpC) { - case Hexagon::S4_storeirb_io: - case Hexagon::S4_storeirh_io: - case Hexagon::S4_storeiri_io: { - const MachineOperand &MO = MI->getOperand(1); - assert(MO.isImm() && "Expecting immediate offset"); - return MO.getImm(); - } - } - dbgs() << *MI; - llvm_unreachable("Store offset calculation missing for a handled opcode"); - return 0; -} - -static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { - assert(!MI->memoperands_empty() && "Expecting memory operands"); - return **MI->memoperands_begin(); -} - -// Filtering function: any stores whose opcodes are not "approved" of by -// this function will not be subjected to widening. -inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { - // For now, only handle stores of immediate values. - // Also, reject stores to stack slots. - unsigned Opc = MI->getOpcode(); - switch (Opc) { - case Hexagon::S4_storeirb_io: - case Hexagon::S4_storeirh_io: - case Hexagon::S4_storeiri_io: - // Base address must be a register. (Implement FI later.) - return MI->getOperand(0).isReg(); - default: - return false; - } -} - -// Check if the machine memory operand MMO is aliased with any of the -// stores in the store group Stores. 
-bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, - const MachineMemOperand &MMO) { - if (!MMO.getValue()) - return true; - - MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo()); - - for (auto *SI : Stores) { - const MachineMemOperand &SMO = getStoreTarget(SI); - if (!SMO.getValue()) - return true; - - MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo()); - if (!AA->isNoAlias(L, SL)) - return true; - } - - return false; -} - -// Check if the machine instruction MI accesses any storage aliased with -// any store in the group Stores. -bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, - const MachineInstr *MI) { - for (auto &I : MI->memoperands()) - if (instrAliased(Stores, *I)) - return true; - return false; -} - -// Inspect a machine basic block, and generate store groups out of stores -// encountered in the block. -// -// A store group is a group of stores that use the same base register, -// and which can be reordered within that group without altering the -// semantics of the program. A single store group could be widened as -// a whole, if there existed a single store instruction with the same -// semantics as the entire group. In many cases, a single store group -// may need more than one wide store. -void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB, - InstrGroupList &StoreGroups) { - InstrGroup AllInsns; - - // Copy all instruction pointers from the basic block to a temporary - // list. This will allow operating on the list, and modifying its - // elements without affecting the basic block. - for (auto &I : MBB) - AllInsns.push_back(&I); - - // Traverse all instructions in the AllInsns list, and if we encounter - // a store, then try to create a store group starting at that instruction - // i.e. a sequence of independent stores that can be widened. - for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { - MachineInstr *MI = *I; - // Skip null pointers (processed instructions). 
- if (!MI || !handledStoreType(MI)) - continue; - - // Found a store. Try to create a store group. - InstrGroup G; - createStoreGroup(MI, I+1, E, G); - if (G.size() > 1) - StoreGroups.push_back(G); - } -} - -// Create a single store group. The stores need to be independent between -// themselves, and also there cannot be other instructions between them -// that could read or modify storage being stored into. -void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, - InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { - assert(handledStoreType(BaseStore) && "Unexpected instruction"); - unsigned BaseReg = getBaseAddressRegister(BaseStore); - InstrGroup Other; - - Group.push_back(BaseStore); - - for (auto I = Begin; I != End; ++I) { - MachineInstr *MI = *I; - if (!MI) - continue; - - if (handledStoreType(MI)) { - // If this store instruction is aliased with anything already in the - // group, terminate the group now. - if (instrAliased(Group, getStoreTarget(MI))) - return; - // If this store is aliased to any of the memory instructions we have - // seen so far (that are not a part of this group), terminate the group. - if (instrAliased(Other, getStoreTarget(MI))) - return; - - unsigned BR = getBaseAddressRegister(MI); - if (BR == BaseReg) { - Group.push_back(MI); - *I = nullptr; - continue; - } - } - - // Assume calls are aliased to everything. - if (MI->isCall() || MI->hasUnmodeledSideEffects()) - return; - - if (MI->mayLoadOrStore()) { - if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) - return; - Other.push_back(MI); - } - } // for -} - -// Check if store instructions S1 and S2 are adjacent. More precisely, -// S2 has to access memory immediately following that accessed by S1. 
-bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, - const MachineInstr *S2) { - if (!handledStoreType(S1) || !handledStoreType(S2)) - return false; - - const MachineMemOperand &S1MO = getStoreTarget(S1); - - // Currently only handling immediate stores. - int Off1 = S1->getOperand(1).getImm(); - int Off2 = S2->getOperand(1).getImm(); - - return (Off1 >= 0) ? Off1 + S1MO.getSize().getValue() == unsigned(Off2) - : int(Off1 + S1MO.getSize().getValue()) == Off2; -} - -/// Given a sequence of adjacent stores, and a maximum size of a single wide -/// store, pick a group of stores that can be replaced by a single store -/// of size not exceeding MaxSize. The selected sequence will be recorded -/// in OG ("old group" of instructions). -/// OG should be empty on entry, and should be left empty if the function -/// fails. -bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin, - InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, - unsigned MaxSize) { - assert(Begin != End && "No instructions to analyze"); - assert(OG.empty() && "Old group not empty on entry"); - - if (std::distance(Begin, End) <= 1) - return false; - - MachineInstr *FirstMI = *Begin; - assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); - const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI); - unsigned Alignment = FirstMMO.getAlign().value(); - unsigned SizeAccum = FirstMMO.getSize().getValue(); - unsigned FirstOffset = getStoreOffset(FirstMI); - - // The initial value of SizeAccum should always be a power of 2. - assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); - - // If the size of the first store equals to or exceeds the limit, do nothing. - if (SizeAccum >= MaxSize) - return false; - - // If the size of the first store is greater than or equal to the address - // stored to, then the store cannot be made any wider. 
- if (SizeAccum >= Alignment) - return false; - - // The offset of a store will put restrictions on how wide the store can be. - // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0. - // If the first store already exhausts the offset limits, quit. Test this - // by checking if the next wider size would exceed the limit. - if ((2*SizeAccum-1) & FirstOffset) - return false; - - OG.push_back(FirstMI); - MachineInstr *S1 = FirstMI; - - // Pow2Num will be the largest number of elements in OG such that the sum - // of sizes of stores 0...Pow2Num-1 will be a power of 2. - unsigned Pow2Num = 1; - unsigned Pow2Size = SizeAccum; - - // Be greedy: keep accumulating stores as long as they are to adjacent - // memory locations, and as long as the total number of bytes stored - // does not exceed the limit (MaxSize). - // Keep track of when the total size covered is a power of 2, since - // this is a size a single store can cover. - for (InstrGroup::iterator I = Begin + 1; I != End; ++I) { - MachineInstr *S2 = *I; - // Stores are sorted, so if S1 and S2 are not adjacent, there won't be - // any other store to fill the "hole". - if (!storesAreAdjacent(S1, S2)) - break; - - unsigned S2Size = getStoreTarget(S2).getSize().getValue(); - if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) - break; - - OG.push_back(S2); - SizeAccum += S2Size; - if (isPowerOf2_32(SizeAccum)) { - Pow2Num = OG.size(); - Pow2Size = SizeAccum; - } - if ((2*Pow2Size-1) & FirstOffset) - break; - - S1 = S2; - } - - // The stores don't add up to anything that can be widened. Clean up. - if (Pow2Num <= 1) { - OG.clear(); - return false; - } - - // Only leave the stored being widened. - OG.resize(Pow2Num); - TotalSize = Pow2Size; - return true; -} - -/// Given an "old group" OG of stores, create a "new group" NG of instructions -/// to replace them. Ideally, NG would only have a single instruction in it, -/// but that may only be possible for store-immediate. 
-bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, - unsigned TotalSize) { - // XXX Current limitations: - // - only expect stores of immediate values in OG, - // - only handle a TotalSize of up to 4. - - if (TotalSize > 4) - return false; - - unsigned Acc = 0; // Value accumulator. - unsigned Shift = 0; - - for (MachineInstr *MI : OG) { - const MachineMemOperand &MMO = getStoreTarget(MI); - MachineOperand &SO = MI->getOperand(2); // Source. - assert(SO.isImm() && "Expecting an immediate operand"); - - unsigned NBits = MMO.getSize().getValue() * 8; - unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); - unsigned Val = (SO.getImm() & Mask) << Shift; - Acc |= Val; - Shift += NBits; - } - - MachineInstr *FirstSt = OG.front(); - DebugLoc DL = OG.back()->getDebugLoc(); - const MachineMemOperand &OldM = getStoreTarget(FirstSt); - MachineMemOperand *NewM = - MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), - TotalSize, OldM.getAlign(), OldM.getAAInfo()); - - if (Acc < 0x10000) { - // Create mem[hw] = #Acc - unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : - (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; - assert(WOpc && "Unexpected size"); - - int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); - const MCInstrDesc &StD = TII->get(WOpc); - MachineOperand &MR = FirstSt->getOperand(0); - int64_t Off = FirstSt->getOperand(1).getImm(); - MachineInstr *StI = - BuildMI(*MF, DL, StD) - .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) - .addImm(Off) - .addImm(Val); - StI->addMemOperand(*MF, NewM); - NG.push_back(StI); - } else { - // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg - const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); - const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); - Register VReg = MF->getRegInfo().createVirtualRegister(RC); - MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) - .addImm(int(Acc)); - NG.push_back(TfrI); - - unsigned WOpc = (TotalSize == 2) ? 
Hexagon::S2_storerh_io : - (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; - assert(WOpc && "Unexpected size"); - - const MCInstrDesc &StD = TII->get(WOpc); - MachineOperand &MR = FirstSt->getOperand(0); - int64_t Off = FirstSt->getOperand(1).getImm(); - MachineInstr *StI = - BuildMI(*MF, DL, StD) - .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) - .addImm(Off) - .addReg(VReg, RegState::Kill); - StI->addMemOperand(*MF, NewM); - NG.push_back(StI); - } - - return true; -} - -// Replace instructions from the old group OG with instructions from the -// new group NG. Conceptually, remove all instructions in OG, and then -// insert all instructions in NG, starting at where the first instruction -// from OG was (in the order in which they appeared in the basic block). -// (The ordering in OG does not have to match the order in the basic block.) -bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { - LLVM_DEBUG({ - dbgs() << "Replacing:\n"; - for (auto I : OG) - dbgs() << " " << *I; - dbgs() << "with\n"; - for (auto I : NG) - dbgs() << " " << *I; - }); - - MachineBasicBlock *MBB = OG.back()->getParent(); - MachineBasicBlock::iterator InsertAt = MBB->end(); - - // Need to establish the insertion point. The best one is right before - // the first store in the OG, but in the order in which the stores occur - // in the program list. Since the ordering in OG does not correspond - // to the order in the program list, we need to do some work to find - // the insertion point. - - // Create a set of all instructions in OG (for quick lookup). - SmallPtrSet InstrSet; - for (auto *I : OG) - InstrSet.insert(I); - - // Traverse the block, until we hit an instruction from OG. - for (auto &I : *MBB) { - if (InstrSet.count(&I)) { - InsertAt = I; - break; - } - } - - assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); - - bool AtBBStart = false; - - // InsertAt points at the first instruction that will be removed. 
We need - // to move it out of the way, so it remains valid after removing all the - // old stores, and so we are able to recover it back to the proper insertion - // position. - if (InsertAt != MBB->begin()) - --InsertAt; - else - AtBBStart = true; - - for (auto *I : OG) - I->eraseFromParent(); - - if (!AtBBStart) - ++InsertAt; - else - InsertAt = MBB->begin(); - - for (auto *I : NG) - MBB->insert(InsertAt, I); - - return true; -} - -// Break up the group into smaller groups, each of which can be replaced by -// a single wide store. Widen each such smaller group and replace the old -// instructions with the widened ones. -bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { - bool Changed = false; - InstrGroup::iterator I = Group.begin(), E = Group.end(); - InstrGroup OG, NG; // Old and new groups. - unsigned CollectedSize; - - while (I != E) { - OG.clear(); - NG.clear(); - - bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && - createWideStores(OG, NG, CollectedSize) && - replaceStores(OG, NG); - if (!Succ) - continue; - - assert(OG.size() > 1 && "Created invalid group"); - assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); - I += OG.size()-1; - - Changed = true; - } - - return Changed; -} - -// Process a single basic block: create the store groups, and replace them -// with the widened stores, if possible. Processing of each basic block -// is independent from processing of any other basic block. This transfor- -// mation could be stopped after having processed any basic block without -// any ill effects (other than not having performed widening in the unpro- -// cessed blocks). Also, the basic blocks can be processed in any order. 
-bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { - InstrGroupList SGs; - bool Changed = false; - - createStoreGroups(MBB, SGs); - - auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { - return getStoreOffset(A) < getStoreOffset(B); - }; - for (auto &G : SGs) { - assert(G.size() > 1 && "Store group with fewer than 2 elements"); - llvm::sort(G, Less); - - Changed |= processStoreGroup(G); - } - - return Changed; -} - -bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { - if (skipFunction(MFn.getFunction())) - return false; - - MF = &MFn; - auto &ST = MFn.getSubtarget(); - TII = ST.getInstrInfo(); - TRI = ST.getRegisterInfo(); - MRI = &MFn.getRegInfo(); - AA = &getAnalysis().getAAResults(); - - bool Changed = false; - - for (auto &B : MFn) - Changed |= processBasicBlock(B); - - return Changed; -} - -FunctionPass *llvm::createHexagonStoreWidening() { - return new HexagonStoreWidening(); -} diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 884a7c599218ce..a97bc1985c614c 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -66,6 +66,9 @@ static cl::opt DisableStoreWidening("disable-store-widen", cl::Hidden, cl::init(false), cl::desc("Disable store widening")); +static cl::opt DisableLoadWidening("disable-load-widen", cl::Hidden, + cl::desc("Disable load widening")); + static cl::opt EnableExpandCondsets("hexagon-expand-condsets", cl::init(true), cl::Hidden, cl::desc("Early expansion of MUX")); @@ -229,6 +232,7 @@ FunctionPass *createHexagonRDFOpt(); FunctionPass *createHexagonSplitConst32AndConst64(); FunctionPass *createHexagonSplitDoubleRegs(); FunctionPass *createHexagonStoreWidening(); +FunctionPass *createHexagonLoadWidening(); FunctionPass *createHexagonTfrCleanup(); FunctionPass *createHexagonVectorCombineLegacyPass(); FunctionPass *createHexagonVectorPrint(); @@ -460,6 
+464,8 @@ void HexagonPassConfig::addPreRegAlloc() { insertPass(&VirtRegRewriterID, &HexagonTfrCleanupID); if (!DisableStoreWidening) addPass(createHexagonStoreWidening()); + if (!DisableLoadWidening) + addPass(createHexagonLoadWidening()); if (EnableGenMemAbs) addPass(createHexagonGenMemAbsolute()); if (!DisableHardwareLoops) diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp index f88c9d19d1bc84..da55b7b8c6d68c 100644 --- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp @@ -873,8 +873,7 @@ SDValue LanaiTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG); SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32); - SDValue Glue = - DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS, TargetCC); + SDValue Glue = DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS); return DAG.getNode(LanaiISD::BR_CC, DL, Op.getValueType(), Chain, Dest, TargetCC, Glue); @@ -973,8 +972,7 @@ SDValue LanaiTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG); SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32); - SDValue Glue = - DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS, TargetCC); + SDValue Glue = DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS); return DAG.getNode(LanaiISD::SETCC, DL, Op.getValueType(), TargetCC, Glue); } @@ -990,12 +988,10 @@ SDValue LanaiTargetLowering::LowerSELECT_CC(SDValue Op, LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG); SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32); - SDValue Glue = - DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS, TargetCC); + SDValue Glue = DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS); - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - return DAG.getNode(LanaiISD::SELECT_CC, DL, VTs, TrueV, FalseV, TargetCC, - Glue); + return 
DAG.getNode(LanaiISD::SELECT_CC, DL, Op.getValueType(), TrueV, FalseV, + TargetCC, Glue); } SDValue LanaiTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index b2726c01c334fc..8caa88f988b848 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -2399,8 +2399,8 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // Block CopyFromReg so partial register stalls are avoided. T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode() != ISD::CopyFromReg) { - SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue); - SDValue Cmov = DAG.getNode(M68kISD::CMOV, DL, VTs, T2, T1, CC, Cond); + SDValue Cmov = + DAG.getNode(M68kISD::CMOV, DL, T1.getValueType(), T2, T1, CC, Cond); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } } @@ -2418,9 +2418,8 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // M68kISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = {Op2, Op1, CC, Cond}; - return DAG.getNode(M68kISD::CMOV, DL, VTs, Ops); + return DAG.getNode(M68kISD::CMOV, DL, Op.getValueType(), Ops); } /// Return true if node is an ISD::AND or ISD::OR of two M68k::SETcc nodes diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 4e9090307e2f8d..a9294e76f8763f 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -491,11 +491,18 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) // FP Operations - getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG, - G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM}) + getActionDefinitionsBuilder( + {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM}) .legalFor(ST.hasStdExtF(), {s32}) .legalFor(ST.hasStdExtD(), {s64}) - .legalFor(ST.hasStdExtZfh(), {s16}); + .legalFor(ST.hasStdExtZfh(), {s16}) + .libcallFor({s32, s64}); + + getActionDefinitionsBuilder({G_FNEG, G_FABS}) + .legalFor(ST.hasStdExtF(), {s32}) + .legalFor(ST.hasStdExtD(), {s64}) + .legalFor(ST.hasStdExtZfh(), {s16}) + .lowerFor({s32, s64}); getActionDefinitionsBuilder(G_FREM) .libcallFor({s32, s64}) @@ -506,7 +513,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .legalFor(ST.hasStdExtF(), {{s32, s32}}) .legalFor(ST.hasStdExtD(), {{s64, s64}, {s32, s64}, {s64, s32}}) .legalFor(ST.hasStdExtZfh(), {{s16, s16}, {s16, s32}, {s32, s16}}) - .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}}); + .legalFor(ST.hasStdExtZfh() && ST.hasStdExtD(), {{s16, s64}, {s64, s16}}) + .lower(); // FIXME: Use Zfhmin. 
getActionDefinitionsBuilder(G_FPTRUNC) @@ -528,7 +536,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder(G_IS_FPCLASS) .customFor(ST.hasStdExtF(), {{s1, s32}}) .customFor(ST.hasStdExtD(), {{s1, s64}}) - .customFor(ST.hasStdExtZfh(), {{s1, s16}}); + .customFor(ST.hasStdExtZfh(), {{s1, s16}}) + .lowerFor({{s1, s32}, {s1, s64}}); getActionDefinitionsBuilder(G_FCONSTANT) .legalFor(ST.hasStdExtF(), {s32}) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6eae756b25fb5b..c0021f69f18e77 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2093,7 +2093,7 @@ bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { // Assume target opcodes can't be scalarized. // TODO - do we have any exceptions? - if (Opc >= ISD::BUILTIN_OP_END) + if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc)) return false; // If the vector op is not supported, try to convert to scalar. 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index b01af468d9ea2b..2924083ece3443 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -285,7 +285,8 @@ def : Pat<(riscv_fclass FPR64:$rs1), (FCLASS_D $rs1)>; def : PatFprFpr; def : PatFprFpr; -def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>; +def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), + (FSGNJN_D FPR64:$rs1, FPR64:$rs2)>; def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2, FRM_RNE))>; def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2, @@ -323,7 +324,7 @@ def : Pat<(riscv_fclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>; def : PatFprFpr; def : PatFprFpr; def : Pat<(fcopysign FPR64INX:$rs1, (fneg FPR64INX:$rs2)), - (FSGNJN_D_INX $rs1, $rs2)>; + (FSGNJN_D_INX FPR64INX:$rs1, FPR64INX:$rs2)>; def : Pat<(fcopysign FPR64INX:$rs1, FPR32INX:$rs2), (FSGNJ_D_INX $rs1, (f64 (FCVT_D_S_INX $rs2, FRM_RNE)))>; def : Pat<(fcopysign FPR32INX:$rs1, FPR64INX:$rs2), @@ -361,7 +362,7 @@ def : Pat<(riscv_fclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>; def : PatFprFpr; def : PatFprFpr; def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)), - (FSGNJN_D_IN32X $rs1, $rs2)>; + (FSGNJN_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2)>; def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, FRM_RNE))>; def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 2c27e3950f07f9..6c41c53bb301fd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -570,7 +570,8 @@ defm : PatFprFpr_m; } let Predicates = [HasStdExtF] in { -def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>; +def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), + (FSGNJN_S FPR32:$rs1, 
FPR32:$rs2)>; // fmadd: rs1 * rs2 + rs3 def : Pat<(any_fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3), @@ -594,7 +595,8 @@ def : Pat<(fneg (any_fma_nsz FPR32:$rs1, FPR32:$rs2, FPR32:$rs3)), } // Predicates = [HasStdExtF] let Predicates = [HasStdExtZfinx] in { -def : Pat<(fcopysign FPR32INX:$rs1, (fneg FPR32INX:$rs2)), (FSGNJN_S_INX $rs1, $rs2)>; +def : Pat<(fcopysign FPR32INX:$rs1, (fneg FPR32INX:$rs2)), + (FSGNJN_S_INX FPR32INX:$rs1, FPR32INX:$rs2)>; // fmadd: rs1 * rs2 + rs3 def : Pat<(any_fma FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index e2e99cc3f2b72c..625011c3b9f7ca 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -291,7 +291,8 @@ def : Pat<(riscv_fclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>; def : PatFprFpr; def : PatFprFpr; -def : Pat<(f16 (fcopysign FPR16:$rs1, (f16 (fneg FPR16:$rs2)))), (FSGNJN_H $rs1, $rs2)>; +def : Pat<(f16 (fcopysign FPR16:$rs1, (f16 (fneg FPR16:$rs2)))), + (FSGNJN_H FPR16:$rs1, FPR16:$rs2)>; def : Pat<(f16 (fcopysign FPR16:$rs1, FPR32:$rs2)), (FSGNJ_H $rs1, (f16 (FCVT_H_S $rs2, FRM_DYN)))>; @@ -334,7 +335,8 @@ def : Pat<(riscv_fclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>; def : PatFprFpr; def : PatFprFpr; -def : Pat<(fcopysign FPR16INX:$rs1, (fneg FPR16INX:$rs2)), (FSGNJN_H_INX $rs1, $rs2)>; +def : Pat<(fcopysign FPR16INX:$rs1, (fneg FPR16INX:$rs2)), + (FSGNJN_H_INX FPR16INX:$rs1, FPR16INX:$rs2)>; def : Pat<(fcopysign FPR16INX:$rs1, FPR32INX:$rs2), (FSGNJ_H_INX $rs1, (FCVT_H_S_INX $rs2, FRM_DYN))>; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 2b16dcbcd8695b..017264fbd46e01 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1013,6 +1013,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return LT.first; 
break; } + case Intrinsic::cttz: + case Intrinsic::ctlz: case Intrinsic::ctpop: { auto LT = getTypeLegalizationCost(RetTy); if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) @@ -1024,7 +1026,9 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (ST->hasVInstructions() && LT.second.isVector()) { // vrsub.vi v10, v8, 0 // vmax.vv v8, v8, v10 - return LT.first * 2; + return LT.first * + getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV}, + LT.second, CostKind); } break; } @@ -1155,6 +1159,16 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(), ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE, CostKind); + case Intrinsic::experimental_vp_splat: { + auto LT = getTypeLegalizationCost(RetTy); + // TODO: Lower i1 experimental_vp_splat + if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1) + return InstructionCost::getInvalid(); + return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint() + ? RISCV::VFMV_V_F + : RISCV::VMV_V_X, + LT.second, CostKind); + } case Intrinsic::vp_reduce_add: case Intrinsic::vp_reduce_fadd: case Intrinsic::vp_reduce_mul: @@ -1620,6 +1634,14 @@ InstructionCost RISCVTTIImpl::getExtendedReductionCost( std::pair LT = getTypeLegalizationCost(ValTy); + if (IsUnsigned && Opcode == Instruction::Add && + LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) { + // Represent vector_reduce_add(ZExt()) as + // ZExtOrTrunc(ctpop(bitcast to in)). 
+ return LT.first * + getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind); + } + if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits()) return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy, FMF, CostKind); @@ -2320,20 +2342,6 @@ bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) { return true; } -bool RISCVTTIImpl::areInlineCompatible(const Function *Caller, - const Function *Callee) const { - const TargetMachine &TM = getTLI()->getTargetMachine(); - - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); - - // Inline a callee if its target-features are a subset of the callers - // target-features. - return (CallerBits & CalleeBits) == CalleeBits; -} - /// See if \p I should be considered for address type promotion. We check if \p /// I is a sext with right type and used in memory accesses. If it used in a /// "complex" getelementptr, we allow it to be promoted without finding other diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 498f48353dc0c7..6fd36e90a02ddd 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -60,9 +60,6 @@ class RISCVTTIImpl : public BasicTTIImplBase { : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - bool areInlineCompatible(const Function *Caller, - const Function *Callee) const; - /// Return the cost of materializing an immediate for a value operand of /// a store instruction. 
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td index 38fd683832f0db..bb5b9f2d736f93 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -280,7 +280,7 @@ def SPbpicc : SDNode<"SPISD::BPICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; def SPbpxcc : SDNode<"SPISD::BPXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; def SPbrfccv9 : SDNode<"SPISD::BRFCC_V9", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; -def SPbrreg : SDNode<"SPISD::BR_REG", SDTSPbrreg, [SDNPHasChain, SDNPInGlue]>; +def SPbrreg : SDNode<"SPISD::BR_REG", SDTSPbrreg, [SDNPHasChain]>; def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>; def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>; @@ -293,7 +293,7 @@ def SPxtof : SDNode<"SPISD::XTOF", SDTSPXTOF>; def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>; def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>; def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>; -def SPselectreg : SDNode<"SPISD::SELECT_REG", SDTSPselectreg, [SDNPInGlue]>; +def SPselectreg : SDNode<"SPISD::SELECT_REG", SDTSPselectreg>; // These are target-independent nodes, but have target-specific formats. def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 90d7bd934af405..403d238aa5b528 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -671,7 +671,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, } // Lower the displacement to a TargetConstant. 
- Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT); + Disp = CurDAG->getSignedTargetConstant(AM.Disp, SDLoc(Base), VT); } void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, @@ -2024,8 +2024,9 @@ SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) { CurDAG->getConstant(IPM.XORValue, DL, MVT::i32)); if (IPM.AddValue) - Result = CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result, - CurDAG->getConstant(IPM.AddValue, DL, MVT::i32)); + Result = + CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result, + CurDAG->getSignedConstant(IPM.AddValue, DL, MVT::i32)); EVT VT = Node->getValueType(0); if (VT == MVT::i32 && IPM.Bit == 31) { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 78d91299a357dd..8f505b7e198cfa 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1444,15 +1444,15 @@ void SystemZTargetLowering::LowerAsmOperandForConstraint( case 'K': // Signed 16-bit constant if (auto *C = dyn_cast(Op)) if (isInt<16>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), - Op.getValueType())); + Ops.push_back(DAG.getSignedTargetConstant( + C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast(Op)) if (isInt<20>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), - Op.getValueType())); + Ops.push_back(DAG.getSignedTargetConstant( + C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'M': // 0x7fffffff @@ -2578,7 +2578,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, // Make sure that the second operand is an i32 with the right value. 
if (C.Op1.getValueType() != MVT::i32 || Value != ConstOp1->getZExtValue()) - C.Op1 = DAG.getConstant(Value, DL, MVT::i32); + C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32); } // Return true if Op is either an unextended load, or a load suitable @@ -3410,7 +3410,7 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, } if (Invert) { SDValue Mask = - DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); + DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } if (Chain && Chain.getNode() != Cmp.getNode()) { @@ -3571,7 +3571,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, - DAG.getConstant(Offset, DL, PtrVT)); + DAG.getSignedConstant(Offset, DL, PtrVT)); return Result; } @@ -3834,7 +3834,7 @@ SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, const auto *TFL = Subtarget.getFrameLowering(); int Offset = TFL->getReturnAddressOffset(MF); SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr, - DAG.getConstant(Offset, DL, PtrVT)); + DAG.getSignedConstant(Offset, DL, PtrVT)); return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); } @@ -4584,7 +4584,7 @@ static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, // Get the address of the containing word. AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, DL, PtrVT)); + DAG.getSignedConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. 
@@ -4623,7 +4623,8 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) if (auto *Const = dyn_cast(Src2)) { Opcode = SystemZISD::ATOMIC_LOADW_ADD; - Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); + Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL, + Src2.getValueType()); } SDValue AlignedAddr, BitShift, NegBitShift; diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td index 0221e2c53f2f49..64345ca3a1394e 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -220,8 +220,8 @@ def NEGLF32 : SDNodeXFormgetTargetConstant(int8_t(N->getZExtValue()), SDLoc(N), - MVT::i64); + return CurDAG->getSignedTargetConstant(int8_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 8-bit unsigned quantity. @@ -244,14 +244,14 @@ def UIMM12 : SDNodeXFormgetTargetConstant(int16_t(N->getZExtValue()), SDLoc(N), - MVT::i64); + return CurDAG->getSignedTargetConstant(int16_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Negate and then truncate an immediate to a 16-bit signed quantity. def NEGSIMM16 : SDNodeXFormgetTargetConstant(int16_t(-N->getZExtValue()), SDLoc(N), - MVT::i64); + return CurDAG->getSignedTargetConstant(int16_t(-N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 16-bit unsigned quantity. @@ -268,8 +268,8 @@ def SIMM32 : SDNodeXFormgetTargetConstant(int32_t(-N->getZExtValue()), SDLoc(N), - MVT::i64); + return CurDAG->getSignedTargetConstant(int32_t(-N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 32-bit unsigned quantity. 
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 4eb58e27f7ad79..c182c9890509fb 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -53,7 +53,7 @@ static SDValue emitMemMemReg(SelectionDAG &DAG, const SDLoc &DL, unsigned Op, int64_t Adj = getMemMemLenAdj(Op); SDValue LenAdj = DAG.getNode(ISD::ADD, DL, MVT::i64, DAG.getZExtOrTrunc(Size, DL, MVT::i64), - DAG.getConstant(0 - Adj, DL, MVT::i64)); + DAG.getSignedConstant(0 - Adj, DL, MVT::i64)); return createMemMemNode(DAG, DL, Op, Chain, Dst, Src, LenAdj, Byte); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 2d00889407ff48..a52af6832d583f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -429,7 +429,7 @@ bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { // Assume target opcodes can't be scalarized. // TODO - do we have any exceptions? - if (Opc >= ISD::BUILTIN_OP_END) + if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc)) return false; // If the vector op is not supported, try to convert to scalar. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index f96e3232b93f42..3d678e53841664 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -104,24 +104,6 @@ TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle( return TTI::ReductionShuffle::SplitHalf; } -bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller, - const Function *Callee) const { - // Allow inlining only when the Callee has a subset of the Caller's - // features. 
In principle, we should be able to inline regardless of any - // features because WebAssembly supports features at module granularity, not - // function granularity, but without this restriction it would be possible for - // a module to "forget" about features if all the functions that used them - // were inlined. - const TargetMachine &TM = getTLI()->getTargetMachine(); - - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); - - return (CallerBits & CalleeBits) == CalleeBits; -} - void WebAssemblyTTIImpl::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 2ce6cbf3ba0266..9691120b2e531d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -72,9 +72,6 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const; - bool areInlineCompatible(const Function *Caller, - const Function *Callee) const; - bool supportsTailCalls() const; bool isProfitableToSinkOperands(Instruction *I, diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index c3eae294919f3c..c27177484f55a4 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -329,6 +329,14 @@ static int readPrefixes(struct InternalInstruction *insn) { break; } + if (isREX(insn, byte)) { + insn->rexPrefix = byte; + isPrefix = true; + LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte)); + } else if (isPrefix) { + insn->rexPrefix = 0; + } + if (isPrefix) 
LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte)); } @@ -506,11 +514,6 @@ static int readPrefixes(struct InternalInstruction *insn) { LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx", insn->rex2ExtensionPrefix[0], insn->rex2ExtensionPrefix[1])); - } else if (isREX(insn, byte)) { - if (peek(insn, nextByte)) - return -1; - insn->rexPrefix = byte; - LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte)); } else --insn->readerCursor; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index db393402757787..d3b569eccc17be 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3300,7 +3300,7 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const { // Assume target opcodes can't be scalarized. // TODO - do we have any exceptions? - if (Opc >= ISD::BUILTIN_OP_END) + if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc)) return false; // If the vector op is not supported, try to convert to scalar. @@ -14300,9 +14300,17 @@ static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef Mask, // sub-512-bit shuffles are padded to 512-bits for the shuffle and then // the active subvector is extracted. static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, + ArrayRef OriginalMask, SDValue V1, + SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + // Commute binary inputs so V2 is a load to simplify VPERMI2/T2 folds. 
+ SmallVector Mask(OriginalMask); + if (!V2.isUndef() && isShuffleFoldableLoad(V1) && + !isShuffleFoldableLoad(V2)) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(V1, V2); + } + MVT MaskVT = VT.changeTypeToInteger(); SDValue MaskNode; MVT ShuffleVT = VT; @@ -42244,6 +42252,17 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL, DAG.getIntPtrConstant(0, DL)); } } + SmallVector Ops; + SmallVector Mask; + if (isShuffleFoldableLoad(N.getOperand(0)) && + !isShuffleFoldableLoad(N.getOperand(2)) && + getTargetShuffleMask(N, /*AllowSentinelZero=*/false, Ops, Mask)) { + ShuffleVectorSDNode::commuteMask(Mask); + SDValue NewMask = getConstVector( + Mask, N.getOperand(1).getSimpleValueType(), DAG, DL, /*IsMask=*/true); + return DAG.getNode(X86ISD::VPERMV3, DL, VT, N.getOperand(2), NewMask, + N.getOperand(0)); + } return SDValue(); } default: diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index 038100b8264de7..ba9779cdc335d6 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -45,12 +45,7 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisInt<0>, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; -// RES1, RES2, FLAGS = op LHS, RHS -def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, SDTCisVT<1, i32>]>; + def SDTX86BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; @@ -266,7 +261,7 @@ def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, [SDNPCommutative]>; -def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags, +def X86umul_flag : SDNode<"X86ISD::UMUL", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>; def 
X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>; diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td index 0c80f1eaadadb8..b31ed81160a295 100644 --- a/llvm/lib/Target/X86/X86PfmCounters.td +++ b/llvm/lib/Target/X86/X86PfmCounters.td @@ -210,10 +210,7 @@ def AlderLakePfmCounters : ProcPfmCounters { let IssueCounters = [ PfmIssueCounter<"ADLPPort00", "uops_dispatched:port_0">, PfmIssueCounter<"ADLPPort01", "uops_dispatched:port_1">, - // The perfmon documentation and thus libpfm seems to incorrectly label - // this performance counter, as ports 2,3, and 11 are actually grouped - // according to most documentation. See #113941 for additional details. - PfmIssueCounter<"ADLPPort02_03_11", "uops_dispatched:port_2_3_10">, + PfmIssueCounter<"ADLPPort02_03_10", "uops_dispatched:port_2_3_10">, PfmIssueCounter<"ADLPPort04_09", "uops_dispatched:port_4_9">, PfmIssueCounter<"ADLPPort05_11", "uops_dispatched:port_5_11">, PfmIssueCounter<"ADLPPort06", "uops_dispatched:port_6">, @@ -229,10 +226,7 @@ def SapphireRapidsPfmCounters : ProcPfmCounters { let IssueCounters = [ PfmIssueCounter<"SPRPort00", "uops_dispatched:port_0">, PfmIssueCounter<"SPRPort01", "uops_dispatched:port_1">, - // The perfmon documentation and thus libpfm seems to incorrectly label - // this performance counter, as ports 2,3, and 11 are actually grouped - // according to most documentation. See #113941 for additional details. 
- PfmIssueCounter<"SPRPort02_03_11", "uops_dispatched:port_2_3_10">, + PfmIssueCounter<"SPRPort02_03_10", "uops_dispatched:port_2_3_10">, PfmIssueCounter<"SPRPort04_09", "uops_dispatched:port_4_9">, PfmIssueCounter<"SPRPort05_11", "uops_dispatched:port_5_11">, PfmIssueCounter<"SPRPort06", "uops_dispatched:port_6">, diff --git a/llvm/lib/Target/X86/X86SchedAlderlakeP.td b/llvm/lib/Target/X86/X86SchedAlderlakeP.td index f8c6b32a853be9..564369804711a9 100644 --- a/llvm/lib/Target/X86/X86SchedAlderlakeP.td +++ b/llvm/lib/Target/X86/X86SchedAlderlakeP.td @@ -56,16 +56,15 @@ def ADLPPort00_05 : ProcResGroup<[ADLPPort00, ADLPPort05]>; def ADLPPort00_05_06 : ProcResGroup<[ADLPPort00, ADLPPort05, ADLPPort06]>; def ADLPPort00_06 : ProcResGroup<[ADLPPort00, ADLPPort06]>; def ADLPPort01_05 : ProcResGroup<[ADLPPort01, ADLPPort05]>; -def ADLPPort01_05_10 : ProcResGroup<[ADLPPort01, ADLPPort05, ADLPPort10]>; +def ADLPPort01_05_11 : ProcResGroup<[ADLPPort01, ADLPPort05, ADLPPort11]>; def ADLPPort02_03 : ProcResGroup<[ADLPPort02, ADLPPort03]>; def ADLPPort02_03_07 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort07]>; -def ADLPPort02_03_11 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort11]>; -def ADLPPort05_11 : ProcResGroup<[ADLPPort05, ADLPPort11]>; +def ADLPPort02_03_10 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort10]>; def ADLPPort07_08 : ProcResGroup<[ADLPPort07, ADLPPort08]>; // EU has 112 reservation stations. -def ADLPPort00_01_05_06_10 : ProcResGroup<[ADLPPort00, ADLPPort01, ADLPPort05, - ADLPPort06, ADLPPort10]> { +def ADLPPort00_01_05_06_11 : ProcResGroup<[ADLPPort00, ADLPPort01, ADLPPort05, + ADLPPort06, ADLPPort11]> { let BufferSize = 112; } @@ -75,8 +74,8 @@ def ADLPPort04_09 : ProcResGroup<[ADLPPort04, ADLPPort09]> { } // MEM has 72 reservation stations. 
-def ADLPPort02_03_07_08_11 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort07, - ADLPPort08, ADLPPort11]> { +def ADLPPort02_03_07_08_10 : ProcResGroup<[ADLPPort02, ADLPPort03, ADLPPort07, + ADLPPort08, ADLPPort10]> { let BufferSize = 72; } @@ -114,7 +113,7 @@ multiclass ADLPWriteResPair { + def : WriteRes { let Latency = !add(Lat, LoadLat); let ReleaseAtCycles = !listconcat([1], Res); let NumMicroOps = !add(UOps, LoadUOps); @@ -127,49 +126,49 @@ multiclass ADLPWriteResPair; -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; -def : WriteRes; -def : WriteRes { +defm : X86WriteRes; +def : WriteRes; +def : WriteRes { let Latency = 11; } defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; def : WriteRes; defm : X86WriteRes; defm : ADLPWriteResPair; def : WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; def : WriteRes { let Latency = 11; } -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; @@ -177,17 +176,17 @@ defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : 
X86WriteResPairUnsupported; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; defm : X86WriteRes; @@ -199,12 +198,12 @@ defm : ADLPWriteResPair defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -212,7 +211,7 @@ defm : X86WriteRes { let Latency = 3; } -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; @@ -249,13 +248,13 @@ defm : ADLPWriteResPair; defm : X86WriteRes; defm : X86WriteRes; -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } defm : ADLPWriteResPair; @@ -268,8 +267,8 @@ defm : X86WriteResPairUnsupported; def : WriteRes { let Latency = 3; } -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -331,15 +330,15 @@ defm : X86WriteResPairUnsupported; def : WriteRes { let Latency = 2; } -defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteRes; defm : X86WriteRes; -defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; @@ -357,10 +356,10 @@ defm : X86WriteRes; def : WriteRes { let Latency = 3; } 
-defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; defm : ADLPWriteResPair; -def : WriteRes { +def : WriteRes { let Latency = 5; } def : WriteRes { @@ -368,17 +367,17 @@ def : WriteRes { } defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : ADLPWriteResPair; +defm : ADLPWriteResPair; defm : ADLPWriteResPair; def : WriteRes { let Latency = AlderlakePModel.MaxLatency; } -def : WriteRes; +def : WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; @@ -393,16 +392,16 @@ defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes { let Latency = 3; } @@ -447,20 +446,20 @@ defm : ADLPWriteResPair; defm : ADLPWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; -def : WriteRes { +defm : X86WriteRes; +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } defm : ADLPWriteResPair; @@ -474,8 +473,8 @@ def : WriteRes { let Latency = 4; } defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -498,9 +497,9 @@ def : WriteRes; defm : X86WriteResUnsupported; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; +defm : 
X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; defm : X86WriteRes; @@ -509,7 +508,7 @@ defm : X86WriteRes; defm : ADLPWriteResPair; defm : ADLPWriteResPair; -defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; // Infered SchedWriteRes and InstRW definition. @@ -521,14 +520,14 @@ def ADLPWriteResGroup0 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort02_03, ADLP def : InstRW<[ADLPWriteResGroup0], (instregex "^AA(D|N)D64mr$", "^A(X?)OR64mr$")>; -def ADLPWriteResGroup1 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup1 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 1, 1, 1, 1]; let Latency = 12; let NumMicroOps = 6; } def : InstRW<[ADLPWriteResGroup1, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(16|32|64)mr$")>; -def ADLPWriteResGroup2 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup2 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -538,20 +537,20 @@ def : InstRW<[ADLPWriteResGroup2], (instregex "^JMP(16|32|64)m((_NT)?)$", def : InstRW<[ADLPWriteResGroup2, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(8|16|32|64)rm$", "^AD(C|O)X(32|64)rm$")>; -def ADLPWriteResGroup3 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup3 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 13; let NumMicroOps = 5; } def : InstRW<[ADLPWriteResGroup3], (instregex "^(ADC|SBB)8mi(8?)$")>; -def ADLPWriteResGroup4 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, 
ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup4 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 1, 1, 1, 1]; let Latency = 13; let NumMicroOps = 6; } def : InstRW<[ADLPWriteResGroup4, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)8mr$")>; -def ADLPWriteResGroup5 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup5 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -576,7 +575,7 @@ def : InstRW<[ADLPWriteResGroup6], (instregex "^(ADD|SUB)64ri8$", def : InstRW<[ADLPWriteResGroup6], (instrs CLC, JMP_2)>; -def ADLPWriteResGroup7 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup7 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 13; let NumMicroOps = 4; } @@ -610,7 +609,7 @@ def ADLPWriteResGroup10 : SchedWriteRes<[ADLPPort02_03, ADLPPort05]> { def : InstRW<[ADLPWriteResGroup10], (instregex "^ADD_FI(16|32)m$", "^SUB(R?)_FI(16|32)m$")>; -def ADLPWriteResGroup11 : SchedWriteRes<[ADLPPort00_01_05_06_10]> { +def ADLPWriteResGroup11 : SchedWriteRes<[ADLPPort00_01_05_06_11]> { let Latency = 2; } def : InstRW<[ADLPWriteResGroup11], (instregex "^AND(8|16|32|64)r(r|i8)$", @@ -628,7 +627,7 @@ def : InstRW<[ADLPWriteResGroup11], (instregex "^AND(8|16|32|64)r(r|i8)$", "^TEST(8|16|32|64)rr$")>; def : InstRW<[ADLPWriteResGroup11], (instrs XOR8rr_NOREX)>; -def ADLPWriteResGroup12 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup12 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } @@ -638,18 +637,18 @@ def : InstRW<[ADLPWriteResGroup12, ReadAfterLd], (instregex "^(X?)OR64rm$")>; def : InstRW<[ADLPWriteResGroup12, ReadAfterLd], (instrs 
AND64rm)>; def : InstRW<[ADLPWriteResGroup12, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^TEST(8|16|32|64)mr$")>; -def ADLPWriteResGroup13 : SchedWriteRes<[ADLPPort01_05_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup13 : SchedWriteRes<[ADLPPort01_05_11, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup13, ReadAfterLd], (instregex "^ANDN(32|64)rm$")>; -def ADLPWriteResGroup14 : SchedWriteRes<[ADLPPort01_05_10]> { +def ADLPWriteResGroup14 : SchedWriteRes<[ADLPPort01_05_11]> { let Latency = 2; } def : InstRW<[ADLPWriteResGroup14], (instregex "^ANDN(32|64)rr$")>; -def ADLPWriteResGroup15 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup15 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [5, 2, 1, 1]; let Latency = 10; let NumMicroOps = 9; @@ -662,14 +661,14 @@ def ADLPWriteResGroup16 : SchedWriteRes<[ADLPPort01]> { def : InstRW<[ADLPWriteResGroup16], (instregex "^BT((C|R|S)?)64rr$", "^P(DEP|EXT)(32|64)rr$")>; -def ADLPWriteResGroup17 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup17 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [4, 2, 1, 1, 1, 1]; let Latency = 17; let NumMicroOps = 10; } def : InstRW<[ADLPWriteResGroup17], (instregex "^BT(C|R|S)64mr$")>; -def ADLPWriteResGroup18 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup18 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 7; let NumMicroOps = 5; } @@ -701,25 +700,25 @@ def ADLPWriteResGroup22 : SchedWriteRes<[ADLPPort00_06]>; def : InstRW<[ADLPWriteResGroup22], 
(instregex "^C(DQ|QO)$", "^(CL|ST)AC$")>; -def ADLPWriteResGroup23 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup23 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let Latency = 3; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup23], (instrs CLD)>; -def ADLPWriteResGroup24 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup24 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let Latency = 3; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup24], (instrs CLDEMOTE)>; -def ADLPWriteResGroup25 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup25 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort04_09, ADLPPort07_08]> { let Latency = 2; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup25], (instrs CLFLUSH)>; -def ADLPWriteResGroup26 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup26 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let Latency = 2; let NumMicroOps = 3; } @@ -739,35 +738,35 @@ def ADLPWriteResGroup28 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort05]> } def : InstRW<[ADLPWriteResGroup28], (instrs CLTS)>; -def ADLPWriteResGroup29 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup29 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let Latency = 5; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup29], (instregex "^MOV16o(16|32|64)a$")>; def : InstRW<[ADLPWriteResGroup29], (instrs CLWB)>; -def ADLPWriteResGroup30 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup30 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [5, 2]; let Latency = 6; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup30], (instregex "^CMPS(B|L|Q|W)$")>; -def 
ADLPWriteResGroup31 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup31 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [2, 7, 6, 2, 1, 1, 2, 1]; let Latency = 32; let NumMicroOps = 22; } def : InstRW<[ADLPWriteResGroup31], (instrs CMPXCHG16B)>; -def ADLPWriteResGroup32 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup32 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [4, 7, 2, 1, 1, 1]; let Latency = 25; let NumMicroOps = 16; } def : InstRW<[ADLPWriteResGroup32], (instrs CMPXCHG8B)>; -def ADLPWriteResGroup33 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup33 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 2, 1, 1, 1]; let Latency = 13; let NumMicroOps = 6; @@ -781,13 +780,13 @@ def ADLPWriteResGroup34 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_0 } def : InstRW<[ADLPWriteResGroup34], (instrs CPUID)>; -def ADLPWriteResGroup35 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort02_03_11]> { +def ADLPWriteResGroup35 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort02_03_10]> { let Latency = 26; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup35], (instregex "^(V?)CVT(T?)SD2SIrm((_Int)?)$")>; -def ADLPWriteResGroup36 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup36 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_10, ADLPPort05]> { let Latency = 12; let NumMicroOps = 3; } @@ -811,7 +810,7 @@ def ADLPWriteResGroup38 : 
SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort05]> def : InstRW<[ADLPWriteResGroup38], (instregex "^(V?)CVT(T?)SS2SI64rr_Int$")>; def : InstRW<[ADLPWriteResGroup38, ReadDefault], (instregex "^(V?)CVT(T?)SS2SI64rr$")>; -def ADLPWriteResGroup39 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup39 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let Latency = 2; let NumMicroOps = 2; } @@ -827,7 +826,7 @@ def : InstRW<[ADLPWriteResGroup40], (instrs DEC16r_alt, ST_FPrr, SYSCALL)>; -def ADLPWriteResGroup41 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup41 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 7; } def : InstRW<[ADLPWriteResGroup41], (instrs DEC32r_alt)>; @@ -850,7 +849,7 @@ def ADLPWriteResGroup44 : SchedWriteRes<[ADLPPort00]> { def : InstRW<[ADLPWriteResGroup44], (instregex "^DIVR_F(P?)rST0$")>; def : InstRW<[ADLPWriteResGroup44], (instrs DIVR_FST0r)>; -def ADLPWriteResGroup45 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11]> { +def ADLPWriteResGroup45 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_10]> { let Latency = 20; let NumMicroOps = 2; } @@ -874,7 +873,7 @@ def ADLPWriteResGroup48 : SchedWriteRes<[ADLPPort00]> { def : InstRW<[ADLPWriteResGroup48], (instregex "^DIV_F(P?)rST0$")>; def : InstRW<[ADLPWriteResGroup48], (instrs DIV_FST0r)>; -def ADLPWriteResGroup49 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup49 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [2, 21, 2, 14, 4, 9, 5]; let Latency = 126; let NumMicroOps = 57; @@ -1001,14 +1000,14 @@ def ADLPWriteResGroup67 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPPo } def : InstRW<[ADLPWriteResGroup67], (instrs FXRSTOR64)>; -def ADLPWriteResGroup68 : 
SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup68 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [2, 5, 10, 10, 2, 38, 5, 38]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 110; } def : InstRW<[ADLPWriteResGroup68], (instregex "^FXSAVE((64)?)$")>; -def ADLPWriteResGroup69 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_11]> { +def ADLPWriteResGroup69 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_10]> { let Latency = 12; let NumMicroOps = 2; } @@ -1023,41 +1022,41 @@ def ADLPWriteResGroup70 : SchedWriteRes<[ADLPPort00_01]> { def : InstRW<[ADLPWriteResGroup70], (instregex "^(V?)GF2P8MULBrr$")>; def : InstRW<[ADLPWriteResGroup70], (instrs VGF2P8MULBYrr)>; -def ADLPWriteResGroup71 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup71 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 5, 26, 19, 2, 7, 21]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[ADLPWriteResGroup71], (instrs IN16ri)>; -def ADLPWriteResGroup72 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup72 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 1, 4, 26, 19, 3, 7, 20]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[ADLPWriteResGroup72], (instrs IN16rr)>; -def ADLPWriteResGroup73 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, 
ADLPPort05]> { +def ADLPWriteResGroup73 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 6, 28, 21, 2, 10, 20]; let Latency = 35; let NumMicroOps = 94; } def : InstRW<[ADLPWriteResGroup73], (instrs IN32ri)>; -def ADLPWriteResGroup74 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup74 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 9, 28, 21, 2, 11, 21]; let NumMicroOps = 99; } def : InstRW<[ADLPWriteResGroup74], (instrs IN32rr)>; -def ADLPWriteResGroup75 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup75 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 6, 25, 19, 2, 8, 20]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[ADLPWriteResGroup75], (instrs IN8ri)>; -def ADLPWriteResGroup76 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup76 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [7, 6, 25, 19, 2, 7, 20]; let Latency = 35; let NumMicroOps = 86; @@ -1069,7 +1068,7 @@ def ADLPWriteResGroup77 : SchedWriteRes<[ADLPPort00_06]> { } def : InstRW<[ADLPWriteResGroup77], (instrs INC16r_alt)>; -def ADLPWriteResGroup78 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup78 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 7; } def : InstRW<[ADLPWriteResGroup78], (instregex "^(V?)MOV(D|SH|SL)DUPrm$", @@ -1077,28 +1076,28 @@ def : 
InstRW<[ADLPWriteResGroup78], (instregex "^(V?)MOV(D|SH|SL)DUPrm$", def : InstRW<[ADLPWriteResGroup78], (instrs INC32r_alt, VBROADCASTSSrm)>; -def ADLPWriteResGroup79 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup79 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 6, 24, 17, 8, 1, 19, 1]; let Latency = 20; let NumMicroOps = 83; } def : InstRW<[ADLPWriteResGroup79], (instrs INSB)>; -def ADLPWriteResGroup80 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup80 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1]; let Latency = 20; let NumMicroOps = 92; } def : InstRW<[ADLPWriteResGroup80], (instrs INSL)>; -def ADLPWriteResGroup81 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup81 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1]; let Latency = 20; let NumMicroOps = 86; } def : InstRW<[ADLPWriteResGroup81], (instrs INSW)>; -def ADLPWriteResGroup82 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup82 : 
SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [5, 4, 8, 6, 2, 5, 7, 5]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 42; @@ -1128,35 +1127,35 @@ def ADLPWriteResGroup86 : SchedWriteRes<[]> { def : InstRW<[ADLPWriteResGroup86], (instregex "^JMP_(1|4)$")>; def : InstRW<[ADLPWriteResGroup86], (instrs VZEROUPPER)>; -def ADLPWriteResGroup87 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup87 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [8, 2, 14, 3, 1]; let Latency = 198; let NumMicroOps = 81; } def : InstRW<[ADLPWriteResGroup87], (instrs LAR16rm)>; -def ADLPWriteResGroup88 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup88 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1]; let Latency = 66; let NumMicroOps = 22; } def : InstRW<[ADLPWriteResGroup88], (instrs LAR16rr)>; -def ADLPWriteResGroup89 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup89 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1]; let Latency = 71; let NumMicroOps = 85; } def : InstRW<[ADLPWriteResGroup89], (instrs LAR32rm)>; -def ADLPWriteResGroup90 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup90 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, 
ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1]; let Latency = 65; let NumMicroOps = 22; } def : InstRW<[ADLPWriteResGroup90], (instregex "^LAR(32|64)rr$")>; -def ADLPWriteResGroup91 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup91 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1]; let Latency = 71; let NumMicroOps = 87; @@ -1168,13 +1167,13 @@ def ADLPWriteResGroup92 : SchedWriteRes<[ADLPPort02_03]> { } def : InstRW<[ADLPWriteResGroup92], (instregex "^LD_F(32|64|80)m$")>; -def ADLPWriteResGroup93 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01]> { +def ADLPWriteResGroup93 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01]> { let Latency = 2; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup93], (instrs LEA16r)>; -def ADLPWriteResGroup94 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup94 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 6; let NumMicroOps = 4; @@ -1183,77 +1182,77 @@ def : InstRW<[ADLPWriteResGroup94], (instregex "^LODS(B|W)$", "^SCAS(B|L|Q|W)$")>; def : InstRW<[ADLPWriteResGroup94], (instrs LEAVE)>; -def ADLPWriteResGroup95 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup95 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 6; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup95], (instrs LEAVE64)>; -def ADLPWriteResGroup96 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup96 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, 
ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 2, 4, 3, 2, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 14; } def : InstRW<[ADLPWriteResGroup96], (instrs LGDT64m)>; -def ADLPWriteResGroup97 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup97 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 1, 5, 3, 2, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 14; } def : InstRW<[ADLPWriteResGroup97], (instrs LIDT64m)>; -def ADLPWriteResGroup98 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup98 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [5, 3, 2, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 12; } def : InstRW<[ADLPWriteResGroup98], (instrs LLDT16m)>; -def ADLPWriteResGroup99 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup99 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 4, 3, 1, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 11; } def : InstRW<[ADLPWriteResGroup99], (instrs LLDT16r)>; -def ADLPWriteResGroup100 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup100 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [1, 1, 2, 8, 3, 1, 2, 7, 
2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 27; } def : InstRW<[ADLPWriteResGroup100], (instrs LMSW16m)>; -def ADLPWriteResGroup101 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup101 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [5, 7, 1, 2, 5, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 22; } def : InstRW<[ADLPWriteResGroup101], (instrs LMSW16r)>; -def ADLPWriteResGroup102 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup102 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 5; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup102], (instregex "^LODS(L|Q)$")>; -def ADLPWriteResGroup103 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup103 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [2, 4, 1]; let Latency = 3; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup103], (instrs LOOP)>; -def ADLPWriteResGroup104 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup104 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [4, 6, 1]; let Latency = 3; let NumMicroOps = 11; } def : InstRW<[ADLPWriteResGroup104], (instrs LOOPE)>; -def ADLPWriteResGroup105 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup105 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [4, 6, 1]; let Latency = 2; let NumMicroOps = 11; @@ -1266,21 +1265,21 @@ def ADLPWriteResGroup106 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort02_03, AD } def : InstRW<[ADLPWriteResGroup106], (instrs LRET64)>; -def ADLPWriteResGroup107 : 
SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup107 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 5, 3, 3, 1]; let Latency = 70; let NumMicroOps = 13; } def : InstRW<[ADLPWriteResGroup107], (instregex "^LSL(16|32|64)rm$")>; -def ADLPWriteResGroup108 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup108 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 4, 4, 3, 2, 1]; let Latency = 63; let NumMicroOps = 15; } def : InstRW<[ADLPWriteResGroup108], (instregex "^LSL(16|32|64)rr$")>; -def ADLPWriteResGroup109 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup109 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_10, ADLPPort05]> { let Latency = 24; let NumMicroOps = 3; } @@ -1304,7 +1303,7 @@ def ADLPWriteResGroup112 : SchedWriteRes<[ADLPPort00, ADLPPort00_01]> { } def : InstRW<[ADLPWriteResGroup112], (instrs MMX_CVTPI2PSrr)>; -def ADLPWriteResGroup113 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11]> { +def ADLPWriteResGroup113 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_10]> { let Latency = 13; let NumMicroOps = 2; } @@ -1329,7 +1328,7 @@ def ADLPWriteResGroup116 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup116], (instrs MMX_MOVD64mr)>; -def ADLPWriteResGroup117 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup117 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 8; } def : InstRW<[ADLPWriteResGroup117], (instregex "^MMX_MOV(D|Q)64rm$", @@ -1351,7 +1350,7 @@ def ADLPWriteResGroup119 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05]> { } def : InstRW<[ADLPWriteResGroup119], (instregex "^MMX_MOVQ2(DQ|FR64)rr$")>; -def ADLPWriteResGroup120 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def 
ADLPWriteResGroup120 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 12; let NumMicroOps = 3; @@ -1368,13 +1367,13 @@ def : InstRW<[ADLPWriteResGroup121], (instregex "^MMX_PACKSS(DW|WB)rr$")>; def : InstRW<[ADLPWriteResGroup121], (instrs MMX_PACKUSWBrr)>; def : InstRW<[ADLPWriteResGroup121, ReadDefault, ReadInt2Fpu], (instrs MMX_PINSRWrri)>; -def ADLPWriteResGroup122 : SchedWriteRes<[ADLPPort00_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup122 : SchedWriteRes<[ADLPPort00_05, ADLPPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup122, ReadAfterVecLd], (instregex "^MMX_P(ADD|SUB)(B|D|Q|W)rm$")>; -def ADLPWriteResGroup123 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup123 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 11; let NumMicroOps = 4; @@ -1388,7 +1387,7 @@ def ADLPWriteResGroup124 : SchedWriteRes<[ADLPPort00, ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup124], (instregex "^MMX_PH(ADD|SUB)SWrr$")>; -def ADLPWriteResGroup125 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup125 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 9; let NumMicroOps = 2; } @@ -1396,7 +1395,7 @@ def : InstRW<[ADLPWriteResGroup125], (instregex "^VPBROADCAST(B|W)Yrm$")>; def : InstRW<[ADLPWriteResGroup125, ReadAfterLd], (instrs MMX_PINSRWrmi)>; def : InstRW<[ADLPWriteResGroup125, ReadAfterVecYLd], (instrs VPALIGNRYrmi)>; -def ADLPWriteResGroup126 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup126 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let Latency = 5; let NumMicroOps = 2; } @@ -1410,35 +1409,35 @@ def : InstRW<[ADLPWriteResGroup127], (instregex "^PUSH(F|G)S(16|32)$")>; def : InstRW<[ADLPWriteResGroup127], (instrs MOV16ms, MOVBE32mr)>; -def ADLPWriteResGroup128 : SchedWriteRes<[ADLPPort00_01_05_06_10, 
ADLPPort01]> { +def ADLPWriteResGroup128 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01]> { let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup128], (instregex "^MOV(16|32|64)rs$", "^S(TR|LDT)16r$")>; -def ADLPWriteResGroup129 : SchedWriteRes<[ADLPPort02_03_11]>; +def ADLPWriteResGroup129 : SchedWriteRes<[ADLPPort02_03_10]>; def : InstRW<[ADLPWriteResGroup129], (instregex "^MOV32ao(16|32|64)$")>; def : InstRW<[ADLPWriteResGroup129], (instrs MOV64ao64)>; -def ADLPWriteResGroup130 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup130 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup130], (instregex "^MOV(8|32)o(16|32)a$", "^MOV(8|32|64)o64a$")>; -def ADLPWriteResGroup131 : SchedWriteRes<[ADLPPort00_01_05_06_10]> { +def ADLPWriteResGroup131 : SchedWriteRes<[ADLPPort00_01_05_06_11]> { let Latency = 0; } def : InstRW<[ADLPWriteResGroup131], (instregex "^MOV32rr((_REV)?)$", "^MOVZX(32|64)rr8$")>; def : InstRW<[ADLPWriteResGroup131], (instrs MOVZX32rr8_NOREX)>; -def ADLPWriteResGroup132 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup132 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 5; } def : InstRW<[ADLPWriteResGroup132], (instrs MOV64ao32)>; -def ADLPWriteResGroup133 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup133 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2]; let Latency = 217; let NumMicroOps = 48; @@ -1451,20 +1450,20 @@ def ADLPWriteResGroup134 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup134], (instrs MOV64o32a)>; -def ADLPWriteResGroup135 : 
SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort05]> { +def ADLPWriteResGroup135 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort05]> { let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup135], (instrs MOV64rc)>; -def ADLPWriteResGroup136 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort05]> { +def ADLPWriteResGroup136 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort05]> { let ReleaseAtCycles = [3, 4, 8, 4, 2, 3]; let Latency = 181; let NumMicroOps = 24; } def : InstRW<[ADLPWriteResGroup136], (instrs MOV64rd)>; -def ADLPWriteResGroup137 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup137 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup137], (instregex "^MOV8ao(16|32|64)$")>; @@ -1482,13 +1481,13 @@ def ADLPWriteResGroup139 : SchedWriteRes<[ADLPPort00_06, ADLPPort04_09, ADLPPort } def : InstRW<[ADLPWriteResGroup139], (instrs MOVBE16mr)>; -def ADLPWriteResGroup140 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup140 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup140], (instrs MOVBE16rm)>; -def ADLPWriteResGroup141 : SchedWriteRes<[ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup141 : SchedWriteRes<[ADLPPort01, ADLPPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -1503,13 +1502,13 @@ def : InstRW<[ADLPWriteResGroup142], (instrs MOVBE64mr, SLDT16m, STRm)>; -def ADLPWriteResGroup143 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup143 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : 
InstRW<[ADLPWriteResGroup143], (instrs MOVBE64rm)>; -def ADLPWriteResGroup144 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup144 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup144], (instregex "^MOVDIR64B(16|32|64)$")>; @@ -1526,7 +1525,7 @@ def ADLPWriteResGroup146 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup146], (instrs MOVDIRI64)>; -def ADLPWriteResGroup147 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup147 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -1545,7 +1544,7 @@ def ADLPWriteResGroup149 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup149], (instrs MOVNTImr)>; -def ADLPWriteResGroup150 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup150 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [4, 1, 1, 1]; let Latency = 8; let NumMicroOps = 7; @@ -1558,27 +1557,27 @@ def : InstRW<[ADLPWriteResGroup151], (instregex "^(V?)MOVS(D|S)rr((_REV)?)$", "^VP(ADD|SUB)(B|D|Q|W)Yrr$")>; def : InstRW<[ADLPWriteResGroup151], (instrs VPBLENDDrri)>; -def ADLPWriteResGroup152 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup152 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [4, 1, 1, 1]; let Latency = 7; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup152], (instregex "^MOVS(L|Q|W)$")>; -def ADLPWriteResGroup153 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup153 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 6; } def : InstRW<[ADLPWriteResGroup153], (instregex "^MOVSX(16|32|64)rm(16|32)$", 
"^MOVSX(32|64)rm8$")>; def : InstRW<[ADLPWriteResGroup153], (instrs MOVSX32rm8_NOREX)>; -def ADLPWriteResGroup154 : SchedWriteRes<[ADLPPort01_05_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup154 : SchedWriteRes<[ADLPPort01_05_11, ADLPPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup154], (instrs MOVSX16rm8)>; -def ADLPWriteResGroup155 : SchedWriteRes<[ADLPPort01_05_10]>; +def ADLPWriteResGroup155 : SchedWriteRes<[ADLPPort01_05_11]>; def : InstRW<[ADLPWriteResGroup155], (instregex "^MOVSX(16|32|64)rr(8|16|32)$")>; def : InstRW<[ADLPWriteResGroup155], (instrs MOVSX32rr8_NOREX)>; @@ -1607,70 +1606,70 @@ def ADLPWriteResGroup159 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort05, ADLPP } def : InstRW<[ADLPWriteResGroup159], (instrs MWAITrr)>; -def ADLPWriteResGroup160 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup160 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1]; let Latency = 35; let NumMicroOps = 79; } def : InstRW<[ADLPWriteResGroup160], (instrs OUT16ir)>; -def ADLPWriteResGroup161 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup161 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 6, 27, 15, 7, 1, 16, 1]; let Latency = 35; let NumMicroOps = 79; } def : InstRW<[ADLPWriteResGroup161], (instrs OUT16rr)>; -def ADLPWriteResGroup162 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def 
ADLPWriteResGroup162 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1]; let Latency = 35; let NumMicroOps = 85; } def : InstRW<[ADLPWriteResGroup162], (instrs OUT32ir)>; -def ADLPWriteResGroup163 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup163 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 6, 29, 15, 9, 1, 18, 1]; let Latency = 35; let NumMicroOps = 85; } def : InstRW<[ADLPWriteResGroup163], (instrs OUT32rr)>; -def ADLPWriteResGroup164 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup164 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1]; let Latency = 35; let NumMicroOps = 73; } def : InstRW<[ADLPWriteResGroup164], (instrs OUT8ir)>; -def ADLPWriteResGroup165 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup165 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [5, 5, 26, 15, 5, 1, 15, 1]; let Latency = 35; let NumMicroOps = 73; } def : InstRW<[ADLPWriteResGroup165], (instrs OUT8rr)>; -def ADLPWriteResGroup166 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def 
ADLPWriteResGroup166 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 6, 25, 16, 7, 1, 17, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 80; } def : InstRW<[ADLPWriteResGroup166], (instrs OUTSB)>; -def ADLPWriteResGroup167 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup167 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [7, 6, 28, 16, 10, 1, 20, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 89; } def : InstRW<[ADLPWriteResGroup167], (instrs OUTSL)>; -def ADLPWriteResGroup168 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup168 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 83; } def : InstRW<[ADLPWriteResGroup168], (instrs OUTSW)>; -def ADLPWriteResGroup169 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup169 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 10; let NumMicroOps = 2; } @@ -1685,14 +1684,14 @@ def : InstRW<[ADLPWriteResGroup170], (instregex "^(V?)PACK(S|U)S(DW|WB)rr$", "^VPACK(S|U)S(DW|WB)Yrr$")>; def : InstRW<[ADLPWriteResGroup170], (instrs VPCMPGTQYrr)>; -def ADLPWriteResGroup171 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup171 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } def : 
InstRW<[ADLPWriteResGroup171, ReadAfterVecXLd], (instregex "^(V?)P(ADD|SUB)(B|D|Q|W)rm$")>; def : InstRW<[ADLPWriteResGroup171, ReadAfterVecXLd], (instrs VPBLENDDrmi)>; -def ADLPWriteResGroup172 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup172 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 8; let NumMicroOps = 2; } @@ -1710,7 +1709,7 @@ def ADLPWriteResGroup174 : SchedWriteRes<[ADLPPort00_06, ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup174], (instrs PAUSE)>; -def ADLPWriteResGroup175 : SchedWriteRes<[ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup175 : SchedWriteRes<[ADLPPort01, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -1722,7 +1721,7 @@ def ADLPWriteResGroup176 : SchedWriteRes<[ADLPPort01_05, ADLPPort04_09, ADLPPort } def : InstRW<[ADLPWriteResGroup176], (instregex "^(V?)PEXTR(D|Q)mri$")>; -def ADLPWriteResGroup177 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup177 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 2, 1]; let Latency = 9; let NumMicroOps = 4; @@ -1737,7 +1736,7 @@ def ADLPWriteResGroup178 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05]> { def : InstRW<[ADLPWriteResGroup178], (instregex "^(V?)PH(ADD|SUB)SWrr$", "^VPH(ADD|SUB)SWYrr$")>; -def ADLPWriteResGroup179 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup179 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 12; let NumMicroOps = 3; } @@ -1751,41 +1750,41 @@ def : InstRW<[ADLPWriteResGroup180], (instregex "^POPA(16|32)$", "^PREFETCHIT(0|1)$")>; def : InstRW<[ADLPWriteResGroup180], (instrs POPF32)>; -def ADLPWriteResGroup181 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup181 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [6, 2, 1, 1]; 
let Latency = 5; let NumMicroOps = 10; } def : InstRW<[ADLPWriteResGroup181], (instrs POPF16)>; -def ADLPWriteResGroup182 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup182 : SchedWriteRes<[ADLPPort00_06, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 5; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup182], (instrs POPF64)>; -def ADLPWriteResGroup183 : SchedWriteRes<[ADLPPort02_03_11]> { +def ADLPWriteResGroup183 : SchedWriteRes<[ADLPPort02_03_10]> { let Latency = 0; } def : InstRW<[ADLPWriteResGroup183], (instregex "^PREFETCHT(0|1|2)$")>; def : InstRW<[ADLPWriteResGroup183], (instrs PREFETCHNTA)>; -def ADLPWriteResGroup184 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11, ADLPPort06]> { +def ADLPWriteResGroup184 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10, ADLPPort06]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup184], (instregex "^PTWRITE((64)?)m$")>; -def ADLPWriteResGroup185 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort06]> { +def ADLPWriteResGroup185 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort06]> { let ReleaseAtCycles = [1, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup185], (instrs PTWRITE64r)>; -def ADLPWriteResGroup186 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort06]> { +def ADLPWriteResGroup186 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort06]> { let ReleaseAtCycles = [2, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 4; @@ -1797,7 +1796,7 @@ def ADLPWriteResGroup187 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup187], (instregex "^PUSH64r((mr)?)$")>; -def ADLPWriteResGroup188 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup188 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let 
NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup188], (instrs PUSH64rmm)>; @@ -1818,49 +1817,49 @@ def ADLPWriteResGroup191 : SchedWriteRes<[ADLPPort01, ADLPPort04_09, ADLPPort07_ } def : InstRW<[ADLPWriteResGroup191], (instregex "^PUSH(F|G)S64$")>; -def ADLPWriteResGroup192 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup192 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [2, 3, 2]; let Latency = 8; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup192], (instregex "^RC(L|R)(16|32|64)rCL$")>; -def ADLPWriteResGroup193 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup193 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let ReleaseAtCycles = [1, 2]; let Latency = 13; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup193, WriteRMW], (instregex "^RC(L|R)8m(1|i)$")>; -def ADLPWriteResGroup194 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup194 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [1, 5, 2]; let Latency = 20; let NumMicroOps = 8; } def : InstRW<[ADLPWriteResGroup194, WriteRMW], (instrs RCL8mCL)>; -def ADLPWriteResGroup195 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup195 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [2, 5, 2]; let Latency = 7; let NumMicroOps = 9; } def : InstRW<[ADLPWriteResGroup195], (instrs RCL8rCL)>; -def ADLPWriteResGroup196 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup196 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [2, 4, 3]; let Latency = 20; let NumMicroOps = 9; } def : InstRW<[ADLPWriteResGroup196, WriteRMW], (instrs RCR8mCL)>; -def ADLPWriteResGroup197 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, 
ADLPPort01]> { +def ADLPWriteResGroup197 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [3, 4, 3]; let Latency = 9; let NumMicroOps = 10; } def : InstRW<[ADLPWriteResGroup197], (instrs RCR8rCL)>; -def ADLPWriteResGroup198 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort01_05_10, ADLPPort05]> { +def ADLPWriteResGroup198 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort01_05_11, ADLPPort05]> { let ReleaseAtCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 54; @@ -1872,48 +1871,48 @@ def ADLPWriteResGroup199 : SchedWriteRes<[ADLPPort01]> { } def : InstRW<[ADLPWriteResGroup199], (instrs RDPID64)>; -def ADLPWriteResGroup200 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup200 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[ADLPWriteResGroup200], (instrs RDPKRUr)>; -def ADLPWriteResGroup201 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> { +def ADLPWriteResGroup201 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort05]> { let ReleaseAtCycles = [9, 6, 2, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 18; } def : InstRW<[ADLPWriteResGroup201], (instrs RDPMC)>; -def ADLPWriteResGroup202 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup202 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2]; let Latency = 1386; let NumMicroOps = 25; } def : 
InstRW<[ADLPWriteResGroup202], (instrs RDRAND16r)>; -def ADLPWriteResGroup203 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup203 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 25; } def : InstRW<[ADLPWriteResGroup203], (instregex "^RDRAND(32|64)r$")>; -def ADLPWriteResGroup204 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup204 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4]; let Latency = 1381; let NumMicroOps = 25; } def : InstRW<[ADLPWriteResGroup204], (instrs RDSEED16r)>; -def ADLPWriteResGroup205 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup205 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 25; } def : InstRW<[ADLPWriteResGroup205], (instregex "^RDSEED(32|64)r$")>; -def ADLPWriteResGroup206 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> { +def ADLPWriteResGroup206 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort05]> { let ReleaseAtCycles = [5, 6, 3, 1]; let Latency = 18; let NumMicroOps = 15; @@ -1927,13 +1926,13 @@ def ADLPWriteResGroup207 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_ } def : InstRW<[ADLPWriteResGroup207], (instrs RDTSCP)>; -def ADLPWriteResGroup208 : 
SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup208 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup208], (instrs RET64)>; -def ADLPWriteResGroup209 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup209 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 6; let NumMicroOps = 3; @@ -1978,7 +1977,7 @@ def ADLPWriteResGroup215 : SchedWriteRes<[ADLPPort00_06]> { def : InstRW<[ADLPWriteResGroup215, WriteRMW], (instregex "^S(A|H)R8m(1|i)$", "^SHL8m(1|i)$")>; -def ADLPWriteResGroup216 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup216 : SchedWriteRes<[ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -1991,7 +1990,7 @@ def ADLPWriteResGroup217 : SchedWriteRes<[ADLPPort00_06]> { def : InstRW<[ADLPWriteResGroup217], (instregex "^S(A|H)RX(32|64)rr$", "^SHLX(32|64)rr$")>; -def ADLPWriteResGroup218 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup218 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 2, 1, 1, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 7; @@ -2004,14 +2003,14 @@ def ADLPWriteResGroup219 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup219], (instrs SFENCE)>; -def ADLPWriteResGroup220 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup220 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [1, 2, 2, 2]; let Latency = 21; let NumMicroOps = 7; } def : InstRW<[ADLPWriteResGroup220], (instregex "^S(G|I)DT64m$")>; -def ADLPWriteResGroup221 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup221 
: SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_10, ADLPPort05]> { let Latency = 9; let NumMicroOps = 3; } @@ -2023,7 +2022,7 @@ def ADLPWriteResGroup222 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup222], (instrs SHA1MSG1rr)>; -def ADLPWriteResGroup223 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup223 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 2, 1, 2, 1]; let Latency = 13; let NumMicroOps = 8; @@ -2037,7 +2036,7 @@ def ADLPWriteResGroup224 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPP } def : InstRW<[ADLPWriteResGroup224], (instrs SHA1MSG2rr)>; -def ADLPWriteResGroup225 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup225 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let Latency = 8; let NumMicroOps = 4; } @@ -2049,7 +2048,7 @@ def ADLPWriteResGroup226 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPP } def : InstRW<[ADLPWriteResGroup226], (instrs SHA1NEXTErr)>; -def ADLPWriteResGroup227 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup227 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 13; let NumMicroOps = 2; } @@ -2062,7 +2061,7 @@ def ADLPWriteResGroup228 : SchedWriteRes<[ADLPPort05]> { def : InstRW<[ADLPWriteResGroup228], (instrs SHA1RNDS4rri, SHA256RNDS2rr)>; -def ADLPWriteResGroup229 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup229 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPPort00_06, ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [3, 2, 1, 1, 1]; let Latency = 12; let NumMicroOps = 8; @@ -2076,7 +2075,7 @@ def ADLPWriteResGroup230 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_01_05, ADLPP } def : 
InstRW<[ADLPWriteResGroup230], (instrs SHA256MSG1rr)>; -def ADLPWriteResGroup231 : SchedWriteRes<[ADLPPort02_03_11, ADLPPort05]> { +def ADLPWriteResGroup231 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 13; let NumMicroOps = 3; @@ -2090,63 +2089,63 @@ def ADLPWriteResGroup232 : SchedWriteRes<[ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup232], (instrs SHA256MSG2rr)>; -def ADLPWriteResGroup233 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup233 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort07_08]> { let Latency = 13; let NumMicroOps = 5; } def : InstRW<[ADLPWriteResGroup233], (instrs SHRD16mri8)>; -def ADLPWriteResGroup234 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01]> { +def ADLPWriteResGroup234 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup234], (instregex "^SLDT(32|64)r$")>; -def ADLPWriteResGroup235 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort05]> { +def ADLPWriteResGroup235 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort05]> { let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup235], (instrs SMSW16r)>; -def ADLPWriteResGroup236 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort05]> { +def ADLPWriteResGroup236 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort05]> { let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup236], (instregex "^SMSW(32|64)r$")>; -def ADLPWriteResGroup237 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_11]> { +def ADLPWriteResGroup237 : SchedWriteRes<[ADLPPort00, ADLPPort02_03_10]> { let Latency = 24; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup237, ReadAfterVecLd], (instregex "^(V?)SQRTSDm_Int$")>; -def ADLPWriteResGroup238 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup238 : 
SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup238], (instrs STD)>; -def ADLPWriteResGroup239 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup239 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [1, 4, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 6; } def : InstRW<[ADLPWriteResGroup239], (instrs STI)>; -def ADLPWriteResGroup240 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup240 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 8; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup240], (instrs STOSB)>; -def ADLPWriteResGroup241 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort04_09, ADLPPort07_08]> { +def ADLPWriteResGroup241 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort04_09, ADLPPort07_08]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 7; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup241], (instregex "^STOS(L|Q|W)$")>; -def ADLPWriteResGroup242 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort01]> { +def ADLPWriteResGroup242 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort01]> { let Latency = 5; let NumMicroOps = 2; } @@ -2158,7 +2157,7 @@ def ADLPWriteResGroup243 : SchedWriteRes<[ADLPPort00]> { def : InstRW<[ADLPWriteResGroup243], (instregex "^(TST|XAM)_F$")>; def : InstRW<[ADLPWriteResGroup243], (instrs UCOM_FPPr)>; -def ADLPWriteResGroup244 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup244 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 9; let NumMicroOps = 4; @@ -2174,35 +2173,35 @@ def ADLPWriteResGroup245 : SchedWriteRes<[ADLPPort00_01_05]> { def : InstRW<[ADLPWriteResGroup245], (instregex "^VBLENDVP(D|S)rrr$")>; def : InstRW<[ADLPWriteResGroup245], 
(instrs VPBLENDVBrrr)>; -def ADLPWriteResGroup246 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup246 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [6, 7, 18]; let Latency = 81; let NumMicroOps = 31; } def : InstRW<[ADLPWriteResGroup246], (instrs VERRm)>; -def ADLPWriteResGroup247 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup247 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [6, 7, 17]; let Latency = 74; let NumMicroOps = 30; } def : InstRW<[ADLPWriteResGroup247], (instrs VERRr)>; -def ADLPWriteResGroup248 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup248 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [5, 8, 21]; let Latency = 81; let NumMicroOps = 34; } def : InstRW<[ADLPWriteResGroup248], (instrs VERWm)>; -def ADLPWriteResGroup249 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_11]> { +def ADLPWriteResGroup249 : SchedWriteRes<[ADLPPort00, ADLPPort01, ADLPPort02_03_10]> { let ReleaseAtCycles = [5, 8, 20]; let Latency = 74; let NumMicroOps = 33; } def : InstRW<[ADLPWriteResGroup249], (instrs VERWr)>; -def ADLPWriteResGroup250 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup250 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 4]; let Latency = 29; let NumMicroOps = 8; @@ -2212,7 +2211,7 @@ def : InstRW<[ADLPWriteResGroup250, WriteVecMaskedGatherWriteback], (instregex " def : InstRW<[ADLPWriteResGroup250, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSYrm, VPGATHERQDYrm)>; -def ADLPWriteResGroup251 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup251 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 1, 
1, 2]; let Latency = 20; let NumMicroOps = 5; @@ -2222,7 +2221,7 @@ def : InstRW<[ADLPWriteResGroup251, WriteVecMaskedGatherWriteback], (instregex " def : InstRW<[ADLPWriteResGroup251, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSrm, VPGATHERQDrm)>; -def ADLPWriteResGroup252 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup252 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 8]; let Latency = 30; let NumMicroOps = 12; @@ -2230,7 +2229,7 @@ def ADLPWriteResGroup252 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort def : InstRW<[ADLPWriteResGroup252, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; -def ADLPWriteResGroup253 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup253 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 4]; let Latency = 28; let NumMicroOps = 8; @@ -2245,14 +2244,14 @@ def ADLPWriteResGroup254 : SchedWriteRes<[ADLPPort01_05, ADLPPort05]> { } def : InstRW<[ADLPWriteResGroup254], (instregex "^VH(ADD|SUB)P(D|S)rr$")>; -def ADLPWriteResGroup255 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup255 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup255, ReadAfterVecYLd], (instregex "^VINSERT(F|I)128rmi$", "^VP(ADD|SUB)(B|D|Q|W)Yrm$")>; -def ADLPWriteResGroup256 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort02_03_11]> { +def ADLPWriteResGroup256 : SchedWriteRes<[ADLPPort00, ADLPPort00_06, ADLPPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } @@ -2294,7 +2293,7 @@ def ADLPWriteResGroup262 : SchedWriteRes<[ADLPPort04_09, ADLPPort07_08]> { } def : InstRW<[ADLPWriteResGroup262], (instrs VMOVNTPSmr)>; -def ADLPWriteResGroup263 : SchedWriteRes<[ADLPPort02_03_11, 
ADLPPort05]> { +def ADLPWriteResGroup263 : SchedWriteRes<[ADLPPort02_03_10, ADLPPort05]> { let Latency = 11; let NumMicroOps = 2; } @@ -2302,21 +2301,21 @@ def : InstRW<[ADLPWriteResGroup263, ReadAfterVecYLd], (instregex "^VPACK(S|U)S(D def : InstRW<[ADLPWriteResGroup263, ReadAfterVecYLd], (instrs VPCMPGTQYrm)>; def : InstRW<[ADLPWriteResGroup263, ReadAfterVecXLd], (instrs VPCLMULQDQYrmi)>; -def ADLPWriteResGroup264 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup264 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup264, ReadAfterVecYLd], (instregex "^VSHUFP(D|S)Yrmi$")>; def : InstRW<[ADLPWriteResGroup264, ReadAfterVecYLd], (instrs VPBLENDWYrmi)>; -def ADLPWriteResGroup266 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_11]> { +def ADLPWriteResGroup266 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_10]> { let ReleaseAtCycles = [1, 2, 1]; let Latency = 10; let NumMicroOps = 4; } def : InstRW<[ADLPWriteResGroup266, ReadAfterVecYLd], (instregex "^VPH(ADD|SUB)SWYrm$")>; -def ADLPWriteResGroup267 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10]> { +def ADLPWriteResGroup267 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11]> { let ReleaseAtCycles = [1, 2, 3, 3, 1]; let Latency = 16; let NumMicroOps = 10; @@ -2337,42 +2336,42 @@ def ADLPWriteResGroup269 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_ } def : InstRW<[ADLPWriteResGroup269], (instrs WRMSR)>; -def ADLPWriteResGroup270 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06, ADLPPort01, ADLPPort05]> { +def ADLPWriteResGroup270 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06, ADLPPort01, ADLPPort05]> { let ReleaseAtCycles = [2, 1, 4, 1]; let Latency = AlderlakePModel.MaxLatency; let NumMicroOps = 8; } def : InstRW<[ADLPWriteResGroup270], (instrs WRPKRUr)>; -def 
ADLPWriteResGroup271 : SchedWriteRes<[ADLPPort00_01_05_06_10]> { +def ADLPWriteResGroup271 : SchedWriteRes<[ADLPPort00_01_05_06_11]> { let ReleaseAtCycles = [2]; let Latency = 12; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup271, WriteRMW], (instregex "^XADD(16|32|64)rm$")>; -def ADLPWriteResGroup272 : SchedWriteRes<[ADLPPort00_01_05_06_10]> { +def ADLPWriteResGroup272 : SchedWriteRes<[ADLPPort00_01_05_06_11]> { let ReleaseAtCycles = [2]; let Latency = 13; let NumMicroOps = 2; } def : InstRW<[ADLPWriteResGroup272, WriteRMW], (instrs XADD8rm)>; -def ADLPWriteResGroup273 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup273 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let ReleaseAtCycles = [4, 1]; let Latency = 39; let NumMicroOps = 5; } def : InstRW<[ADLPWriteResGroup273, WriteRMW], (instregex "^XCHG(16|32)rm$")>; -def ADLPWriteResGroup274 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup274 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let ReleaseAtCycles = [5, 1]; let Latency = 39; let NumMicroOps = 6; } def : InstRW<[ADLPWriteResGroup274, WriteRMW], (instrs XCHG64rm)>; -def ADLPWriteResGroup275 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_06]> { +def ADLPWriteResGroup275 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_06]> { let ReleaseAtCycles = [4, 1]; let Latency = 40; let NumMicroOps = 5; @@ -2386,14 +2385,14 @@ def ADLPWriteResGroup276 : SchedWriteRes<[ADLPPort00, ADLPPort00_01_05_06, ADLPP } def : InstRW<[ADLPWriteResGroup276], (instrs XCH_F)>; -def ADLPWriteResGroup277 : SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01]> { +def ADLPWriteResGroup277 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01]> { let ReleaseAtCycles = [7, 3, 8, 5]; let Latency = 4; let NumMicroOps = 23; } def : InstRW<[ADLPWriteResGroup277], (instrs XGETBV)>; -def ADLPWriteResGroup278 : 
SchedWriteRes<[ADLPPort00_01_05_06_10, ADLPPort02_03_11]> { +def ADLPWriteResGroup278 : SchedWriteRes<[ADLPPort00_01_05_06_11, ADLPPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 7; let NumMicroOps = 3; @@ -2408,63 +2407,63 @@ def ADLPWriteResGroup279 : SchedWriteRes<[ADLPPort00_01_05_06, ADLPPort01, ADLPP def : InstRW<[ADLPWriteResGroup279], (instregex "^XRSTOR((S|64)?)$")>; def : InstRW<[ADLPWriteResGroup279], (instrs XRSTORS64)>; -def ADLPWriteResGroup280 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup280 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1]; let Latency = 42; let NumMicroOps = 140; } def : InstRW<[ADLPWriteResGroup280], (instrs XSAVE)>; -def ADLPWriteResGroup281 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup281 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1]; let Latency = 41; let NumMicroOps = 140; } def : InstRW<[ADLPWriteResGroup281], (instrs XSAVE64)>; -def ADLPWriteResGroup282 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup282 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2]; let Latency = 42; let NumMicroOps = 151; } def : InstRW<[ADLPWriteResGroup282], (instrs XSAVEC)>; -def ADLPWriteResGroup283 
: SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup283 : SchedWriteRes<[ADLPPort00, ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2]; let Latency = 42; let NumMicroOps = 152; } def : InstRW<[ADLPWriteResGroup283], (instrs XSAVEC64)>; -def ADLPWriteResGroup284 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup284 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [25, 35, 52, 27, 4, 1, 10, 1]; let Latency = 46; let NumMicroOps = 155; } def : InstRW<[ADLPWriteResGroup284], (instrs XSAVEOPT)>; -def ADLPWriteResGroup285 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup285 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [25, 35, 53, 27, 4, 1, 10, 1]; let Latency = 46; let NumMicroOps = 156; } def : InstRW<[ADLPWriteResGroup285], (instrs XSAVEOPT64)>; -def ADLPWriteResGroup286 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup286 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2]; let Latency = 42; let NumMicroOps = 184; } def : InstRW<[ADLPWriteResGroup286], (instrs XSAVES)>; -def ADLPWriteResGroup287 : 
SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_11, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { +def ADLPWriteResGroup287 : SchedWriteRes<[ADLPPort00_01, ADLPPort00_05, ADLPPort00_06, ADLPPort01, ADLPPort01_05, ADLPPort02_03_10, ADLPPort04_09, ADLPPort05, ADLPPort07_08]> { let ReleaseAtCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2]; let Latency = 42; let NumMicroOps = 186; } def : InstRW<[ADLPWriteResGroup287], (instrs XSAVES64)>; -def ADLPWriteResGroup288 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_10, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_10, ADLPPort05]> { +def ADLPWriteResGroup288 : SchedWriteRes<[ADLPPort00_01_05, ADLPPort00_01_05_06_11, ADLPPort00_05_06, ADLPPort00_06, ADLPPort01, ADLPPort01_05_11, ADLPPort05]> { let ReleaseAtCycles = [4, 23, 2, 14, 8, 1, 2]; let Latency = 5; let NumMicroOps = 54; diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 699ca91cd1f8f4..5b50e1943e3db1 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -367,32 +367,37 @@ defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : X86WriteRes; -defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : BWWriteResPair; +defm : X86WriteRes; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; defm : X86WriteResPairUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; 
+defm : X86WriteRes; defm : X86WriteResUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteResUnsupported; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index b820418bb55191..d06e8a99370976 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -364,33 +364,41 @@ defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; -defm : X86WriteRes; defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; defm : X86WriteRes; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; @@ -983,7 +991,6 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { let NumMicroOps = 2; let ReleaseAtCycles = [1,1]; } -def: 
InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSrm)>; def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>; def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { @@ -1349,13 +1356,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>; -def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ReleaseAtCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>; - def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 7be9f51bcd46bd..775ad6b1078a53 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -348,28 +348,33 @@ defm : X86WriteRes defm : X86WriteRes; defm : X86WriteRes; // Unsupported = 1 -defm : SBWriteResPair; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; // Unsupported = 1 -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; // Unsupported = 1 - -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +// F16C Instructions (IvyBridge+) +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; // Unsupported = 1 +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; 
// Unsupported = 1 // Vector integer operations. defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 8a23d1b103aa6b..e04ff68d278b2b 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -56,15 +56,15 @@ def SPRPort00_05 : ProcResGroup<[SPRPort00, SPRPort05]>; def SPRPort00_05_06 : ProcResGroup<[SPRPort00, SPRPort05, SPRPort06]>; def SPRPort00_06 : ProcResGroup<[SPRPort00, SPRPort06]>; def SPRPort01_05 : ProcResGroup<[SPRPort01, SPRPort05]>; -def SPRPort01_05_10 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort10]>; +def SPRPort01_05_11 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort11]>; def SPRPort02_03 : ProcResGroup<[SPRPort02, SPRPort03]>; -def SPRPort02_03_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort11]>; +def SPRPort02_03_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort10]>; def SPRPort05_11 : ProcResGroup<[SPRPort05, SPRPort11]>; def SPRPort07_08 : ProcResGroup<[SPRPort07, SPRPort08]>; // EU has 112 reservation stations. -def SPRPort00_01_05_06_10 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05, - SPRPort06, SPRPort10]> { +def SPRPort00_01_05_06_11 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05, + SPRPort06, SPRPort11]> { let BufferSize = 112; } @@ -74,8 +74,8 @@ def SPRPort04_09 : ProcResGroup<[SPRPort04, SPRPort09]> { } // MEM has 72 reservation stations. 
-def SPRPort02_03_07_08_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07, - SPRPort08, SPRPort11]> { +def SPRPort02_03_07_08_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07, + SPRPort08, SPRPort10]> { let BufferSize = 72; } @@ -113,7 +113,7 @@ multiclass SPRWriteResPair { + def : WriteRes { let Latency = !add(Lat, LoadLat); let ReleaseAtCycles = !listconcat([1], Res); let NumMicroOps = !add(UOps, LoadUOps); @@ -126,71 +126,71 @@ multiclass SPRWriteResPair; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; -def : WriteRes; -def : WriteRes { +defm : X86WriteRes; +def : WriteRes; +def : WriteRes { let Latency = 11; } defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; def : WriteRes; defm : X86WriteRes; defm : SPRWriteResPair; def : WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; def : WriteRes { let Latency = 11; } -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm 
: X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -202,12 +202,12 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -235,7 +235,7 @@ defm : SPRWriteResPair; def : WriteRes { let Latency = 3; } -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; @@ -251,13 +251,13 @@ defm : SPRWriteResPair; def : WriteRes; defm : X86WriteRes; defm : X86WriteRes; -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } defm : SPRWriteResPair; @@ -270,8 +270,8 @@ defm : SPRWriteResPair; def : WriteRes { let Latency = 3; } -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -334,15 +334,15 @@ defm : SPRWriteResPair; def : WriteRes { let Latency = 2; } -defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; defm : X86WriteRes; -defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; @@ -359,10 +359,10 @@ defm : SPRWriteResPair; def : WriteRes { let 
Latency = 3; } -defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; defm : SPRWriteResPair; -def : WriteRes { +def : WriteRes { let Latency = 5; } def : WriteRes { @@ -370,7 +370,7 @@ def : WriteRes { } defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : SPRWriteResPair; +defm : SPRWriteResPair; defm : SPRWriteResPair; def : WriteRes { let Latency = SapphireRapidsModel.MaxLatency; @@ -380,9 +380,9 @@ def : WriteRes { } defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; @@ -397,16 +397,16 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes { let Latency = 3; } @@ -434,7 +434,7 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; @@ -447,7 +447,7 @@ defm : SPRWriteResPair; def : WriteRes { let Latency = 3; } -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : SPRWriteResPair; @@ -455,20 +455,20 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; -def : WriteRes { +defm : X86WriteRes; +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } -def : WriteRes { +def : WriteRes { let Latency = 7; } -def : WriteRes { +def : WriteRes { let Latency = 8; } 
defm : SPRWriteResPair; @@ -482,8 +482,8 @@ def : WriteRes { let Latency = 4; } defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -504,11 +504,11 @@ defm : SPRWriteResPair; defm : SPRWriteResPair; defm : SPRWriteResPair; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -516,26 +516,26 @@ defm : X86WriteRes; defm : X86WriteRes; defm : SPRWriteResPair; defm : SPRWriteResPair; -defm : X86WriteRes; +defm : X86WriteRes; def : WriteRes; // Infered SchedWriteRes and InstRW definition. -def SPRWriteResGroup0 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09]> { +def SPRWriteResGroup0 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_10, SPRPort04, SPRPort04_09]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup0], (instregex "^AA(D|N)D64mr$", "^A(X?)OR64mr$")>; -def SPRWriteResGroup1 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup1 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 1, 1, 1]; let Latency = 12; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup1, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(16|32|64)mr$")>; -def SPRWriteResGroup2 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup2 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -543,20 +543,20 @@ def : InstRW<[SPRWriteResGroup2], (instregex "^RORX(32|64)mi$")>; def : InstRW<[SPRWriteResGroup2, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, 
ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(8|16|32|64)rm$", "^AD(C|O)X(32|64)rm$")>; -def SPRWriteResGroup3 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup3 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 13; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup3], (instregex "^(ADC|SBB)8mi(8?)$")>; -def SPRWriteResGroup4 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup4 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 1, 1, 1]; let Latency = 13; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup4, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)8mr$")>; -def SPRWriteResGroup5 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup5 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -585,7 +585,7 @@ def : InstRW<[SPRWriteResGroup6], (instregex "^(ADD|SUB)64ri8$", def : InstRW<[SPRWriteResGroup6], (instrs CLC, JMP_2)>; -def SPRWriteResGroup7 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup7 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 13; let NumMicroOps = 4; } @@ -598,7 +598,7 @@ def : InstRW<[SPRWriteResGroup7, ReadAfterLd, ReadDefault, ReadDefault, ReadDefa "^(X?)OR8mr$")>; def : InstRW<[SPRWriteResGroup7, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs SUB8mr)>; -def SPRWriteResGroup8 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup8 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_10]> { let Latency = 10; let NumMicroOps = 2; } @@ -620,7 
+620,7 @@ def : InstRW<[SPRWriteResGroup9], (instregex "^(V?)(ADD|SUB)PSrr$", "^VPUNPCK(H|L)(BW|WD)Z(128|256)rrk(z?)$")>; def : InstRW<[SPRWriteResGroup9], (instrs VADDSUBPSYrr)>; -def SPRWriteResGroup10 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup10 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 10; let NumMicroOps = 2; } @@ -646,7 +646,7 @@ def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instregex "^VFPCLASSP(D|H|S "^VPERM(I|T)2PDZ128rmbkz$")>; def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instrs VPERMBZ128rm)>; -def SPRWriteResGroup11 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup11 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 13; let NumMicroOps = 3; @@ -695,7 +695,7 @@ def : InstRW<[SPRWriteResGroup12], (instrs ADD_FST0r, VPCMPGTQYrr, VPERMDYrr)>; -def SPRWriteResGroup13 : SchedWriteRes<[SPRPort00_01_05_06_10]> { +def SPRWriteResGroup13 : SchedWriteRes<[SPRPort00_01_05_06_11]> { let Latency = 2; } def : InstRW<[SPRWriteResGroup13], (instregex "^AND(8|16|32|64)r(r|i8)$", @@ -713,7 +713,7 @@ def : InstRW<[SPRWriteResGroup13], (instregex "^AND(8|16|32|64)r(r|i8)$", "^TEST(8|16|32|64)rr$")>; def : InstRW<[SPRWriteResGroup13], (instrs XOR8rr_NOREX)>; -def SPRWriteResGroup14 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup14 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } @@ -723,18 +723,18 @@ def : InstRW<[SPRWriteResGroup14, ReadAfterLd], (instregex "^(X?)OR64rm$")>; def : InstRW<[SPRWriteResGroup14, ReadAfterLd], (instrs AND64rm)>; def : InstRW<[SPRWriteResGroup14, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^TEST(8|16|32|64)mr$")>; -def SPRWriteResGroup15 : SchedWriteRes<[SPRPort01_05_10, SPRPort02_03_11]> { +def SPRWriteResGroup15 : SchedWriteRes<[SPRPort01_05_11, SPRPort02_03_10]> { let Latency = 7; let 
NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup15, ReadAfterLd], (instregex "^ANDN(32|64)rm$")>; -def SPRWriteResGroup16 : SchedWriteRes<[SPRPort01_05_10]> { +def SPRWriteResGroup16 : SchedWriteRes<[SPRPort01_05_11]> { let Latency = 2; } def : InstRW<[SPRWriteResGroup16], (instregex "^ANDN(32|64)rr$")>; -def SPRWriteResGroup17 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup17 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [5, 2, 1, 1]; let Latency = 10; let NumMicroOps = 9; @@ -747,14 +747,14 @@ def SPRWriteResGroup18 : SchedWriteRes<[SPRPort01]> { def : InstRW<[SPRWriteResGroup18], (instregex "^BT((C|R|S)?)64rr$", "^P(DEP|EXT)(32|64)rr$")>; -def SPRWriteResGroup19 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup19 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [4, 2, 1, 1, 1, 1]; let Latency = 17; let NumMicroOps = 10; } def : InstRW<[SPRWriteResGroup19], (instregex "^BT(C|R|S)64mr$")>; -def SPRWriteResGroup20 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup20 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 7; let NumMicroOps = 5; } @@ -789,25 +789,25 @@ def SPRWriteResGroup24 : SchedWriteRes<[SPRPort00_06]>; def : InstRW<[SPRWriteResGroup24], (instregex "^C(DQ|QO)$", "^(CL|ST)AC$")>; -def SPRWriteResGroup25 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup25 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let Latency = 3; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup25], (instrs CLD)>; -def SPRWriteResGroup26 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def 
SPRWriteResGroup26 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let Latency = 3; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup26], (instrs CLDEMOTE)>; -def SPRWriteResGroup27 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup27 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort04_09, SPRPort07_08]> { let Latency = 2; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup27], (instrs CLFLUSH)>; -def SPRWriteResGroup28 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup28 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let Latency = 2; let NumMicroOps = 3; } @@ -827,35 +827,35 @@ def SPRWriteResGroup30 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup30], (instrs CLTS)>; -def SPRWriteResGroup31 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup31 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let Latency = 5; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup31], (instregex "^MOV16o(16|32|64)a$")>; def : InstRW<[SPRWriteResGroup31], (instrs CLWB)>; -def SPRWriteResGroup32 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup32 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [5, 2]; let Latency = 6; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup32], (instregex "^CMPS(B|L|Q|W)$")>; -def SPRWriteResGroup33 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup33 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [2, 7, 6, 2, 1, 1, 2, 1]; let Latency = 32; let NumMicroOps = 22; } def : InstRW<[SPRWriteResGroup33], (instrs 
CMPXCHG16B)>; -def SPRWriteResGroup34 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup34 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [4, 7, 2, 1, 1, 1]; let Latency = 25; let NumMicroOps = 16; } def : InstRW<[SPRWriteResGroup34], (instrs CMPXCHG8B)>; -def SPRWriteResGroup35 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup35 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 1, 1, 1]; let Latency = 13; let NumMicroOps = 6; @@ -869,7 +869,7 @@ def SPRWriteResGroup36 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_06, S } def : InstRW<[SPRWriteResGroup36], (instrs CPUID)>; -def SPRWriteResGroup37 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup37 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 12; let NumMicroOps = 3; } @@ -884,7 +884,7 @@ def : InstRW<[SPRWriteResGroup37, ReadAfterVecLd], (instregex "^(V?)CVTSI642SSrm "^VCVT(U?)SI642SSZrm((_Int)?)$")>; def : InstRW<[SPRWriteResGroup37, ReadAfterVecLd], (instrs VCVTSI642SSrm)>; -def SPRWriteResGroup38 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup38 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_10]> { let Latency = 26; let NumMicroOps = 3; } @@ -926,7 +926,7 @@ def : InstRW<[SPRWriteResGroup41], (instregex "^(V?)CVT(T?)SS2SI64rr_Int$", def : InstRW<[SPRWriteResGroup41], (instrs VCVTTSS2USI64Zrr)>; def : InstRW<[SPRWriteResGroup41, ReadDefault], (instregex "^(V?)CVT(T?)SS2SI64rr$")>; -def SPRWriteResGroup42 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup42 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let Latency = 2; let NumMicroOps = 2; } 
@@ -942,18 +942,18 @@ def : InstRW<[SPRWriteResGroup43], (instrs DEC16r_alt, ST_FPrr, SYSCALL)>; -def SPRWriteResGroup44 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup44 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 7; } def : InstRW<[SPRWriteResGroup44], (instrs DEC32r_alt)>; -def SPRWriteResGroup45 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup45 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 27; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup45], (instregex "^DIVR_F(32|64)m$")>; -def SPRWriteResGroup46 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup46 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 30; let NumMicroOps = 3; } @@ -965,14 +965,14 @@ def SPRWriteResGroup47 : SchedWriteRes<[SPRPort00]> { def : InstRW<[SPRWriteResGroup47], (instregex "^DIVR_F(P?)rST0$")>; def : InstRW<[SPRWriteResGroup47], (instrs DIVR_FST0r)>; -def SPRWriteResGroup48 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup48 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 19; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup48, ReadAfterVecLd], (instregex "^(V?)DIVSDrm$")>; def : InstRW<[SPRWriteResGroup48, ReadAfterVecLd], (instrs VDIVSDZrm)>; -def SPRWriteResGroup49 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup49 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 22; let NumMicroOps = 2; } @@ -980,7 +980,7 @@ def : InstRW<[SPRWriteResGroup49], (instregex "^DIV_F(32|64)m$")>; def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instregex "^VSQRTSHZm_Int((k|kz)?)$")>; def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instrs VSQRTSHZm)>; -def SPRWriteResGroup50 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup50 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 25; 
let NumMicroOps = 3; } @@ -998,7 +998,7 @@ def : InstRW<[SPRWriteResGroup52], (instregex "^ENQCMD(S?)(16|32|64)$", "^ST_F(32|64)m$")>; def : InstRW<[SPRWriteResGroup52], (instrs PUSHF32)>; -def SPRWriteResGroup53 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup53 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [2, 21, 2, 14, 4, 9, 5]; let Latency = 126; let NumMicroOps = 57; @@ -1023,13 +1023,13 @@ def : InstRW<[SPRWriteResGroup55], (instrs MMX_PEXTRWrri, VEXTRACTPSZrri, VPERMWZrr)>; -def SPRWriteResGroup56 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09, SPRPort06]> { +def SPRWriteResGroup56 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_10, SPRPort04, SPRPort04_09, SPRPort06]> { let Latency = 7; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup56], (instrs FARCALL64m)>; -def SPRWriteResGroup57 : SchedWriteRes<[SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup57 : SchedWriteRes<[SPRPort02_03_10, SPRPort06]> { let Latency = 6; let NumMicroOps = 2; } @@ -1051,7 +1051,7 @@ def SPRWriteResGroup59 : SchedWriteRes<[SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup59], (instrs FDECSTP)>; -def SPRWriteResGroup60 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup60 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 11; let NumMicroOps = 3; @@ -1069,13 +1069,13 @@ def : InstRW<[SPRWriteResGroup61], (instregex "^MMX_P(ADD|SUB)(B|D|Q|W)rr$", def : InstRW<[SPRWriteResGroup61], (instrs FINCSTP, FNOP)>; -def SPRWriteResGroup62 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup62 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup62], (instrs FLDCW16m)>; -def SPRWriteResGroup63 : SchedWriteRes<[SPRPort00, 
SPRPort00_05, SPRPort00_06, SPRPort02_03, SPRPort02_03_11]> { +def SPRWriteResGroup63 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 5, 10, 39, 8]; let Latency = 62; let NumMicroOps = 64; @@ -1121,28 +1121,28 @@ def SPRWriteResGroup69 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06, SPRPort00 } def : InstRW<[SPRWriteResGroup69], (instrs FSTENVm)>; -def SPRWriteResGroup70 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup70 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_10, SPRPort06]> { let ReleaseAtCycles = [4, 1, 2, 1, 47, 33, 2]; let Latency = 63; let NumMicroOps = 90; } def : InstRW<[SPRWriteResGroup70], (instrs FXRSTOR)>; -def SPRWriteResGroup71 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup71 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_10, SPRPort06]> { let ReleaseAtCycles = [4, 1, 2, 1, 45, 31, 4]; let Latency = 63; let NumMicroOps = 88; } def : InstRW<[SPRWriteResGroup71], (instrs FXRSTOR64)>; -def SPRWriteResGroup72 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup72 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [2, 5, 10, 10, 2, 38, 5, 38]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 110; } def : InstRW<[SPRWriteResGroup72], (instregex "^FXSAVE((64)?)$")>; -def SPRWriteResGroup73 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup73 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 12; let NumMicroOps = 2; } @@ -1212,41 +1212,41 @@ def : 
InstRW<[SPRWriteResGroup74], (instregex "^(V?)GF2P8MULBrr$", def : InstRW<[SPRWriteResGroup74], (instrs VCVTSH2SSZrr, VGF2P8MULBYrr)>; -def SPRWriteResGroup75 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup75 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 5, 26, 19, 2, 7, 21]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[SPRWriteResGroup75], (instrs IN16ri)>; -def SPRWriteResGroup76 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup76 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 1, 4, 26, 19, 3, 7, 20]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[SPRWriteResGroup76], (instrs IN16rr)>; -def SPRWriteResGroup77 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup77 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 6, 28, 21, 2, 10, 20]; let Latency = 35; let NumMicroOps = 94; } def : InstRW<[SPRWriteResGroup77], (instrs IN32ri)>; -def SPRWriteResGroup78 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup78 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 9, 28, 21, 2, 11, 21]; let NumMicroOps = 99; } def : InstRW<[SPRWriteResGroup78], (instrs IN32rr)>; -def SPRWriteResGroup79 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, 
SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup79 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 6, 25, 19, 2, 8, 20]; let Latency = 35; let NumMicroOps = 87; } def : InstRW<[SPRWriteResGroup79], (instrs IN8ri)>; -def SPRWriteResGroup80 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup80 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 6, 25, 19, 2, 7, 20]; let Latency = 35; let NumMicroOps = 86; @@ -1258,7 +1258,7 @@ def SPRWriteResGroup81 : SchedWriteRes<[SPRPort00_06]> { } def : InstRW<[SPRWriteResGroup81], (instrs INC16r_alt)>; -def SPRWriteResGroup82 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup82 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 7; } def : InstRW<[SPRWriteResGroup82], (instregex "^LD_F(32|64|80)m$", @@ -1269,28 +1269,28 @@ def : InstRW<[SPRWriteResGroup82], (instregex "^LD_F(32|64|80)m$", def : InstRW<[SPRWriteResGroup82], (instrs INC32r_alt, VBROADCASTI32X2Z128rm)>; -def SPRWriteResGroup83 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup83 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 6, 24, 17, 8, 1, 19, 1]; let Latency = 20; let NumMicroOps = 83; } def : InstRW<[SPRWriteResGroup83], (instrs INSB)>; -def SPRWriteResGroup84 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup84 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_11, 
SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1]; let Latency = 20; let NumMicroOps = 92; } def : InstRW<[SPRWriteResGroup84], (instrs INSL)>; -def SPRWriteResGroup85 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup85 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1]; let Latency = 20; let NumMicroOps = 86; } def : InstRW<[SPRWriteResGroup85], (instrs INSW)>; -def SPRWriteResGroup86 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup86 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [5, 4, 8, 6, 2, 5, 7, 5]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 42; @@ -1350,7 +1350,7 @@ def : InstRW<[SPRWriteResGroup92], (instregex "^KAND(B|D|Q|W|ND|NQ|NW)kk$", def : InstRW<[SPRWriteResGroup92], (instrs KANDNBkk, VPSUBUSBZrr)>; -def SPRWriteResGroup93 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup93 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 7; let NumMicroOps = 2; } @@ -1397,48 +1397,48 @@ def : InstRW<[SPRWriteResGroup96], (instregex "^K((OR)?)TEST(B|D|Q|W)kk$", "^VPSUBUS(B|W)Zrrk(z?)$")>; def : InstRW<[SPRWriteResGroup96], (instrs VMOVSDto64Zrr)>; -def SPRWriteResGroup97 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup97 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_10, 
SPRPort05]> { let ReleaseAtCycles = [8, 2, 14, 3, 1]; let Latency = 198; let NumMicroOps = 81; } def : InstRW<[SPRWriteResGroup97], (instrs LAR16rm)>; -def SPRWriteResGroup98 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup98 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1]; let Latency = 66; let NumMicroOps = 22; } def : InstRW<[SPRWriteResGroup98], (instrs LAR16rr)>; -def SPRWriteResGroup99 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup99 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1]; let Latency = 71; let NumMicroOps = 85; } def : InstRW<[SPRWriteResGroup99], (instrs LAR32rm)>; -def SPRWriteResGroup100 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup100 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 3, 1, 8, 5, 1, 2, 1]; let Latency = 65; let NumMicroOps = 22; } def : InstRW<[SPRWriteResGroup100], (instregex "^LAR(32|64)rr$")>; -def SPRWriteResGroup101 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup101 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2, 2, 9, 5, 3, 1]; let Latency = 71; let NumMicroOps = 87; } def : InstRW<[SPRWriteResGroup101], (instrs LAR64rm)>; -def SPRWriteResGroup102 : 
SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> { +def SPRWriteResGroup102 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01]> { let Latency = 2; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup102], (instrs LEA16r)>; -def SPRWriteResGroup103 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup103 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 6; let NumMicroOps = 4; @@ -1447,104 +1447,104 @@ def : InstRW<[SPRWriteResGroup103], (instregex "^LODS(B|W)$", "^SCAS(B|L|Q|W)$")>; def : InstRW<[SPRWriteResGroup103], (instrs LEAVE)>; -def SPRWriteResGroup104 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup104 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 6; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup104], (instrs LEAVE64)>; -def SPRWriteResGroup105 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup105 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 4, 3, 2, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 14; } def : InstRW<[SPRWriteResGroup105], (instrs LGDT64m)>; -def SPRWriteResGroup106 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup106 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 5, 3, 2, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 14; } def : InstRW<[SPRWriteResGroup106], (instrs LIDT64m)>; -def SPRWriteResGroup107 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def 
SPRWriteResGroup107 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [5, 3, 2, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 12; } def : InstRW<[SPRWriteResGroup107], (instrs LLDT16m)>; -def SPRWriteResGroup108 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup108 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 4, 3, 1, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 11; } def : InstRW<[SPRWriteResGroup108], (instrs LLDT16r)>; -def SPRWriteResGroup109 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup109 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 2, 8, 3, 1, 2, 7, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 27; } def : InstRW<[SPRWriteResGroup109], (instrs LMSW16m)>; -def SPRWriteResGroup110 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup110 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [5, 7, 1, 2, 5, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 22; } def : InstRW<[SPRWriteResGroup110], (instrs LMSW16r)>; -def SPRWriteResGroup111 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup111 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 5; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup111], (instregex "^LODS(L|Q)$")>; -def 
SPRWriteResGroup112 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup112 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [2, 4, 1]; let Latency = 3; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup112], (instrs LOOP)>; -def SPRWriteResGroup113 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup113 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [4, 6, 1]; let Latency = 3; let NumMicroOps = 11; } def : InstRW<[SPRWriteResGroup113], (instrs LOOPE)>; -def SPRWriteResGroup114 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup114 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [4, 6, 1]; let Latency = 2; let NumMicroOps = 11; } def : InstRW<[SPRWriteResGroup114], (instrs LOOPNE)>; -def SPRWriteResGroup115 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup115 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_10, SPRPort06]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup115], (instrs LRET64)>; -def SPRWriteResGroup116 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup116 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 5, 3, 3, 1]; let Latency = 70; let NumMicroOps = 13; } def : InstRW<[SPRWriteResGroup116], (instregex "^LSL(16|32|64)rm$")>; -def SPRWriteResGroup117 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup117 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 4, 4, 3, 2, 1]; let Latency = 63; let NumMicroOps = 15; } def : InstRW<[SPRWriteResGroup117], (instregex "^LSL(16|32|64)rr$")>; -def 
SPRWriteResGroup118 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup118 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 24; let NumMicroOps = 3; } @@ -1574,7 +1574,7 @@ def SPRWriteResGroup121 : SchedWriteRes<[SPRPort00, SPRPort00_01]> { } def : InstRW<[SPRWriteResGroup121], (instrs MMX_CVTPI2PSrr)>; -def SPRWriteResGroup122 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup122 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 13; let NumMicroOps = 2; } @@ -1600,7 +1600,7 @@ def SPRWriteResGroup125 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { def : InstRW<[SPRWriteResGroup125], (instregex "^VMOV(W|SHZ)mr$")>; def : InstRW<[SPRWriteResGroup125], (instrs MMX_MOVD64mr)>; -def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 8; } def : InstRW<[SPRWriteResGroup126], (instregex "^MMX_MOV(D|Q)64rm$", @@ -1631,7 +1631,7 @@ def SPRWriteResGroup128 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup128], (instregex "^MMX_MOVQ2(DQ|FR64)rr$")>; -def SPRWriteResGroup129 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup129 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 12; let NumMicroOps = 3; @@ -1653,7 +1653,7 @@ def : InstRW<[SPRWriteResGroup130], (instregex "^MMX_PACKSS(DW|WB)rr$", def : InstRW<[SPRWriteResGroup130], (instrs MMX_PACKUSWBrr)>; def : InstRW<[SPRWriteResGroup130, ReadDefault, ReadInt2Fpu], (instrs MMX_PINSRWrri)>; -def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -1675,7 +1675,7 @@ def : InstRW<[SPRWriteResGroup131, ReadAfterVecYLd], (instregex "^VINSERT(F|I)(3 "^VPTERNLOG(D|Q)Zrmbik(z?)$", "^VPTERNLOG(D|Q)Zrmi((kz)?)$")>; -def SPRWriteResGroup132 : 
SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup132 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 11; let NumMicroOps = 4; @@ -1689,7 +1689,7 @@ def SPRWriteResGroup133 : SchedWriteRes<[SPRPort00, SPRPort05]> { } def : InstRW<[SPRWriteResGroup133], (instregex "^MMX_PH(ADD|SUB)SWrr$")>; -def SPRWriteResGroup134 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup134 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 9; let NumMicroOps = 2; } @@ -1702,7 +1702,7 @@ def : InstRW<[SPRWriteResGroup134, ReadAfterVecLd], (instregex "^VFPCLASSS(D|H|S def : InstRW<[SPRWriteResGroup134, ReadAfterVecYLd], (instregex "^VPALIGNR(Y|Z256)rmi$")>; def : InstRW<[SPRWriteResGroup134, ReadAfterVecYLd], (instrs VPSHUFBZrm)>; -def SPRWriteResGroup135 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup135 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let Latency = 5; let NumMicroOps = 2; } @@ -1716,42 +1716,42 @@ def : InstRW<[SPRWriteResGroup136], (instregex "^PUSH(F|G)S(16|32)$")>; def : InstRW<[SPRWriteResGroup136], (instrs MOV16ms, MOVBE32mr)>; -def SPRWriteResGroup137 : SchedWriteRes<[SPRPort00_01_05_06_10]>; +def SPRWriteResGroup137 : SchedWriteRes<[SPRPort00_01_05_06_11]>; def : InstRW<[SPRWriteResGroup137], (instregex "^MOV(8|16|32|64)ri$", "^MOV(8|16|32)ri_alt$", "^MOV(8|16)rr((_REV)?)$")>; def : InstRW<[SPRWriteResGroup137], (instrs MOV64ri32, MOV8rr_NOREX)>; -def SPRWriteResGroup138 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> { +def SPRWriteResGroup138 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup138], (instregex "^MOV(16|32|64)rs$", "^S(TR|LDT)16r$")>; -def SPRWriteResGroup139 : SchedWriteRes<[SPRPort02_03_11]>; +def SPRWriteResGroup139 : SchedWriteRes<[SPRPort02_03_10]>; def : InstRW<[SPRWriteResGroup139], (instregex "^MOV32ao(16|32|64)$")>; def : 
InstRW<[SPRWriteResGroup139], (instrs MOV64ao64)>; -def SPRWriteResGroup140 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup140 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup140], (instregex "^MOV(8|32)o(16|32)a$", "^MOV(8|32|64)o64a$")>; -def SPRWriteResGroup141 : SchedWriteRes<[SPRPort00_01_05_06_10]> { +def SPRWriteResGroup141 : SchedWriteRes<[SPRPort00_01_05_06_11]> { let Latency = 0; } def : InstRW<[SPRWriteResGroup141], (instregex "^MOV32rr((_REV)?)$", "^MOVZX(32|64)rr8$")>; def : InstRW<[SPRWriteResGroup141], (instrs MOVZX32rr8_NOREX)>; -def SPRWriteResGroup142 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup142 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 5; } def : InstRW<[SPRWriteResGroup142], (instrs MOV64ao32)>; -def SPRWriteResGroup143 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup143 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2]; let Latency = 217; let NumMicroOps = 48; @@ -1764,20 +1764,20 @@ def SPRWriteResGroup144 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup144], (instrs MOV64o32a)>; -def SPRWriteResGroup145 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort05]> { +def SPRWriteResGroup145 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort05]> { let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup145], (instrs MOV64rc)>; -def SPRWriteResGroup146 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort05]> { +def SPRWriteResGroup146 : SchedWriteRes<[SPRPort00_01_05, 
SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort05]> { let ReleaseAtCycles = [3, 4, 8, 4, 2, 3]; let Latency = 181; let NumMicroOps = 24; } def : InstRW<[SPRWriteResGroup146], (instrs MOV64rd)>; -def SPRWriteResGroup147 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup147 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup147], (instregex "^MOV8ao(16|32|64)$")>; @@ -1788,13 +1788,13 @@ def SPRWriteResGroup148 : SchedWriteRes<[SPRPort00_06, SPRPort04_09, SPRPort07_0 } def : InstRW<[SPRWriteResGroup148], (instrs MOVBE16mr)>; -def SPRWriteResGroup149 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup149 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup149], (instrs MOVBE16rm)>; -def SPRWriteResGroup150 : SchedWriteRes<[SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup150 : SchedWriteRes<[SPRPort01, SPRPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } @@ -1809,13 +1809,13 @@ def : InstRW<[SPRWriteResGroup151], (instrs MOVBE64mr, SLDT16m, STRm)>; -def SPRWriteResGroup152 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup152 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup152], (instrs MOVBE64rm)>; -def SPRWriteResGroup153 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup153 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup153], (instregex "^MOVDIR64B(16|32|64)$")>; @@ -1832,7 +1832,7 @@ def SPRWriteResGroup155 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup155], (instrs MOVDIRI64)>; -def SPRWriteResGroup156 : 
SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup156 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -1855,7 +1855,7 @@ def SPRWriteResGroup158 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup158], (instrs MOVNTImr)>; -def SPRWriteResGroup159 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup159 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [4, 1, 1, 1]; let Latency = 8; let NumMicroOps = 7; @@ -1876,31 +1876,31 @@ def : InstRW<[SPRWriteResGroup160], (instregex "^(V?)MOVS(D|S)rr((_REV)?)$", "^VPTERNLOG(D|Q)Z(128|256)rri((k|kz)?)$")>; def : InstRW<[SPRWriteResGroup160], (instrs VPBLENDDrri)>; -def SPRWriteResGroup161 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup161 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [4, 1, 1, 1]; let Latency = 7; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup161], (instregex "^MOVS(L|Q|W)$")>; -def SPRWriteResGroup162 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup162 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 6; } def : InstRW<[SPRWriteResGroup162], (instregex "^MOVSX(16|32|64)rm(16|32)$", "^MOVSX(32|64)rm8$")>; def : InstRW<[SPRWriteResGroup162], (instrs MOVSX32rm8_NOREX)>; -def SPRWriteResGroup163 : SchedWriteRes<[SPRPort01_05_10, SPRPort02_03_11]> { +def SPRWriteResGroup163 : SchedWriteRes<[SPRPort01_05_11, SPRPort02_03_10]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup163], (instrs MOVSX16rm8)>; -def SPRWriteResGroup164 : SchedWriteRes<[SPRPort01_05_10]>; +def SPRWriteResGroup164 : SchedWriteRes<[SPRPort01_05_11]>; def : InstRW<[SPRWriteResGroup164], (instregex "^MOVSX(16|32|64)rr(8|16|32)$")>; def : InstRW<[SPRWriteResGroup164], (instrs 
MOVSX32rr8_NOREX)>; -def SPRWriteResGroup165 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup165 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -1916,7 +1916,7 @@ def : InstRW<[SPRWriteResGroup165, ReadAfterVecYLd], (instregex "^VP(ADD|SUB)(U? "^VPS(L|R)L(V?)WZrmk(z?)$", "^VPSRA(V?)WZrmk(z?)$")>; -def SPRWriteResGroup166 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup166 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 14; let NumMicroOps = 3; } @@ -1945,70 +1945,70 @@ def SPRWriteResGroup168 : SchedWriteRes<[SPRPort00_01_05_06, SPRPort05, SPRPort0 } def : InstRW<[SPRWriteResGroup168], (instrs MWAITrr)>; -def SPRWriteResGroup169 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup169 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1]; let Latency = 35; let NumMicroOps = 79; } def : InstRW<[SPRWriteResGroup169], (instrs OUT16ir)>; -def SPRWriteResGroup170 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup170 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 6, 27, 15, 7, 1, 16, 1]; let Latency = 35; let NumMicroOps = 79; } def : InstRW<[SPRWriteResGroup170], (instrs OUT16rr)>; -def SPRWriteResGroup171 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup171 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, 
SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1]; let Latency = 35; let NumMicroOps = 85; } def : InstRW<[SPRWriteResGroup171], (instrs OUT32ir)>; -def SPRWriteResGroup172 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup172 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 6, 29, 15, 9, 1, 18, 1]; let Latency = 35; let NumMicroOps = 85; } def : InstRW<[SPRWriteResGroup172], (instrs OUT32rr)>; -def SPRWriteResGroup173 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup173 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1]; let Latency = 35; let NumMicroOps = 73; } def : InstRW<[SPRWriteResGroup173], (instrs OUT8ir)>; -def SPRWriteResGroup174 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup174 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [5, 5, 26, 15, 5, 1, 15, 1]; let Latency = 35; let NumMicroOps = 73; } def : InstRW<[SPRWriteResGroup174], (instrs OUT8rr)>; -def SPRWriteResGroup175 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup175 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 6, 25, 16, 
7, 1, 17, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 80; } def : InstRW<[SPRWriteResGroup175], (instrs OUTSB)>; -def SPRWriteResGroup176 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup176 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [7, 6, 28, 16, 10, 1, 20, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 89; } def : InstRW<[SPRWriteResGroup176], (instrs OUTSL)>; -def SPRWriteResGroup177 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup177 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 83; } def : InstRW<[SPRWriteResGroup177], (instrs OUTSW)>; -def SPRWriteResGroup178 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup178 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -2028,7 +2028,7 @@ def : InstRW<[SPRWriteResGroup178, ReadAfterVecXLd], (instregex "^(V?)P(ADD|SUB) "^VPTERNLOG(D|Q)Z128rmi((kz)?)$")>; def : InstRW<[SPRWriteResGroup178, ReadAfterVecXLd], (instrs VPBLENDDrmi)>; -def SPRWriteResGroup179 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup179 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 8; let NumMicroOps = 2; } @@ -2045,7 +2045,7 @@ def SPRWriteResGroup180 : SchedWriteRes<[SPRPort00_06, SPRPort05]> { } def : InstRW<[SPRWriteResGroup180], (instrs PAUSE)>; -def SPRWriteResGroup181 : SchedWriteRes<[SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup181 
: SchedWriteRes<[SPRPort01, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -2059,7 +2059,7 @@ def : InstRW<[SPRWriteResGroup182], (instregex "^(V?)PEXTR(D|Q)mri$", "^VPEXTR(D|Q)Zmri$", "^VPMOVQDZ128mr(k?)$")>; -def SPRWriteResGroup183 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup183 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 2, 1]; let Latency = 9; let NumMicroOps = 4; @@ -2074,48 +2074,48 @@ def SPRWriteResGroup184 : SchedWriteRes<[SPRPort00_01, SPRPort01_05]> { def : InstRW<[SPRWriteResGroup184], (instregex "^(V?)PH(ADD|SUB)SWrr$", "^VPH(ADD|SUB)SWYrr$")>; -def SPRWriteResGroup185 : SchedWriteRes<[SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup185 : SchedWriteRes<[SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 12; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup185], (instregex "^POP(16|32|64)rmm$", "^PUSH(16|32)rmm$")>; -def SPRWriteResGroup186 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup186 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [6, 2, 1, 1]; let Latency = 5; let NumMicroOps = 10; } def : InstRW<[SPRWriteResGroup186], (instrs POPF16)>; -def SPRWriteResGroup187 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup187 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 5; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup187], (instrs POPF64)>; -def SPRWriteResGroup188 : SchedWriteRes<[SPRPort02_03_11]> { +def SPRWriteResGroup188 : SchedWriteRes<[SPRPort02_03_10]> { let Latency = 0; } def : InstRW<[SPRWriteResGroup188], (instregex "^PREFETCHT(0|1|2)$")>; def : InstRW<[SPRWriteResGroup188], (instrs PREFETCHNTA)>; -def SPRWriteResGroup189 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, 
SPRPort06]> { +def SPRWriteResGroup189 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10, SPRPort06]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup189], (instregex "^PTWRITE((64)?)m$")>; -def SPRWriteResGroup190 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort06]> { +def SPRWriteResGroup190 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort06]> { let ReleaseAtCycles = [1, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup190], (instrs PTWRITE64r)>; -def SPRWriteResGroup191 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort06]> { +def SPRWriteResGroup191 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort06]> { let ReleaseAtCycles = [2, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 4; @@ -2127,7 +2127,7 @@ def SPRWriteResGroup192 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup192], (instregex "^PUSH64r((mr)?)$")>; -def SPRWriteResGroup193 : SchedWriteRes<[SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup193 : SchedWriteRes<[SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup193], (instrs PUSH64rmm)>; @@ -2143,49 +2143,49 @@ def SPRWriteResGroup195 : SchedWriteRes<[SPRPort01, SPRPort04_09, SPRPort07_08]> } def : InstRW<[SPRWriteResGroup195], (instregex "^PUSH(F|G)S64$")>; -def SPRWriteResGroup196 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup196 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [2, 3, 2]; let Latency = 8; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup196], (instregex "^RC(L|R)(16|32|64)rCL$")>; -def SPRWriteResGroup197 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup197 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [1, 2]; let Latency = 13; 
let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup197, WriteRMW], (instregex "^RC(L|R)8m(1|i)$")>; -def SPRWriteResGroup198 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup198 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [1, 5, 2]; let Latency = 20; let NumMicroOps = 8; } def : InstRW<[SPRWriteResGroup198, WriteRMW], (instrs RCL8mCL)>; -def SPRWriteResGroup199 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup199 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [2, 5, 2]; let Latency = 7; let NumMicroOps = 9; } def : InstRW<[SPRWriteResGroup199], (instrs RCL8rCL)>; -def SPRWriteResGroup200 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup200 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [2, 4, 3]; let Latency = 20; let NumMicroOps = 9; } def : InstRW<[SPRWriteResGroup200, WriteRMW], (instrs RCR8mCL)>; -def SPRWriteResGroup201 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup201 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [3, 4, 3]; let Latency = 9; let NumMicroOps = 10; } def : InstRW<[SPRWriteResGroup201], (instrs RCR8rCL)>; -def SPRWriteResGroup202 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort01_05_10, SPRPort05]> { +def SPRWriteResGroup202 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort01_05_11, SPRPort05]> { let ReleaseAtCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 54; @@ -2197,48 +2197,48 @@ def SPRWriteResGroup203 : SchedWriteRes<[SPRPort01]> { } def : InstRW<[SPRWriteResGroup203], (instrs RDPID64)>; -def 
SPRWriteResGroup204 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup204 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup204], (instrs RDPKRUr)>; -def SPRWriteResGroup205 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> { +def SPRWriteResGroup205 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort05]> { let ReleaseAtCycles = [9, 6, 2, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 18; } def : InstRW<[SPRWriteResGroup205], (instrs RDPMC)>; -def SPRWriteResGroup206 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup206 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2]; let Latency = 1386; let NumMicroOps = 25; } def : InstRW<[SPRWriteResGroup206], (instrs RDRAND16r)>; -def SPRWriteResGroup207 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup207 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 3, 2, 5, 7, 3, 1, 2]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 25; } def : InstRW<[SPRWriteResGroup207], (instregex "^RDRAND(32|64)r$")>; -def SPRWriteResGroup208 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup208 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4]; let 
Latency = 1381; let NumMicroOps = 25; } def : InstRW<[SPRWriteResGroup208], (instrs RDSEED16r)>; -def SPRWriteResGroup209 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup209 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 3, 3, 5, 7, 1, 4]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 25; } def : InstRW<[SPRWriteResGroup209], (instregex "^RDSEED(32|64)r$")>; -def SPRWriteResGroup210 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> { +def SPRWriteResGroup210 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort05]> { let ReleaseAtCycles = [5, 6, 3, 1]; let Latency = 18; let NumMicroOps = 15; @@ -2252,13 +2252,13 @@ def SPRWriteResGroup211 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_0 } def : InstRW<[SPRWriteResGroup211], (instrs RDTSCP)>; -def SPRWriteResGroup212 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup212 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10]> { let Latency = 7; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup212], (instrs RET64)>; -def SPRWriteResGroup213 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup213 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 6; let NumMicroOps = 3; @@ -2290,7 +2290,7 @@ def : InstRW<[SPRWriteResGroup217, WriteRMW], (instregex "^RO(L|R)8m(1|i)$", "^(RO|SH)L8mCL$", "^(RO|SA|SH)R8mCL$")>; -def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 15; let NumMicroOps = 3; @@ -2329,7 +2329,7 @@ def SPRWriteResGroup221 : SchedWriteRes<[SPRPort00_06]> { def : InstRW<[SPRWriteResGroup221, WriteRMW], (instregex "^S(A|H)R8m(1|i)$", 
"^SHL8m(1|i)$")>; -def SPRWriteResGroup222 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup222 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -2342,7 +2342,7 @@ def SPRWriteResGroup223 : SchedWriteRes<[SPRPort00_06]> { def : InstRW<[SPRWriteResGroup223], (instregex "^S(A|H)RX(32|64)rr$", "^SHLX(32|64)rr$")>; -def SPRWriteResGroup224 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup224 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 2, 1, 1, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 7; @@ -2355,14 +2355,14 @@ def SPRWriteResGroup225 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup225], (instrs SFENCE)>; -def SPRWriteResGroup226 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup226 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 2, 2]; let Latency = 21; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup226], (instregex "^S(G|I)DT64m$")>; -def SPRWriteResGroup227 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup227 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 9; let NumMicroOps = 3; } @@ -2374,7 +2374,7 @@ def SPRWriteResGroup228 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup228], (instrs SHA1MSG1rr)>; -def SPRWriteResGroup229 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup229 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 2, 1, 2, 1]; let Latency = 13; let NumMicroOps = 8; @@ -2388,7 +2388,7 @@ def SPRWriteResGroup230 : 
SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup230], (instrs SHA1MSG2rr)>; -def SPRWriteResGroup231 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup231 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 4; } @@ -2400,7 +2400,7 @@ def SPRWriteResGroup232 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup232], (instrs SHA1NEXTErr)>; -def SPRWriteResGroup233 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup233 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 13; let NumMicroOps = 2; } @@ -2432,7 +2432,7 @@ def SPRWriteResGroup234 : SchedWriteRes<[SPRPort05]> { def : InstRW<[SPRWriteResGroup234], (instrs SHA1RNDS4rri, SHA256RNDS2rr)>; -def SPRWriteResGroup235 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup235 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [3, 2, 1, 1, 1]; let Latency = 12; let NumMicroOps = 8; @@ -2457,64 +2457,64 @@ def : InstRW<[SPRWriteResGroup237], (instregex "^VPMOV(D|Q|W|SQ|SW)BZrrk(z?)$", "^VPMOVUS(Q|W)BZrrk(z?)$")>; def : InstRW<[SPRWriteResGroup237], (instrs SHA256MSG2rr)>; -def SPRWriteResGroup238 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup238 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08]> { let Latency = 13; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup238], (instrs SHRD16mri8)>; -def SPRWriteResGroup239 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> { +def SPRWriteResGroup239 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup239], (instregex "^SLDT(32|64)r$")>; -def 
SPRWriteResGroup240 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort05]> { +def SPRWriteResGroup240 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort05]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup240], (instrs SMSW16r)>; -def SPRWriteResGroup241 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort05]> { +def SPRWriteResGroup241 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort05]> { let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup241], (instregex "^SMSW(32|64)r$")>; -def SPRWriteResGroup242 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup242 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 24; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup242, ReadAfterVecLd], (instregex "^(V?)SQRTSDm_Int$")>; def : InstRW<[SPRWriteResGroup242, ReadAfterVecLd], (instrs VSQRTSDZm_Int)>; -def SPRWriteResGroup243 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup243 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let Latency = 6; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup243], (instrs STD)>; -def SPRWriteResGroup244 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup244 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [1, 4, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup244], (instrs STI)>; -def SPRWriteResGroup245 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup245 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 8; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup245], (instrs STOSB)>; -def SPRWriteResGroup246 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup246 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles 
= [2, 1, 1]; let Latency = 7; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup246], (instregex "^STOS(L|Q|W)$")>; -def SPRWriteResGroup247 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> { +def SPRWriteResGroup247 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort01]> { let Latency = 5; let NumMicroOps = 2; } @@ -2602,7 +2602,7 @@ def : InstRW<[SPRWriteResGroup253], (instregex "^V(ADD|SUB)PHZrr(bk|kz)$", "^VM(AX|IN|UL)PHZrr(bk|kz)$", "^VM(AX|IN|UL)PHZrr(k|bkz)$")>; -def SPRWriteResGroup254 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup254 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -2617,7 +2617,7 @@ def : InstRW<[SPRWriteResGroup254, ReadAfterVecYLd], (instrs VADDSUBPSYrm)>; def : InstRW<[SPRWriteResGroup254, ReadAfterVecXLd], (instregex "^VPSHUFBZ128rmk(z?)$", "^VPUNPCK(H|L)(BW|WD)Z128rmk(z?)$")>; -def SPRWriteResGroup255 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup255 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -2633,7 +2633,7 @@ def SPRWriteResGroup256 : SchedWriteRes<[SPRPort00_05]> { def : InstRW<[SPRWriteResGroup256], (instregex "^V(ADD|SUB)PSZrr(bk|kz)$", "^V(ADD|SUB)PSZrr(k|bkz)$")>; -def SPRWriteResGroup257 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup257 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 12; let NumMicroOps = 2; } @@ -2647,7 +2647,7 @@ def : InstRW<[SPRWriteResGroup257, ReadAfterVecYLd], (instrs VGF2P8MULBZrm)>; def : InstRW<[SPRWriteResGroup257, ReadAfterVecYLd, ReadAfterVecYLd], (instregex "^VPMADD52(H|L)UQZm((b|k|bk|kz)?)$", "^VPMADD52(H|L)UQZmbkz$")>; -def SPRWriteResGroup258 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup258 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 11; let NumMicroOps = 2; } @@ -2673,7 +2673,7 @@ def : InstRW<[SPRWriteResGroup258, ReadAfterVecXLd], (instregex "^VPALIGNRZ128rm 
"^VPCLMULQDQ(Y|Z)rmi$")>; def : InstRW<[SPRWriteResGroup258, ReadAfterVecXLd], (instrs VPCLMULQDQZ256rmi)>; -def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 10; let NumMicroOps = 4; @@ -2690,7 +2690,7 @@ def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rrr$", "^VBLENDVP(D|SY)rrr$", "^VPBLENDVB(Y?)rrr$")>; -def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 9; let NumMicroOps = 4; @@ -2698,7 +2698,7 @@ def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rmr$")>; def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrmr)>; -def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -2718,7 +2718,7 @@ def : InstRW<[SPRWriteResGroup262, ReadAfterVecYLd], (instregex "^VINSERT(F|I)12 "^VPTERNLOG(D|Q)Z256rmbik(z?)$", "^VPTERNLOG(D|Q)Z256rmi((kz)?)$")>; -def SPRWriteResGroup263 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup263 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let Latency = 3; let NumMicroOps = 2; } @@ -2748,7 +2748,7 @@ def : InstRW<[SPRWriteResGroup263, ReadAfterVecLd], (instregex "^VCMPS(D|H|S)Zrm "^VCMPS(D|H|S)Zrmi_Int(k?)$", "^VFPCLASSS(D|H|S)Zmik$")>; -def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let 
Latency = 10; let NumMicroOps = 2; } @@ -2806,14 +2806,14 @@ def : InstRW<[SPRWriteResGroup268], (instregex "^VCVT(U?)DQ2PDZrr((k|kz)?)$", "^VCVT(U?)QQ2PSZrr((b|k|bk|kz)?)$", "^VCVT(U?)QQ2PSZrrbkz$")>; -def SPRWriteResGroup269 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup269 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 15; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup269], (instregex "^VCVT(U?)DQ2PHZ128rm(b?)$", "^VCVTNEPS2BF16Z128rm(b?)$")>; -def SPRWriteResGroup270 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup270 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 19; let NumMicroOps = 4; } @@ -2832,7 +2832,7 @@ def SPRWriteResGroup272 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> } def : InstRW<[SPRWriteResGroup272], (instregex "^VCVT(U?)DQ2PHZ128rrk(z?)$")>; -def SPRWriteResGroup273 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup273 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 17; let NumMicroOps = 4; } @@ -2840,7 +2840,7 @@ def : InstRW<[SPRWriteResGroup273], (instregex "^VCVT(U?)DQ2PHZ256rm(b?)$", "^VCVTNEPS2BF16Z128rm(bk|kz)$", "^VCVTNEPS2BF16Z128rm(k|bkz)$")>; -def SPRWriteResGroup274 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup274 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 21; let NumMicroOps = 4; } @@ -2859,14 +2859,14 @@ def SPRWriteResGroup276 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> } def : InstRW<[SPRWriteResGroup276], (instregex "^VCVT(U?)DQ2PHZ256rrk(z?)$")>; -def SPRWriteResGroup277 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup277 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let 
ReleaseAtCycles = [1, 1, 2]; let Latency = 17; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup277], (instregex "^VCVT(U?)DQ2PHZrm(b?)$")>; -def SPRWriteResGroup278 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup278 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 21; let NumMicroOps = 4; @@ -2889,14 +2889,14 @@ def SPRWriteResGroup280 : SchedWriteRes<[SPRPort00, SPRPort05]> { def : InstRW<[SPRWriteResGroup280], (instregex "^VCVT(U?)DQ2PHZrr(bk|kz)$", "^VCVT(U?)DQ2PHZrr(k|bkz)$")>; -def SPRWriteResGroup281 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup281 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1]; let Latency = 15; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup281, ReadAfterVecXLd], (instregex "^VCVTNE2PS2BF16Z128rm(b?)$")>; -def SPRWriteResGroup282 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup282 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1]; let Latency = 17; let NumMicroOps = 5; @@ -2918,14 +2918,14 @@ def SPRWriteResGroup284 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> } def : InstRW<[SPRWriteResGroup284], (instregex "^VCVTNE2PS2BF16Z(128|256)rrk(z?)$")>; -def SPRWriteResGroup285 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup285 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1]; let Latency = 16; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup285, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Z256rm(b?)$")>; -def SPRWriteResGroup286 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup286 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, 
SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1]; let Latency = 18; let NumMicroOps = 5; @@ -2933,7 +2933,7 @@ def SPRWriteResGroup286 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_0 def : InstRW<[SPRWriteResGroup286, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Z256rm(bk|kz)$", "^VCVTNE2PS2BF16Z256rm(k|bkz)$")>; -def SPRWriteResGroup287 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup287 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 16; let NumMicroOps = 5; @@ -2942,7 +2942,7 @@ def : InstRW<[SPRWriteResGroup287, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16 "^VDPBF16PSZm((b|k|bk|kz)?)$")>; def : InstRW<[SPRWriteResGroup287, ReadAfterVecYLd], (instrs VDPBF16PSZmbkz)>; -def SPRWriteResGroup288 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup288 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 18; let NumMicroOps = 5; @@ -2977,27 +2977,27 @@ def SPRWriteResGroup292 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> } def : InstRW<[SPRWriteResGroup292], (instregex "^VCVTNEPS2BF16Z(128|256)rrk(z?)$")>; -def SPRWriteResGroup293 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup293 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 16; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup293], (instregex "^VCVTNEPS2BF16Z256rm(b?)$")>; -def SPRWriteResGroup294 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup294 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 18; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup294], (instregex "^VCVTNEPS2BF16Z256rm(bk|kz)$", "^VCVTNEPS2BF16Z256rm(k|bkz)$")>; -def SPRWriteResGroup295 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup295 : 
SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 16; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup295], (instregex "^VCVTNEPS2BF16Zrm(b?)$")>; -def SPRWriteResGroup296 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup296 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 18; let NumMicroOps = 4; @@ -3019,7 +3019,7 @@ def SPRWriteResGroup298 : SchedWriteRes<[SPRPort00, SPRPort05]> { } def : InstRW<[SPRWriteResGroup298], (instregex "^VCVTNEPS2BF16Zrrk(z?)$")>; -def SPRWriteResGroup299 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup299 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 15; let NumMicroOps = 3; } @@ -3033,7 +3033,7 @@ def : InstRW<[SPRWriteResGroup299], (instregex "^VCVT(T?)PD2DQYrm$", "^VCVT(U?)QQ2PSZ256rm((b|k|bk|kz)?)$", "^VCVT(U?)QQ2PSZ256rmbkz$")>; -def SPRWriteResGroup300 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup300 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 15; let NumMicroOps = 3; } @@ -3044,14 +3044,14 @@ def : InstRW<[SPRWriteResGroup300], (instregex "^VCVT(T?)P(D|H)2(U?)DQZrm(b?)$", "^VCVT(U?)QQ2PSZrm((b|k|bk|kz)?)$", "^VCVT(U?)QQ2PSZrmbkz$")>; -def SPRWriteResGroup301 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup301 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1, 2]; let Latency = 19; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup301], (instregex "^VCVTPD2PHZ128rm(b?)$")>; -def SPRWriteResGroup302 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup302 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let 
ReleaseAtCycles = [2, 1, 1, 1, 2]; let Latency = 22; let NumMicroOps = 7; @@ -3073,14 +3073,14 @@ def SPRWriteResGroup304 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup304], (instregex "^VCVTPD2PHZ128rrk(z?)$")>; -def SPRWriteResGroup305 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup305 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 2]; let Latency = 21; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup305], (instregex "^VCVTPD2PHZ256rm(b?)$")>; -def SPRWriteResGroup306 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup306 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 2]; let Latency = 24; let NumMicroOps = 6; @@ -3102,14 +3102,14 @@ def SPRWriteResGroup308 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup308], (instregex "^VCVTPD2PHZ256rrk(z?)$")>; -def SPRWriteResGroup309 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup309 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 2]; let Latency = 23; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup309], (instregex "^VCVTP(D2PH|H2PD)Zrm(b?)$")>; -def SPRWriteResGroup310 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup310 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 2]; let Latency = 26; let NumMicroOps = 6; @@ -3132,7 +3132,7 @@ def SPRWriteResGroup312 : SchedWriteRes<[SPRPort00, SPRPort05]> { def : InstRW<[SPRWriteResGroup312], (instregex "^VCVTP(D2PH|H2PD)Zrr(bk|kz)$", "^VCVTP(D2PH|H2PD)Zrr(k|bkz)$")>; -def SPRWriteResGroup313 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup313 : 
SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -3174,14 +3174,14 @@ def : InstRW<[SPRWriteResGroup314], (instregex "^VCVT(T?)PD2(U?)QQZ(128|256)rr(( "^VSCALEFS(D|S)Zrrb_Int((k|kz)?)$")>; def : InstRW<[SPRWriteResGroup314, ReadAfterVecLd], (instregex "^VFIXUPIMMS(D|S)Zrrib((k|kz)?)$")>; -def SPRWriteResGroup315 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup315 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 14; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup315], (instregex "^VCVT(T?)PH2(U?)DQZ128rm(b?)$", "^VCVTPS2PHXZ128rm(b?)$")>; -def SPRWriteResGroup316 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup316 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 17; let NumMicroOps = 3; } @@ -3195,7 +3195,7 @@ def SPRWriteResGroup317 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { def : InstRW<[SPRWriteResGroup317], (instregex "^VCVT(T?)PH2(U?)DQZ(128|256)rrk(z?)$", "^VCVTP(H2PS|S2PH)(X?)Z256rrk(z?)$")>; -def SPRWriteResGroup318 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup318 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 18; let NumMicroOps = 3; } @@ -3204,7 +3204,7 @@ def : InstRW<[SPRWriteResGroup318], (instregex "^VCVT(T?)PH2(U?)DQZ256rm(bk|kz)$ "^VCVTP(H2PS|S2PH)XZ256rm(bk|kz)$", "^VCVTP(H2PS|S2PH)XZ256rm(k|bkz)$")>; -def SPRWriteResGroup319 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup319 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 18; let NumMicroOps = 3; } @@ -3231,14 +3231,14 @@ def : InstRW<[SPRWriteResGroup321], (instregex "^VCVT(T?)PH2(U?)DQZrr(bk|kz)$", "^VCVTP(H2PS|S2PH)XZrr(bk|kz)$", "^VCVTP(H2PS|S2PH)XZrr(k|bkz)$")>; -def SPRWriteResGroup322 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def 
SPRWriteResGroup322 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1, 2]; let Latency = 23; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup322], (instregex "^VCVTPH2PDZ128rm(b?)$")>; -def SPRWriteResGroup323 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup323 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1, 1, 2]; let Latency = 26; let NumMicroOps = 7; @@ -3260,14 +3260,14 @@ def SPRWriteResGroup325 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup325], (instregex "^VCVTPH2PDZ128rrk(z?)$")>; -def SPRWriteResGroup326 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup326 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 22; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup326], (instregex "^VCVTPH2PDZ256rm(b?)$")>; -def SPRWriteResGroup327 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup327 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 25; let NumMicroOps = 5; @@ -3295,7 +3295,7 @@ def SPRWriteResGroup330 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup330], (instregex "^VCVTP(H2PS|S2PH)(X?)Z128rrk(z?)$")>; -def SPRWriteResGroup331 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup331 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 14; let NumMicroOps = 2; } @@ -3308,7 +3308,7 @@ def : InstRW<[SPRWriteResGroup331, ReadAfterVecYLd], (instregex "^VPMADDUBSWZ256 "^VPMULH((U|RS)?)WZ256rmk(z?)$", "^VPMULLWZ256rmk(z?)$")>; -def SPRWriteResGroup332 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup332 : 
SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 13; let NumMicroOps = 3; } @@ -3317,7 +3317,7 @@ def : InstRW<[SPRWriteResGroup332], (instregex "^VCVT(T?)PS2(U?)QQZrm((b|k|bk|kz def : InstRW<[SPRWriteResGroup332], (instrs VCVTPH2PSZrm)>; def : InstRW<[SPRWriteResGroup332, ReadAfterVecYLd], (instregex "^VPERMWZrmk(z?)$")>; -def SPRWriteResGroup333 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup333 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2, 1, 1, 1]; let Latency = 17; let NumMicroOps = 6; @@ -3332,7 +3332,7 @@ def SPRWriteResGroup334 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup334], (instregex "^VCVT(T?)PH2(U?)QQZ(128|256)rr((k|kz)?)$")>; -def SPRWriteResGroup335 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup335 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2, 1, 1, 1]; let Latency = 18; let NumMicroOps = 6; @@ -3340,7 +3340,7 @@ def SPRWriteResGroup335 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 def : InstRW<[SPRWriteResGroup335], (instregex "^VCVT(T?)PH2(U?)QQZ256rm((b|k|bk|kz)?)$", "^VCVT(T?)PH2(U?)QQZ256rmbkz$")>; -def SPRWriteResGroup336 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup336 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 16; let NumMicroOps = 3; } @@ -3348,7 +3348,7 @@ def : InstRW<[SPRWriteResGroup336], (instregex "^VCVTPS2PHXZ128rm(bk|kz)$", "^VCVTPS2PHXZ128rm(k|bkz)$", "^VCVTPS2PHXZ256rm(b?)$")>; -def SPRWriteResGroup337 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup337 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 16; let NumMicroOps = 3; } @@ -3373,13 +3373,13 
@@ def SPRWriteResGroup340 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { def : InstRW<[SPRWriteResGroup340], (instregex "^VCVT(T?)PS2(U?)QQZ128rr((k|kz)?)$", "^VCVT(U?)QQ2PSZ128rr((k|kz)?)$")>; -def SPRWriteResGroup341 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup341 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 15; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup341], (instregex "^VCVT(U?)QQ2PHZ128rm(b?)$")>; -def SPRWriteResGroup342 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup342 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 17; let NumMicroOps = 5; } @@ -3399,13 +3399,13 @@ def SPRWriteResGroup344 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 def : InstRW<[SPRWriteResGroup344], (instregex "^VCVT(U?)QQ2PHZ128rrk(z?)$", "^VCVT(U?)QQ2PHZ256rr$")>; -def SPRWriteResGroup345 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup345 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 18; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup345], (instregex "^VCVT(U?)QQ2PHZ256rm(b?)$")>; -def SPRWriteResGroup346 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup346 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10, SPRPort05]> { let Latency = 20; let NumMicroOps = 5; } @@ -3418,14 +3418,14 @@ def SPRWriteResGroup347 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup347], (instregex "^VCVT(U?)QQ2PHZ256rrk(z?)$")>; -def SPRWriteResGroup348 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup348 : 
SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 1, 2]; let Latency = 18; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup348], (instregex "^VCVT(U?)QQ2PHZrm(b?)$")>; -def SPRWriteResGroup349 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup349 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 1, 2]; let Latency = 20; let NumMicroOps = 5; @@ -3448,14 +3448,14 @@ def SPRWriteResGroup351 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> { def : InstRW<[SPRWriteResGroup351], (instregex "^VCVT(U?)QQ2PHZrr(bk|kz)$", "^VCVT(U?)QQ2PHZrr(k|bkz)$")>; -def SPRWriteResGroup352 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup352 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 2, 1, 1, 1]; let Latency = 18; let NumMicroOps = 7; } def : InstRW<[SPRWriteResGroup352, ReadAfterVecLd], (instregex "^VCVTSD2SHZrm((_Int)?)$")>; -def SPRWriteResGroup353 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup353 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 2, 1, 1, 1]; let Latency = 21; let NumMicroOps = 7; @@ -3477,14 +3477,14 @@ def SPRWriteResGroup355 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup355], (instregex "^VCVTSD2SHZrr(b?)_Intk(z?)$")>; -def SPRWriteResGroup356 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup356 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 18; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup356, ReadAfterVecLd], (instregex "^VCVTSH2SDZrm((_Int)?)$")>; -def SPRWriteResGroup357 : 
SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup357 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 20; let NumMicroOps = 4; @@ -3506,7 +3506,7 @@ def SPRWriteResGroup359 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup359], (instregex "^VCVTSH2SDZrr(b?)_Intk(z?)$")>; -def SPRWriteResGroup360 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup360 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_10]> { let Latency = 13; let NumMicroOps = 3; } @@ -3525,14 +3525,14 @@ def SPRWriteResGroup362 : SchedWriteRes<[SPRPort00_01]> { } def : InstRW<[SPRWriteResGroup362], (instregex "^VCVTSH2SSZrr(b?)_Intk(z?)$")>; -def SPRWriteResGroup363 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup363 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 14; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup363, ReadAfterVecLd], (instregex "^VCVT(U?)SI((64)?)2SHZrm((_Int)?)$", "^VCVTSS2SHZrm((_Int)?)$")>; -def SPRWriteResGroup364 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup364 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 16; let NumMicroOps = 3; } @@ -3572,14 +3572,14 @@ def : InstRW<[SPRWriteResGroup367], (instregex "^VDBPSADBWZ(128|256)rrik(z?)$", "^VPOPCNT(B|W)Z(128|256)rrk(z?)$", "^VPOPCNT(B|W)Zrrk(z?)$")>; -def SPRWriteResGroup368 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup368 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 36; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup368, ReadAfterVecXLd], (instregex "^VDIVPHZ128rm(b?)$")>; -def SPRWriteResGroup369 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup369 : 
SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 38; let NumMicroOps = 4; @@ -3603,14 +3603,14 @@ def : InstRW<[SPRWriteResGroup371], (instregex "^VDIVPHZ(128|256)rrk$", "^VSQRTPHZ(128|256)r$")>; def : InstRW<[SPRWriteResGroup371], (instrs VDIVPHZ128rrkz)>; -def SPRWriteResGroup372 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup372 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 37; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup372, ReadAfterVecYLd], (instregex "^VDIVPHZ256rm(b?)$")>; -def SPRWriteResGroup373 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup373 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 39; let NumMicroOps = 4; @@ -3626,14 +3626,14 @@ def SPRWriteResGroup374 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup374], (instrs VDIVPHZ256rrkz)>; -def SPRWriteResGroup375 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup375 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [4, 2, 1, 1, 1]; let Latency = 49; let NumMicroOps = 9; } def : InstRW<[SPRWriteResGroup375, ReadAfterVecYLd], (instregex "^VDIVPHZrm(b?)$")>; -def SPRWriteResGroup376 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup376 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [4, 2, 1, 1, 1]; let Latency = 51; let NumMicroOps = 9; @@ -3663,7 +3663,7 @@ def SPRWriteResGroup379 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup379], (instrs VDIVPSZrr)>; -def SPRWriteResGroup380 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def 
SPRWriteResGroup380 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 21; let NumMicroOps = 2; } @@ -3676,7 +3676,7 @@ def SPRWriteResGroup381 : SchedWriteRes<[SPRPort00]> { def : InstRW<[SPRWriteResGroup381], (instrs VDIVSHZrr_Int, VSQRTSHZr_Int)>; -def SPRWriteResGroup382 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup382 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 15; let NumMicroOps = 5; @@ -3691,7 +3691,7 @@ def SPRWriteResGroup383 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup383], (instregex "^VDPBF16PSZ(128|256)r((k|kz)?)$")>; -def SPRWriteResGroup384 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup384 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 2]; let Latency = 16; let NumMicroOps = 5; @@ -3699,35 +3699,35 @@ def SPRWriteResGroup384 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort0 def : InstRW<[SPRWriteResGroup384, ReadAfterVecYLd], (instregex "^VDPBF16PSZ256m((b|k|bk|kz)?)$")>; def : InstRW<[SPRWriteResGroup384, ReadAfterVecYLd], (instrs VDPBF16PSZ256mbkz)>; -def SPRWriteResGroup385 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup385 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [6, 7, 18]; let Latency = 81; let NumMicroOps = 31; } def : InstRW<[SPRWriteResGroup385], (instrs VERRm)>; -def SPRWriteResGroup386 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup386 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [6, 7, 17]; let Latency = 74; let NumMicroOps = 30; } def : InstRW<[SPRWriteResGroup386], (instrs VERRr)>; -def SPRWriteResGroup387 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup387 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles 
= [5, 8, 21]; let Latency = 81; let NumMicroOps = 34; } def : InstRW<[SPRWriteResGroup387], (instrs VERWm)>; -def SPRWriteResGroup388 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> { +def SPRWriteResGroup388 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_10]> { let ReleaseAtCycles = [5, 8, 20]; let Latency = 74; let NumMicroOps = 33; } def : InstRW<[SPRWriteResGroup388], (instrs VERWr)>; -def SPRWriteResGroup389 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup389 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 2]; let Latency = 10; let NumMicroOps = 3; @@ -3736,7 +3736,7 @@ def : InstRW<[SPRWriteResGroup389, ReadAfterVecYLd], (instregex "^VEXPANDP(D|S)Z "^VPEXPAND(B|D|Q|W)Z128rm$", "^VPEXPAND(D|Q)Z128rmk(z?)$")>; -def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 16; let NumMicroOps = 3; @@ -3755,7 +3755,7 @@ def : InstRW<[SPRWriteResGroup390, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZ2 "^VSCALEFPHZ256rm(b?)$")>; def : InstRW<[SPRWriteResGroup390, ReadAfterVecLd], (instrs VSCALEFSHZrm)>; -def SPRWriteResGroup391 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup391 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 21; let NumMicroOps = 3; @@ -3797,7 +3797,7 @@ def : InstRW<[SPRWriteResGroup393], (instregex "^VF(C?)MADDCPHZ(128|256)rk(z?)$" "^VF(C?)MULCSHZrr(bk|kz)$", "^VF(C?)MULCSHZrr(k|bkz)$")>; -def SPRWriteResGroup394 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup394 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 16; let NumMicroOps = 3; @@ -3808,7 +3808,7 @@ def : InstRW<[SPRWriteResGroup394, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZr "^VRNDSCALEP(D|S)Zrm(b?)ik(z?)$", "^VSCALEFPHZrm(b?)$")>; -def SPRWriteResGroup395 : 
SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup395 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 21; let NumMicroOps = 3; @@ -3838,7 +3838,7 @@ def : InstRW<[SPRWriteResGroup397], (instregex "^VF(C?)MADDCPHZr(bk|kz)$", "^VF(C?)MULCPHZrr(bk|kz)$", "^VF(C?)MULCPHZrr(k|bkz)$")>; -def SPRWriteResGroup398 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup398 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 4]; let Latency = 29; let NumMicroOps = 8; @@ -3848,7 +3848,7 @@ def : InstRW<[SPRWriteResGroup398, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup398, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSYrm, VPGATHERQDYrm)>; -def SPRWriteResGroup399 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup399 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 20; let NumMicroOps = 4; @@ -3858,7 +3858,7 @@ def : InstRW<[SPRWriteResGroup399, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup399, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm)>; -def SPRWriteResGroup400 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup400 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 2, 4]; let Latency = 28; let NumMicroOps = 7; @@ -3868,7 +3868,7 @@ def : InstRW<[SPRWriteResGroup400, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup400, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm)>; -def SPRWriteResGroup401 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup401 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 8, 2]; let Latency = 28; let NumMicroOps = 
11; @@ -3878,7 +3878,7 @@ def : InstRW<[SPRWriteResGroup401, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup401, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZrm, VPGATHERQDZrm)>; -def SPRWriteResGroup402 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup402 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 1, 2]; let Latency = 20; let NumMicroOps = 5; @@ -3888,7 +3888,7 @@ def : InstRW<[SPRWriteResGroup402, WriteVecMaskedGatherWriteback], (instregex "^ def : InstRW<[SPRWriteResGroup402, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSrm, VPGATHERQDrm)>; -def SPRWriteResGroup403 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup403 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 8]; let Latency = 30; let NumMicroOps = 12; @@ -3896,7 +3896,7 @@ def SPRWriteResGroup403 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_0 def : InstRW<[SPRWriteResGroup403, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; -def SPRWriteResGroup404 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup404 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 2, 4]; let Latency = 27; let NumMicroOps = 7; @@ -3904,7 +3904,7 @@ def SPRWriteResGroup404 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_1 def : InstRW<[SPRWriteResGroup404, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZ128rm, VPGATHERDDZ128rm)>; -def SPRWriteResGroup405 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup405 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 2, 8]; let Latency = 29; let NumMicroOps = 11; @@ -3912,7 +3912,7 @@ def SPRWriteResGroup405 : SchedWriteRes<[SPRPort00, SPRPort01_05, 
SPRPort02_03_1 def : InstRW<[SPRWriteResGroup405, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm)>; -def SPRWriteResGroup406 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup406 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 16, 2]; let Latency = 30; let NumMicroOps = 19; @@ -3920,7 +3920,7 @@ def SPRWriteResGroup406 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> def : InstRW<[SPRWriteResGroup406, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; -def SPRWriteResGroup407 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup407 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [1, 1, 2, 4]; let Latency = 28; let NumMicroOps = 8; @@ -3928,7 +3928,7 @@ def SPRWriteResGroup407 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_0 def : InstRW<[SPRWriteResGroup407, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSrm, VPGATHERDDrm)>; -def SPRWriteResGroup408 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup408 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 15; let NumMicroOps = 2; } @@ -3949,7 +3949,7 @@ def SPRWriteResGroup410 : SchedWriteRes<[SPRPort00_01]> { def : InstRW<[SPRWriteResGroup410], (instregex "^VGF2P8AFFINE((INV)?)QBZ(128|256)rrikz$", "^VGF2P8MULBZ(128|256)rrkz$")>; -def SPRWriteResGroup411 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup411 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 15; let NumMicroOps = 2; } @@ -3975,20 +3975,20 @@ def SPRWriteResGroup414 : SchedWriteRes<[SPRPort01_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup414], (instregex "^VH(ADD|SUB)P(D|S)rr$")>; -def SPRWriteResGroup415 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11]> { +def SPRWriteResGroup415 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_10]> { let 
Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup415], (instrs VLDMXCSR)>; -def SPRWriteResGroup416 : SchedWriteRes<[SPRPort01, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09, SPRPort05, SPRPort06]> { +def SPRWriteResGroup416 : SchedWriteRes<[SPRPort01, SPRPort01_05, SPRPort02_03, SPRPort02_03_10, SPRPort04, SPRPort04_09, SPRPort05, SPRPort06]> { let ReleaseAtCycles = [1, 1, 1, 8, 1, 1, 2, 3]; let Latency = 40; let NumMicroOps = 18; } def : InstRW<[SPRWriteResGroup416], (instrs VMCLEARm)>; -def SPRWriteResGroup417 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup417 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 2; } @@ -4008,7 +4008,7 @@ def : InstRW<[SPRWriteResGroup418], (instregex "^VMOVDQU(8|16)Z(128|256)rrk(z?)( "^VPBLENDM(B|W)Z(128|256)rrk(z?)$", "^VPMOVM2(B|W)Z(128|256)rk$")>; -def SPRWriteResGroup419 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup419 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 2, 2]; let Latency = 12; let NumMicroOps = 5; @@ -4082,7 +4082,7 @@ def SPRWriteResGroup430 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> { } def : InstRW<[SPRWriteResGroup430], (instrs VMOVNTPSZmr)>; -def SPRWriteResGroup431 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup431 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [3, 1, 8]; let Latency = 10; let NumMicroOps = 12; @@ -4098,7 +4098,7 @@ def SPRWriteResGroup432 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { def : InstRW<[SPRWriteResGroup432], (instrs VP2INTERSECTDZ128rr, VP2INTERSECTQZ256rr)>; -def SPRWriteResGroup433 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup433 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort01_05, 
SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 8, 7, 2, 1, 11]; let Latency = 27; let NumMicroOps = 30; @@ -4112,7 +4112,7 @@ def SPRWriteResGroup434 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_0 } def : InstRW<[SPRWriteResGroup434], (instrs VP2INTERSECTDZ256rr)>; -def SPRWriteResGroup435 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup435 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [13, 9, 1, 23]; let Latency = 40; let NumMicroOps = 46; @@ -4126,7 +4126,7 @@ def SPRWriteResGroup436 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup436], (instrs VP2INTERSECTDZrr)>; -def SPRWriteResGroup437 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup437 : SchedWriteRes<[SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 4]; let Latency = 6; let NumMicroOps = 5; @@ -4140,7 +4140,7 @@ def SPRWriteResGroup438 : SchedWriteRes<[SPRPort05]> { } def : InstRW<[SPRWriteResGroup438], (instrs VP2INTERSECTQZ128rr)>; -def SPRWriteResGroup439 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup439 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [8, 7, 1, 14]; let Latency = 29; let NumMicroOps = 30; @@ -4169,7 +4169,7 @@ def : InstRW<[SPRWriteResGroup441], (instregex "^VP(A|SU)BS(B|W)Z(128|256)rrk(z? 
"^VPSRAWZ(128|256)rik(z?)$", "^VPSUBUS(B|W)Z(128|256)rrk(z?)$")>; -def SPRWriteResGroup442 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup442 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -4206,21 +4206,21 @@ def SPRWriteResGroup445 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort04_09, def : InstRW<[SPRWriteResGroup445], (instregex "^VPCOMPRESS(B|W)Z(128|256)mrk$")>; def : InstRW<[SPRWriteResGroup445], (instrs VPCOMPRESSWZmrk)>; -def SPRWriteResGroup446 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup446 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 2, 2, 2]; let Latency = 12; let NumMicroOps = 8; } def : InstRW<[SPRWriteResGroup446], (instrs VPCOMPRESSBZmr)>; -def SPRWriteResGroup447 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup447 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 2, 2, 2]; let Latency = 14; let NumMicroOps = 8; } def : InstRW<[SPRWriteResGroup447], (instrs VPCOMPRESSBZmrk)>; -def SPRWriteResGroup448 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup448 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [5, 4, 1, 5]; let Latency = 17; let NumMicroOps = 15; @@ -4235,7 +4235,7 @@ def SPRWriteResGroup449 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup449], (instregex "^VPCONFLICTDZ128rr((k|kz)?)$")>; -def SPRWriteResGroup450 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup450 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 5, 
1, 1, 9]; let Latency = 24; let NumMicroOps = 23; @@ -4250,7 +4250,7 @@ def SPRWriteResGroup451 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup451], (instregex "^VPCONFLICTDZ256rr((k|kz)?)$")>; -def SPRWriteResGroup452 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup452 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [11, 8, 1, 17]; let Latency = 33; let NumMicroOps = 37; @@ -4272,7 +4272,7 @@ def SPRWriteResGroup454 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup454], (instrs VPCONFLICTDZrrk)>; -def SPRWriteResGroup455 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup455 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 11; let NumMicroOps = 4; @@ -4288,7 +4288,7 @@ def SPRWriteResGroup456 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup456], (instregex "^VPCONFLICTQZ128rr((k|kz)?)$")>; -def SPRWriteResGroup457 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup457 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [5, 4, 1, 5]; let Latency = 20; let NumMicroOps = 15; @@ -4303,7 +4303,7 @@ def SPRWriteResGroup458 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 } def : InstRW<[SPRWriteResGroup458], (instregex "^VPCONFLICTQZ256rr((k|kz)?)$")>; -def SPRWriteResGroup459 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup459 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [7, 5, 1, 9]; let Latency = 23; let NumMicroOps = 22; @@ -4325,7 +4325,7 @@ def SPRWriteResGroup461 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup461], (instrs 
VPCONFLICTQZrrk)>; -def SPRWriteResGroup462 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup462 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 13; let NumMicroOps = 4; @@ -4348,14 +4348,14 @@ def SPRWriteResGroup464 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { def : InstRW<[SPRWriteResGroup464], (instregex "^VPERM(I|T)2BZ(128|256)rrk(z?)$", "^VPERM(I|T)2WZ(128|256)rr$")>; -def SPRWriteResGroup465 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup465 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 12; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup465, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ256rm$")>; -def SPRWriteResGroup466 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup466 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 14; let NumMicroOps = 4; @@ -4364,14 +4364,14 @@ def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZ2 def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instrs VPERMI2WZ128rm, VPERMT2WZ256rm)>; -def SPRWriteResGroup467 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup467 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 12; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup467, ReadAfterVecYLd], (instregex "^VPERM(I|T)2BZrm$")>; -def SPRWriteResGroup468 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup468 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 14; let NumMicroOps = 4; @@ -4394,7 +4394,7 @@ def SPRWriteResGroup470 : SchedWriteRes<[SPRPort00_05, SPRPort05]> { def : InstRW<[SPRWriteResGroup470], (instregex 
"^VPERM(I|T)2BZrrk(z?)$", "^VPERM(I|T)2WZrr$")>; -def SPRWriteResGroup471 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup471 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 16; let NumMicroOps = 4; @@ -4409,7 +4409,7 @@ def SPRWriteResGroup472 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup472], (instregex "^VPERM(I|T)2WZ(128|256)rrk(z?)$")>; -def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 15; let NumMicroOps = 4; @@ -4417,21 +4417,21 @@ def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPo def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instregex "^VPERMT2WZ128rmk(z?)$")>; def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instrs VPERMI2WZ256rm)>; -def SPRWriteResGroup474 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup474 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 17; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup474, ReadAfterVecYLd], (instregex "^VPERMI2WZ256rmk(z?)$")>; -def SPRWriteResGroup475 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup475 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 15; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup475, ReadAfterVecYLd], (instrs VPERMI2WZrm)>; -def SPRWriteResGroup476 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup476 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 17; let NumMicroOps = 4; @@ -4445,20 +4445,20 @@ def SPRWriteResGroup477 : 
SchedWriteRes<[SPRPort00_05, SPRPort05]> { } def : InstRW<[SPRWriteResGroup477], (instregex "^VPERM(I|T)2WZrrk(z?)$")>; -def SPRWriteResGroup478 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup478 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [1, 1, 2]; let Latency = 16; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup478, ReadAfterVecYLd], (instregex "^VPERMT2WZrmk(z?)$")>; -def SPRWriteResGroup479 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup479 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 10; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup479, ReadAfterVecYLd], (instrs VPERMWZ128rm)>; -def SPRWriteResGroup480 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup480 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 13; let NumMicroOps = 3; } @@ -4470,13 +4470,13 @@ def SPRWriteResGroup481 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { } def : InstRW<[SPRWriteResGroup481], (instregex "^VPERMWZ(128|256)rr$")>; -def SPRWriteResGroup482 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup482 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let Latency = 11; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup482, ReadAfterVecYLd], (instrs VPERMWZ256rm)>; -def SPRWriteResGroup483 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup483 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 11; let NumMicroOps = 3; } @@ -4490,7 +4490,7 @@ def SPRWriteResGroup484 : SchedWriteRes<[SPRPort05]> { def : InstRW<[SPRWriteResGroup484], (instregex "^VPEXPAND(B|W)Z(128|256)rrk(z?)$", "^VPEXPAND(B|W)Zrrk(z?)$")>; -def SPRWriteResGroup485 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11]> { +def SPRWriteResGroup485 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_10]> 
{ let ReleaseAtCycles = [1, 2, 1]; let Latency = 10; let NumMicroOps = 4; @@ -4504,7 +4504,7 @@ def : InstRW<[SPRWriteResGroup486], (instregex "^VPMADDUBSWZ(128|256)rrk(z?)$", "^VPMULH((U|RS)?)WZ(128|256)rrk(z?)$", "^VPMULLWZ(128|256)rrk(z?)$")>; -def SPRWriteResGroup487 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup487 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 14; let NumMicroOps = 2; } @@ -4595,7 +4595,7 @@ def SPRWriteResGroup496 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]> } def : InstRW<[SPRWriteResGroup496], (instregex "^VPMOVQDZ((256)?)mrk$")>; -def SPRWriteResGroup497 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup497 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 23; let NumMicroOps = 4; @@ -4612,7 +4612,7 @@ def SPRWriteResGroup498 : SchedWriteRes<[SPRPort00_01]> { } def : InstRW<[SPRWriteResGroup498], (instregex "^VPMULLQZ(128|256)rr((k|kz)?)$")>; -def SPRWriteResGroup499 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup499 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 23; let NumMicroOps = 4; @@ -4627,7 +4627,7 @@ def SPRWriteResGroup500 : SchedWriteRes<[SPRPort00]> { } def : InstRW<[SPRWriteResGroup500], (instregex "^VPMULLQZrr((k|kz)?)$")>; -def SPRWriteResGroup501 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup501 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 1, 4, 4]; let Latency = 12; let NumMicroOps = 11; @@ -4639,7 +4639,7 @@ def : InstRW<[SPRWriteResGroup501], (instrs VPSCATTERDDZ128mr, VSCATTERDPSZ128mr, VSCATTERQPSZ256mr)>; -def SPRWriteResGroup502 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup502 : SchedWriteRes<[SPRPort00, 
SPRPort00_01, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 1, 8, 8]; let Latency = 12; let NumMicroOps = 19; @@ -4647,7 +4647,7 @@ def SPRWriteResGroup502 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_0 def : InstRW<[SPRWriteResGroup502], (instrs VPSCATTERDDZ256mr, VSCATTERDPSZ256mr)>; -def SPRWriteResGroup503 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup503 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 16, 16]; let Latency = 19; let NumMicroOps = 35; @@ -4655,7 +4655,7 @@ def SPRWriteResGroup503 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPo def : InstRW<[SPRWriteResGroup503], (instrs VPSCATTERDDZmr, VSCATTERDPSZmr)>; -def SPRWriteResGroup504 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup504 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [1, 1, 1, 2, 2]; let Latency = 12; let NumMicroOps = 7; @@ -4665,7 +4665,7 @@ def : InstRW<[SPRWriteResGroup504], (instregex "^VPSCATTER(D|Q)QZ128mr$", def : InstRW<[SPRWriteResGroup504], (instrs VPSCATTERQDZ128mr, VSCATTERQPSZ128mr)>; -def SPRWriteResGroup505 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> { +def SPRWriteResGroup505 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_11, SPRPort04_09, SPRPort07_08]> { let ReleaseAtCycles = [2, 1, 8, 8]; let Latency = 12; let NumMicroOps = 19; @@ -4675,7 +4675,7 @@ def : InstRW<[SPRWriteResGroup505], (instregex "^VPSCATTER(D|Q)QZmr$", def : InstRW<[SPRWriteResGroup505], (instrs VPSCATTERQDZmr, VSCATTERQPSZmr)>; -def SPRWriteResGroup506 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup506 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 8; let NumMicroOps = 2; } @@ -4685,7 +4685,7 @@ def : 
InstRW<[SPRWriteResGroup506, ReadAfterVecXLd], (instregex "^VPSH(L|R)D(D|Q "^VPSH(L|R)DV(D|Q)Z128m(b|k|kz)$", "^VPSH(L|R)DV(D|Q)Z128mbk(z?)$")>; -def SPRWriteResGroup507 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup507 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 3; } @@ -4702,7 +4702,7 @@ def SPRWriteResGroup509 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup509], (instregex "^VPSH(L|R)D(D|Q)Z(128|256)rrik(z?)$")>; -def SPRWriteResGroup510 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup510 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -4712,13 +4712,13 @@ def : InstRW<[SPRWriteResGroup510, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q "^VPSH(L|R)DV(D|Q)Z256m(b|k|kz)$", "^VPSH(L|R)DV(D|Q)Z256mbk(z?)$")>; -def SPRWriteResGroup511 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup511 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 10; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup511, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q)Z256rm(b?)ik(z?)$")>; -def SPRWriteResGroup512 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup512 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 9; let NumMicroOps = 2; } @@ -4728,7 +4728,7 @@ def : InstRW<[SPRWriteResGroup512, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q "^VPSH(L|R)DV(D|Q)Zm(b|k|kz)$", "^VPSH(L|R)DV(D|Q)Zmbk(z?)$")>; -def SPRWriteResGroup513 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup513 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let Latency = 10; let NumMicroOps = 3; } @@ -4740,7 +4740,7 @@ def SPRWriteResGroup514 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup514], (instregex "^VPSH(L|R)D(D|Q)Zrrik(z?)$")>; -def 
SPRWriteResGroup515 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup515 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 11; let NumMicroOps = 3; } @@ -4752,13 +4752,13 @@ def SPRWriteResGroup516 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup516], (instregex "^VPSH(L|R)DWZ(128|256)rrik(z?)$")>; -def SPRWriteResGroup517 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup517 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 12; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup517, ReadAfterVecYLd], (instregex "^VPSH(L|R)DWZ256rmik(z?)$")>; -def SPRWriteResGroup518 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup518 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let Latency = 12; let NumMicroOps = 3; } @@ -4770,14 +4770,14 @@ def SPRWriteResGroup519 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup519], (instregex "^VPSH(L|R)DWZrrik(z?)$")>; -def SPRWriteResGroup520 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup520 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 6; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup520, ReadAfterVecXLd], (instrs VPSHUFBITQMBZ128rm)>; def : InstRW<[SPRWriteResGroup520, ReadAfterVecYLd], (instregex "^VPSHUFBITQMBZ((256)?)rm$")>; -def SPRWriteResGroup521 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup521 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { let Latency = 8; let NumMicroOps = 3; } @@ -4791,7 +4791,7 @@ def SPRWriteResGroup522 : SchedWriteRes<[SPRPort00_01, SPRPort01_05]> { def : InstRW<[SPRWriteResGroup522], (instregex "^VPS(L|R)LWZ128rrk(z?)$", "^VPSRAWZ128rrk(z?)$")>; -def SPRWriteResGroup523 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def 
SPRWriteResGroup523 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 16; let NumMicroOps = 4; @@ -4806,7 +4806,7 @@ def SPRWriteResGroup524 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup524], (instregex "^VRCPPHZrk(z?)$")>; -def SPRWriteResGroup525 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup525 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 20; let NumMicroOps = 4; @@ -4815,7 +4815,7 @@ def : InstRW<[SPRWriteResGroup525, ReadAfterVecXLd], (instregex "^VREDUCEPHZ128r def : InstRW<[SPRWriteResGroup525, ReadAfterVecXLd], (instrs VREDUCESHZrmi)>; def : InstRW<[SPRWriteResGroup525, ReadAfterVecYLd], (instregex "^VREDUCEPHZ256rm(b?)i$")>; -def SPRWriteResGroup526 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def SPRWriteResGroup526 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 22; let NumMicroOps = 4; @@ -4841,14 +4841,14 @@ def : InstRW<[SPRWriteResGroup528], (instregex "^VREDUCEPHZ(128|256)rrik(z?)$", "^VREDUCESHZrri(bk|kz)$", "^VREDUCESHZrri(k|bkz)$")>; -def SPRWriteResGroup529 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup529 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 20; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup529, ReadAfterVecYLd], (instregex "^VREDUCEPHZrm(b?)i$")>; -def SPRWriteResGroup530 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup530 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [3, 1]; let Latency = 22; let NumMicroOps = 4; @@ -4878,7 +4878,7 @@ def SPRWriteResGroup533 : SchedWriteRes<[SPRPort00]> { def : InstRW<[SPRWriteResGroup533], (instregex "^VRNDSCALEP(D|S)Zrri((b|k|bk|kz)?)$", "^VRNDSCALEP(D|S)Zrribkz$")>; -def SPRWriteResGroup534 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { +def 
SPRWriteResGroup534 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 17; let NumMicroOps = 3; @@ -4903,7 +4903,7 @@ def : InstRW<[SPRWriteResGroup535], (instregex "^VRNDSCALEPHZ(128|256)rrik(z?)$" "^VSCALEFSHZrrb_Intk(z?)$", "^VSCALEFSHZrrk(z?)$")>; -def SPRWriteResGroup536 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup536 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 17; let NumMicroOps = 3; @@ -4931,14 +4931,14 @@ def : InstRW<[SPRWriteResGroup538], (instregex "^VRSQRT14P(D|S)Zr$")>; def : InstRW<[SPRWriteResGroup538], (instrs VRSQRT14PSZrk, VRSQRTPHZr)>; -def SPRWriteResGroup539 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup539 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 25; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup539], (instrs VSQRTPDYm)>; def : InstRW<[SPRWriteResGroup539, ReadAfterVecYLd], (instregex "^VSQRTPDZ256m(b?)$")>; -def SPRWriteResGroup540 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> { +def SPRWriteResGroup540 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 20; let NumMicroOps = 2; } @@ -4946,14 +4946,14 @@ def : InstRW<[SPRWriteResGroup540, ReadAfterVecXLd], (instregex "^VSQRTPDZ128m(b "^VSQRTPDZ128m(k|bkz)$")>; def : InstRW<[SPRWriteResGroup540, ReadAfterVecLd], (instregex "^VSQRTSDZm_Intk(z?)$")>; -def SPRWriteResGroup541 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup541 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 38; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup541, ReadAfterVecYLd], (instrs VSQRTPDZm)>; -def SPRWriteResGroup542 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> { +def SPRWriteResGroup542 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 39; let NumMicroOps = 4; @@ -4967,7 
+4967,7 @@ def SPRWriteResGroup543 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup543], (instrs VSQRTPDZr)>; -def SPRWriteResGroup544 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup544 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 41; let NumMicroOps = 4; @@ -4990,14 +4990,14 @@ def SPRWriteResGroup546 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> { } def : InstRW<[SPRWriteResGroup546], (instrs VSQRTPHZ128rkz)>; -def SPRWriteResGroup547 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup547 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 40; let NumMicroOps = 4; } def : InstRW<[SPRWriteResGroup547, ReadAfterVecYLd], (instregex "^VSQRTPHZ256m(b?)$")>; -def SPRWriteResGroup548 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> { +def SPRWriteResGroup548 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; let Latency = 42; let NumMicroOps = 4; @@ -5005,14 +5005,14 @@ def SPRWriteResGroup548 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_0 def : InstRW<[SPRWriteResGroup548, ReadAfterVecYLd], (instregex "^VSQRTPHZ256m(bk|kz)$", "^VSQRTPHZ256m(k|bkz)$")>; -def SPRWriteResGroup549 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup549 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [4, 2, 1, 1, 1]; let Latency = 53; let NumMicroOps = 9; } def : InstRW<[SPRWriteResGroup549, ReadAfterVecYLd], (instregex "^VSQRTPHZm(b?)$")>; -def SPRWriteResGroup550 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> { +def SPRWriteResGroup550 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles 
= [4, 2, 1, 1, 1]; let Latency = 55; let NumMicroOps = 9; @@ -5042,7 +5042,7 @@ def SPRWriteResGroup553 : SchedWriteRes<[SPRPort00, SPRPort00_05]> { } def : InstRW<[SPRWriteResGroup553], (instrs VSQRTPSZr)>; -def SPRWriteResGroup554 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10]> { +def SPRWriteResGroup554 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11]> { let ReleaseAtCycles = [1, 2, 3, 3, 1]; let Latency = 12; let NumMicroOps = 10; @@ -5063,42 +5063,42 @@ def SPRWriteResGroup556 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, } def : InstRW<[SPRWriteResGroup556], (instrs WRMSR)>; -def SPRWriteResGroup557 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> { +def SPRWriteResGroup557 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort05]> { let ReleaseAtCycles = [2, 1, 4, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 8; } def : InstRW<[SPRWriteResGroup557], (instrs WRPKRUr)>; -def SPRWriteResGroup558 : SchedWriteRes<[SPRPort00_01_05_06_10]> { +def SPRWriteResGroup558 : SchedWriteRes<[SPRPort00_01_05_06_11]> { let ReleaseAtCycles = [2]; let Latency = 12; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup558, WriteRMW], (instregex "^XADD(16|32|64)rm$")>; -def SPRWriteResGroup559 : SchedWriteRes<[SPRPort00_01_05_06_10]> { +def SPRWriteResGroup559 : SchedWriteRes<[SPRPort00_01_05_06_11]> { let ReleaseAtCycles = [2]; let Latency = 13; let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup559, WriteRMW], (instrs XADD8rm)>; -def SPRWriteResGroup560 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup560 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [4, 1]; let Latency = 39; let NumMicroOps = 5; } def : InstRW<[SPRWriteResGroup560, WriteRMW], (instregex "^XCHG(16|32)rm$")>; -def SPRWriteResGroup561 : SchedWriteRes<[SPRPort00_01_05_06_10, 
SPRPort00_06]> { +def SPRWriteResGroup561 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [5, 1]; let Latency = 39; let NumMicroOps = 6; } def : InstRW<[SPRWriteResGroup561, WriteRMW], (instrs XCHG64rm)>; -def SPRWriteResGroup562 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup562 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [4, 1]; let Latency = 40; let NumMicroOps = 5; @@ -5112,21 +5112,21 @@ def SPRWriteResGroup563 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06, SPRPort0 } def : InstRW<[SPRWriteResGroup563], (instrs XCH_F)>; -def SPRWriteResGroup564 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01]> { +def SPRWriteResGroup564 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01]> { let ReleaseAtCycles = [7, 3, 8, 5]; let Latency = 4; let NumMicroOps = 23; } def : InstRW<[SPRWriteResGroup564], (instrs XGETBV)>; -def SPRWriteResGroup565 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> { +def SPRWriteResGroup565 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1]; let Latency = 7; let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup565], (instrs XLAT)>; -def SPRWriteResGroup566 : SchedWriteRes<[SPRPort01, SPRPort02_03, SPRPort02_03_11, SPRPort06]> { +def SPRWriteResGroup566 : SchedWriteRes<[SPRPort01, SPRPort02_03, SPRPort02_03_10, SPRPort06]> { let ReleaseAtCycles = [1, 21, 1, 8]; let Latency = 37; let NumMicroOps = 31; @@ -5134,70 +5134,70 @@ def SPRWriteResGroup566 : SchedWriteRes<[SPRPort01, SPRPort02_03, SPRPort02_03_1 def : InstRW<[SPRWriteResGroup566], (instregex "^XRSTOR((S|64)?)$")>; def : InstRW<[SPRWriteResGroup566], (instrs XRSTORS64)>; -def SPRWriteResGroup567 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup567 : 
SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1]; let Latency = 42; let NumMicroOps = 140; } def : InstRW<[SPRWriteResGroup567], (instrs XSAVE)>; -def SPRWriteResGroup568 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup568 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1]; let Latency = 41; let NumMicroOps = 140; } def : InstRW<[SPRWriteResGroup568], (instrs XSAVE64)>; -def SPRWriteResGroup569 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup569 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2]; let Latency = 42; let NumMicroOps = 151; } def : InstRW<[SPRWriteResGroup569], (instrs XSAVEC)>; -def SPRWriteResGroup570 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup570 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2]; let Latency = 42; let NumMicroOps = 152; } def : InstRW<[SPRWriteResGroup570], (instrs XSAVEC64)>; -def SPRWriteResGroup571 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup571 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, 
SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [25, 35, 52, 27, 4, 1, 10, 1]; let Latency = 42; let NumMicroOps = 155; } def : InstRW<[SPRWriteResGroup571], (instrs XSAVEOPT)>; -def SPRWriteResGroup572 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup572 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [25, 35, 53, 27, 4, 1, 10, 1]; let Latency = 42; let NumMicroOps = 156; } def : InstRW<[SPRWriteResGroup572], (instrs XSAVEOPT64)>; -def SPRWriteResGroup573 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup573 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2]; let Latency = 42; let NumMicroOps = 184; } def : InstRW<[SPRWriteResGroup573], (instrs XSAVES)>; -def SPRWriteResGroup574 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> { +def SPRWriteResGroup574 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_10, SPRPort04_09, SPRPort05, SPRPort07_08]> { let ReleaseAtCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2]; let Latency = 42; let NumMicroOps = 186; } def : InstRW<[SPRWriteResGroup574], (instrs XSAVES64)>; -def SPRWriteResGroup575 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort05]> { +def SPRWriteResGroup575 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_11, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort05]> { let ReleaseAtCycles = [4, 23, 2, 14, 
8, 1, 2]; let Latency = 5; let NumMicroOps = 54; } def : InstRW<[SPRWriteResGroup575], (instrs XSETBV)>; -def SPRWriteResGroup576 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> { +def SPRWriteResGroup576 : SchedWriteRes<[SPRPort00_01_05_06_11, SPRPort00_06]> { let ReleaseAtCycles = [2, 1]; let Latency = SapphireRapidsModel.MaxLatency; let NumMicroOps = 3; diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index e0c857b60c4096..51d6b7cb9b1fd6 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2011,7 +2011,7 @@ const StringMap sys::getHostCPUFeatures() { const StringMap sys::getHostCPUFeatures() { unsigned long hwcap = getauxval(AT_HWCAP); bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU - const uint32_t cpucfg2 = 0x2, cpucfg3 = 0x3; + uint32_t cpucfg2 = 0x2, cpucfg3 = 0x3; __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2)); __asm__("cpucfg %[cpucfg3], %[cpucfg3]\n\t" : [cpucfg3] "+r"(cpucfg3)); diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index b236e26f495dfd..c60c5a0fc2bb78 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -471,6 +471,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, break; case GK_GFX950: Features["prng-inst"] = true; + Features["permlane16-swap"] = true; + Features["permlane32-swap"] = true; Features["gfx950-insts"] = true; [[fallthrough]]; case GK_GFX942: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index e2eae7fb8327cc..b4033fc2a418a1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1185,14 +1185,27 @@ static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) { /// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01 /// (icmp ne X0, Y0) | (icmp ne X1, 
Y1) -> icmp ne X01, Y01 /// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer. -Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, - bool IsAnd) { +Value *InstCombinerImpl::foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd) { if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse()) return nullptr; CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE; - auto GetMatchPart = [&](ICmpInst *Cmp, + auto GetMatchPart = [&](Value *CmpV, unsigned OpNo) -> std::optional { + assert(CmpV->getType()->isIntOrIntVectorTy(1) && "Must be bool"); + + Value *X, *Y; + // icmp ne (and x, 1), (and y, 1) <=> trunc (xor x, y) to i1 + // icmp eq (and x, 1), (and y, 1) <=> not (trunc (xor x, y) to i1) + if (Pred == CmpInst::ICMP_NE + ? match(CmpV, m_Trunc(m_Xor(m_Value(X), m_Value(Y)))) + : match(CmpV, m_Not(m_Trunc(m_Xor(m_Value(X), m_Value(Y)))))) + return {{OpNo == 0 ? X : Y, 0, 1}}; + + auto *Cmp = dyn_cast(CmpV); + if (!Cmp) + return std::nullopt; + if (Pred == Cmp->getPredicate()) return matchIntPart(Cmp->getOperand(OpNo)); @@ -1465,11 +1478,15 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, // FCmp canonicalization ensures that (fcmp ord/uno X, X) and // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0). 
- if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP())) + if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP())) { // Ignore the constants because they are obviously not NANs: // (fcmp ord x, 0.0) & (fcmp ord y, 0.0) -> (fcmp ord x, y) // (fcmp uno x, 0.0) | (fcmp uno y, 0.0) -> (fcmp uno x, y) + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(LHS->getFastMathFlags() & + RHS->getFastMathFlags()); return Builder.CreateFCmp(PredL, LHS0, RHS0); + } } if (IsAnd && stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) { @@ -2728,47 +2745,31 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { foldBooleanAndOr(Op0, Op1, I, /*IsAnd=*/true, /*IsLogical=*/false)) return replaceInstUsesWith(I, Res); - { - ICmpInst *LHS = dyn_cast(Op0); - ICmpInst *RHS = dyn_cast(Op1); - - // TODO: Base this on foldBooleanAndOr instead? - // TODO: Make this recursive; it's a little tricky because an arbitrary - // number of 'and' instructions might have to be created. - if (LHS && match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { - bool IsLogical = isa(Op1); - // LHS & (X && Y) --> (LHS && X) && Y - if (auto *Cmp = dyn_cast(X)) - if (Value *Res = - foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, IsLogical)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalAnd(Res, Y) - : Builder.CreateAnd(Res, Y)); - // LHS & (X && Y) --> X && (LHS & Y) - if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, - /* IsLogical */ false)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalAnd(X, Res) - : Builder.CreateAnd(X, Res)); - } - if (RHS && match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { - bool IsLogical = isa(Op0); - // (X && Y) & RHS --> (X && RHS) && Y - if (auto *Cmp = dyn_cast(X)) - if (Value *Res = - foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, IsLogical)) - return replaceInstUsesWith(I, IsLogical - ? 
Builder.CreateLogicalAnd(Res, Y) - : Builder.CreateAnd(Res, Y)); - // (X && Y) & RHS --> X && (Y & RHS) - if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, - /* IsLogical */ false)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalAnd(X, Res) - : Builder.CreateAnd(X, Res)); - } + // TODO: Make this recursive; it's a little tricky because an arbitrary + // number of 'and' instructions might have to be created. + if (match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op1); + // Op0 & (X && Y) --> (Op0 && X) && Y + if (Value *Res = foldBooleanAndOr(Op0, X, I, /* IsAnd */ true, IsLogical)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalAnd(Res, Y) + : Builder.CreateAnd(Res, Y)); + // Op0 & (X && Y) --> X && (Op0 & Y) + if (Value *Res = foldBooleanAndOr(Op0, Y, I, /* IsAnd */ true, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalAnd(X, Res) + : Builder.CreateAnd(X, Res)); + } + if (match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op0); + // (X && Y) & Op1 --> (X && Op1) && Y + if (Value *Res = foldBooleanAndOr(X, Op1, I, /* IsAnd */ true, IsLogical)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalAnd(Res, Y) + : Builder.CreateAnd(Res, Y)); + // (X && Y) & Op1 --> X && (Y & Op1) + if (Value *Res = foldBooleanAndOr(Y, Op1, I, /* IsAnd */ true, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical ? 
Builder.CreateLogicalAnd(X, Res) + : Builder.CreateAnd(X, Res)); } if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder)) @@ -3416,9 +3417,6 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, return X; } - if (Value *X = foldEqOfParts(LHS, RHS, IsAnd)) - return X; - // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) // TODO: Remove this and below when foldLogOpOfMaskedICmps can handle undefs. @@ -3541,6 +3539,9 @@ Value *InstCombinerImpl::foldBooleanAndOr(Value *LHS, Value *RHS, if (Value *Res = foldLogicOfFCmps(LHSCmp, RHSCmp, IsAnd, IsLogical)) return Res; + if (Value *Res = foldEqOfParts(LHS, RHS, IsAnd)) + return Res; + return nullptr; } @@ -3829,48 +3830,31 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { foldBooleanAndOr(Op0, Op1, I, /*IsAnd=*/false, /*IsLogical=*/false)) return replaceInstUsesWith(I, Res); - { - ICmpInst *LHS = dyn_cast(Op0); - ICmpInst *RHS = dyn_cast(Op1); - - // TODO: Base this on foldBooleanAndOr instead? - // TODO: Make this recursive; it's a little tricky because an arbitrary - // number of 'or' instructions might have to be created. - Value *X, *Y; - if (LHS && match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { - bool IsLogical = isa(Op1); - // LHS | (X || Y) --> (LHS || X) || Y - if (auto *Cmp = dyn_cast(X)) - if (Value *Res = - foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, IsLogical)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalOr(Res, Y) - : Builder.CreateOr(Res, Y)); - // LHS | (X || Y) --> X || (LHS | Y) - if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, - /* IsLogical */ false)) - return replaceInstUsesWith(I, IsLogical - ? 
Builder.CreateLogicalOr(X, Res) - : Builder.CreateOr(X, Res)); - } - if (RHS && match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { - bool IsLogical = isa(Op0); - // (X || Y) | RHS --> (X || RHS) || Y - if (auto *Cmp = dyn_cast(X)) - if (Value *Res = - foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, IsLogical)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalOr(Res, Y) - : Builder.CreateOr(Res, Y)); - // (X || Y) | RHS --> X || (Y | RHS) - if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, - /* IsLogical */ false)) - return replaceInstUsesWith(I, IsLogical - ? Builder.CreateLogicalOr(X, Res) - : Builder.CreateOr(X, Res)); - } + // TODO: Make this recursive; it's a little tricky because an arbitrary + // number of 'or' instructions might have to be created. + if (match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op1); + // Op0 | (X || Y) --> (Op0 || X) || Y + if (Value *Res = foldBooleanAndOr(Op0, X, I, /* IsAnd */ false, IsLogical)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalOr(Res, Y) + : Builder.CreateOr(Res, Y)); + // Op0 | (X || Y) --> X || (Op0 | Y) + if (Value *Res = foldBooleanAndOr(Op0, Y, I, /* IsAnd */ false, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalOr(X, Res) + : Builder.CreateOr(X, Res)); + } + if (match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) { + bool IsLogical = isa(Op0); + // (X || Y) | Op1 --> (X || Op1) || Y + if (Value *Res = foldBooleanAndOr(X, Op1, I, /* IsAnd */ false, IsLogical)) + return replaceInstUsesWith(I, IsLogical ? Builder.CreateLogicalOr(Res, Y) + : Builder.CreateOr(Res, Y)); + // (X || Y) | Op1 --> X || (Y | Op1) + if (Value *Res = foldBooleanAndOr(Y, Op1, I, /* IsAnd */ false, + /* IsLogical */ false)) + return replaceInstUsesWith(I, IsLogical ? 
Builder.CreateLogicalOr(X, Res) + : Builder.CreateOr(X, Res)); } if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 9588930d7658c4..0508ed48fc19c4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -412,7 +412,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final bool IsAnd, bool IsLogical = false); Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor); - Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd); + Value *foldEqOfParts(Value *Cmp0, Value *Cmp1, bool IsAnd); Value *foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1, ICmpInst *ICmp2, bool IsAnd); diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 139e75dd3ddb34..555267327a0a9e 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -244,7 +244,7 @@ class ModuleSanitizerCoverage { void InjectTraceForSwitch(Function &F, ArrayRef SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef AllBlocks, - bool IsLeafFunc = true); + bool IsLeafFunc); GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, Function &F, Type *Ty, const char *Section); @@ -254,7 +254,7 @@ class ModuleSanitizerCoverage { Instruction *I); Value *CreateFunctionLocalGateCmp(IRBuilder<> &IRB); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, - Value *&FunctionGateCmp, bool IsLeafFunc = true); + Value *&FunctionGateCmp, bool IsLeafFunc); Function *CreateInitCallsForSections(Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty, const char *Section); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 
1991ec82d1e1e4..b664bde5d320a1 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1662,21 +1662,43 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1, /// \endcode /// /// So we need to turn hoisted load/store into cload/cstore. +/// +/// \param BI The branch instruction. +/// \param SpeculatedConditionalLoadsStores The load/store instructions that +/// will be speculated. +/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG. static void hoistConditionalLoadsStores( BranchInst *BI, SmallVectorImpl &SpeculatedConditionalLoadsStores, - bool Invert) { + std::optional Invert) { auto &Context = BI->getParent()->getContext(); auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1); auto *Cond = BI->getOperand(0); // Construct the condition if needed. BasicBlock *BB = BI->getParent(); - IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back()); - Value *Mask = Builder.CreateBitCast( - Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond, - VCondTy); + IRBuilder<> Builder( + Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI); + Value *Mask = nullptr; + Value *MaskFalse = nullptr; + Value *MaskTrue = nullptr; + if (Invert.has_value()) { + Mask = Builder.CreateBitCast( + *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond, + VCondTy); + } else { + MaskFalse = Builder.CreateBitCast( + Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy); + MaskTrue = Builder.CreateBitCast(Cond, VCondTy); + } + auto PeekThroughBitcasts = [](Value *V) { + while (auto *BitCast = dyn_cast(V)) + V = BitCast->getOperand(0); + return V; + }; for (auto *I : SpeculatedConditionalLoadsStores) { - IRBuilder<> Builder(I); + IRBuilder<> Builder(Invert.has_value() ? I : BI); + if (!Invert.has_value()) + Mask = I->getParent() == BI->getSuccessor(0) ? 
MaskTrue : MaskFalse; // We currently assume conditional faulting load/store is supported for // scalar types only when creating new instructions. This can be easily // extended for vector types in the future. @@ -1688,12 +1710,14 @@ static void hoistConditionalLoadsStores( auto *Ty = I->getType(); PHINode *PN = nullptr; Value *PassThru = nullptr; - for (User *U : I->users()) - if ((PN = dyn_cast(U))) { - PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB), - FixedVectorType::get(Ty, 1)); - break; - } + if (Invert.has_value()) + for (User *U : I->users()) + if ((PN = dyn_cast(U))) { + PassThru = Builder.CreateBitCast( + PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)), + FixedVectorType::get(Ty, 1)); + break; + } MaskedLoadStore = Builder.CreateMaskedLoad( FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru); Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty); @@ -1702,8 +1726,8 @@ static void hoistConditionalLoadsStores( I->replaceAllUsesWith(NewLoadStore); } else { // Handle Store. - auto *StoredVal = - Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1)); + auto *StoredVal = Builder.CreateBitCast( + PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1)); MaskedLoadStore = Builder.CreateMaskedStore( StoredVal, I->getOperand(1), cast(I)->getAlign(), Mask); } @@ -3155,7 +3179,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, return HaveRewritablePHIs; } -static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert, +static bool isProfitableToSpeculate(const BranchInst *BI, + std::optional Invert, const TargetTransformInfo &TTI) { // If the branch is non-unpredictable, and is predicted to *not* branch to // the `then` block, then avoid speculating it. 
@@ -3166,7 +3191,10 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert, if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0) return true; - uint64_t EndWeight = Invert ? TWeight : FWeight; + if (!Invert.has_value()) + return false; + + uint64_t EndWeight = *Invert ? TWeight : FWeight; BranchProbability BIEndProb = BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight); BranchProbability Likely = TTI.getPredictableBranchThreshold(); @@ -8034,6 +8062,35 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (HoistCommon && hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts)) return requestResimplify(); + + if (BI && HoistLoadsStoresWithCondFaulting && + Options.HoistLoadsStoresWithCondFaulting && + isProfitableToSpeculate(BI, std::nullopt, TTI)) { + SmallVector SpeculatedConditionalLoadsStores; + auto CanSpeculateConditionalLoadsStores = [&]() { + for (auto *Succ : successors(BB)) { + for (Instruction &I : *Succ) { + if (I.isTerminator()) { + if (I.getNumSuccessors() > 1) + return false; + continue; + } else if (!isSafeCheapLoadStore(&I, TTI) || + SpeculatedConditionalLoadsStores.size() == + HoistLoadsStoresWithCondFaultingThreshold) { + return false; + } + SpeculatedConditionalLoadsStores.push_back(&I); + } + } + return !SpeculatedConditionalLoadsStores.empty(); + }; + + if (CanSpeculateConditionalLoadsStores()) { + hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, + std::nullopt); + return requestResimplify(); + } + } } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index a6b5235235ff3b..fbcf181a45a664 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -234,9 +234,9 @@ class VPBuilder { VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, - VPValue *Step) { + VPValue *Step, const Twine &Name = "") { return tryInsertInstruction( - new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step)); + new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step, Name)); } VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d13770a35c108f..d68a26251ac9d4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9603,7 +9603,7 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { Value *DerivedIV = emitTransformedIndex( State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); - DerivedIV->setName("offset.idx"); + DerivedIV->setName(Name); assert(DerivedIV != CanonicalIV && "IV didn't need transforming?"); State.set(this, DerivedIV, VPLane(0)); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8e0ca2677bf0a9..d033b7c2ef4a92 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -456,15 +456,18 @@ static std::string shortBundleName(ArrayRef VL, int Idx = -1) { /// \returns true if all of the instructions in \p VL are in the same block or /// false otherwise. 
static bool allSameBlock(ArrayRef VL) { - Instruction *I0 = dyn_cast(VL[0]); - if (!I0) + auto *It = find_if(VL, IsaPred); + if (It == VL.end()) return false; + Instruction *I0 = cast(*It); if (all_of(VL, isVectorLikeInstWithConstOps)) return true; BasicBlock *BB = I0->getParent(); - for (int I = 1, E = VL.size(); I < E; I++) { - auto *II = dyn_cast(VL[I]); + for (Value *V : iterator_range(It, VL.end())) { + if (isa(V)) + continue; + auto *II = dyn_cast(V); if (!II) return false; @@ -893,10 +896,19 @@ static bool isCmpSameOrSwapped(const CmpInst *BaseCI, const CmpInst *CI, static InstructionsState getSameOpcode(ArrayRef VL, const TargetLibraryInfo &TLI) { // Make sure these are all Instructions. - if (!all_of(VL, IsaPred)) + if (!all_of(VL, IsaPred)) + return InstructionsState::invalid(); + + auto *It = find_if(VL, IsaPred); + if (It == VL.end()) + return InstructionsState::invalid(); + + Value *V = *It; + unsigned InstCnt = std::count_if(It, VL.end(), IsaPred); + if ((VL.size() > 2 && !isa(V) && InstCnt < VL.size() / 2) || + (VL.size() == 2 && InstCnt < 2)) return InstructionsState::invalid(); - Value *V = VL.front(); bool IsCastOp = isa(V); bool IsBinOp = isa(V); bool IsCmpOp = isa(V); @@ -904,7 +916,7 @@ static InstructionsState getSameOpcode(ArrayRef VL, IsCmpOp ? cast(V)->getPredicate() : CmpInst::BAD_ICMP_PREDICATE; unsigned Opcode = cast(V)->getOpcode(); unsigned AltOpcode = Opcode; - unsigned AltIndex = 0; + unsigned AltIndex = std::distance(VL.begin(), It); bool SwappedPredsCompatible = [&]() { if (!IsCmpOp) @@ -940,8 +952,17 @@ static InstructionsState getSameOpcode(ArrayRef VL, if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty()) return InstructionsState::invalid(); } + bool AnyPoison = InstCnt != VL.size(); for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) { - auto *I = cast(VL[Cnt]); + auto *I = dyn_cast(VL[Cnt]); + if (!I) + continue; + + // Cannot combine poison and divisions. 
+ // TODO: do some smart analysis of the CallInsts to exclude divide-like + // intrinsics/functions only. + if (AnyPoison && (I->isIntDivRem() || I->isFPDivRem() || isa(I))) + return InstructionsState::invalid(); unsigned InstOpcode = I->getOpcode(); if (IsBinOp && isa(I)) { if (InstOpcode == Opcode || InstOpcode == AltOpcode) @@ -1183,10 +1204,13 @@ static SmallBitVector getAltInstrMask(ArrayRef VL, unsigned Opcode0, Type *ScalarTy = VL[0]->getType(); unsigned ScalarTyNumElements = getNumElements(ScalarTy); SmallBitVector OpcodeMask(VL.size() * ScalarTyNumElements, false); - for (unsigned Lane : seq(VL.size())) + for (unsigned Lane : seq(VL.size())) { + if (isa(VL[Lane])) + continue; if (cast(VL[Lane])->getOpcode() == Opcode1) OpcodeMask.set(Lane * ScalarTyNumElements, Lane * ScalarTyNumElements + ScalarTyNumElements); + } return OpcodeMask; } @@ -1371,22 +1395,46 @@ class BoUpSLP { return VectorizableTree.front()->Scalars; } + /// Returns the type/is-signed info for the root node in the graph without + /// casting. + std::optional> getRootNodeTypeWithNoCast() const { + const TreeEntry &Root = *VectorizableTree.front().get(); + if (Root.State != TreeEntry::Vectorize || Root.isAltShuffle() || + !Root.Scalars.front()->getType()->isIntegerTy()) + return std::nullopt; + auto It = MinBWs.find(&Root); + if (It != MinBWs.end()) + return std::make_pair(IntegerType::get(Root.Scalars.front()->getContext(), + It->second.first), + It->second.second); + if (Root.getOpcode() == Instruction::ZExt || + Root.getOpcode() == Instruction::SExt) + return std::make_pair(cast(Root.getMainOp())->getSrcTy(), + Root.getOpcode() == Instruction::SExt); + return std::nullopt; + } + /// Checks if the root graph node can be emitted with narrower bitwidth at /// codegen and returns it signedness, if so. 
bool isSignedMinBitwidthRootNode() const { return MinBWs.at(VectorizableTree.front().get()).second; } - /// Returns reduction bitwidth and signedness, if it does not match the - /// original requested size. - std::optional> getReductionBitWidthAndSign() const { + /// Returns reduction type after minbitwidth analysis. + FixedVectorType *getReductionType() const { if (ReductionBitWidth == 0 || + !VectorizableTree.front()->Scalars.front()->getType()->isIntegerTy() || ReductionBitWidth >= DL->getTypeSizeInBits( VectorizableTree.front()->Scalars.front()->getType())) - return std::nullopt; - return std::make_pair(ReductionBitWidth, - MinBWs.at(VectorizableTree.front().get()).second); + return getWidenedType( + VectorizableTree.front()->Scalars.front()->getType(), + VectorizableTree.front()->getVectorFactor()); + return getWidenedType( + IntegerType::get( + VectorizableTree.front()->Scalars.front()->getContext(), + ReductionBitWidth), + VectorizableTree.front()->getVectorFactor()); } /// Builds external uses of the vectorized scalars, i.e. the list of @@ -1799,13 +1847,17 @@ class BoUpSLP { (S.MainOp->getNumOperands() <= 2 || !MainAltOps.empty() || !S.isAltShuffle()) && all_of(Ops, [&S](Value *V) { - return cast(V)->getNumOperands() == - S.MainOp->getNumOperands(); + return isa(V) || + cast(V)->getNumOperands() == + S.MainOp->getNumOperands(); })) return S.isAltShuffle() ? LookAheadHeuristics::ScoreAltOpcodes : LookAheadHeuristics::ScoreSameOpcode; } + if (I1 && isa(V2)) + return LookAheadHeuristics::ScoreSameOpcode; + if (isa(V2)) return LookAheadHeuristics::ScoreUndef; @@ -2354,17 +2406,17 @@ class BoUpSLP { assert(!VL.empty() && "Bad VL"); assert((empty() || VL.size() == getNumLanes()) && "Expected same number of lanes"); - assert(isa(VL[0]) && "Expected instruction"); constexpr unsigned IntrinsicNumOperands = 2; - unsigned NumOperands = isa(VL[0]) - ?
IntrinsicNumOperands - : cast(VL[0])->getNumOperands(); + auto *VL0 = cast(*find_if(VL, IsaPred)); + unsigned NumOperands = isa(VL0) ? IntrinsicNumOperands + : VL0->getNumOperands(); OpsVec.resize(NumOperands); unsigned NumLanes = VL.size(); for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { OpsVec[OpIdx].resize(NumLanes); for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { - assert(isa(VL[Lane]) && "Expected instruction"); + assert((isa(VL[Lane]) || isa(VL[Lane])) && + "Expected instruction or poison value"); // Our tree has just 3 nodes: the root and two operands. // It is therefore trivial to get the APO. We only need to check the // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or @@ -2375,6 +2427,12 @@ class BoUpSLP { // Since operand reordering is performed on groups of commutative // operations or alternating sequences (e.g., +, -), we can safely // tell the inverse operations by checking commutativity. + if (isa(VL[Lane])) { + OpsVec[OpIdx][Lane] = { + PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true, + false}; + continue; + } bool IsInverseOperation = !isCommutative(cast(VL[Lane])); bool APO = (OpIdx == 0) ? false : IsInverseOperation; OpsVec[OpIdx][Lane] = {cast(VL[Lane])->getOperand(OpIdx), @@ -2472,7 +2530,7 @@ class BoUpSLP { Value *OpILn = getValue(OpI, Ln); return (L && L->isLoopInvariant(OpILn)) || (getSameOpcode({Op, OpILn}, TLI).getOpcode() && - Op->getParent() == cast(OpILn)->getParent()); + allSameBlock({Op, OpILn})); })) return true; } @@ -2484,7 +2542,8 @@ class BoUpSLP { VLOperands(ArrayRef RootVL, const BoUpSLP &R) : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R), L(R.LI->getLoopFor( - (cast(RootVL.front())->getParent()))) { + (cast(*find_if(RootVL, IsaPred)) + ->getParent()))) { // Append all the operands of RootVL. appendOperandsOfVL(RootVL); } @@ -3286,13 +3345,18 @@ class BoUpSLP { /// Set the operands of this bundle in their original order. 
void setOperandsInOrder() { assert(Operands.empty() && "Already initialized?"); - auto *I0 = cast(Scalars[0]); + auto *I0 = cast(*find_if(Scalars, IsaPred)); Operands.resize(I0->getNumOperands()); unsigned NumLanes = Scalars.size(); for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands(); OpIdx != NumOperands; ++OpIdx) { Operands[OpIdx].resize(NumLanes); for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + if (isa(Scalars[Lane])) { + Operands[OpIdx][Lane] = + PoisonValue::get(I0->getOperand(OpIdx)->getType()); + continue; + } auto *I = cast(Scalars[Lane]); assert(I->getNumOperands() == NumOperands && "Expected same number of operands"); @@ -4912,8 +4976,8 @@ BoUpSLP::canVectorizeLoads(ArrayRef VL, const Value *VL0, PointerOps.resize(Sz); auto *POIter = PointerOps.begin(); for (Value *V : VL) { - auto *L = cast(V); - if (!L->isSimple()) + auto *L = dyn_cast(V); + if (!L || !L->isSimple()) return LoadsState::Gather; *POIter = L->getPointerOperand(); ++POIter; @@ -5491,6 +5555,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { TE.ReuseShuffleIndices.end()); if (TE.getOpcode() == Instruction::ExtractElement && all_of(TE.Scalars, [Sz](Value *V) { + if (isa(V)) + return true; std::optional Idx = getExtractIndex(cast(V)); return Idx && *Idx < Sz; })) { @@ -5579,7 +5645,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { auto PHICompare = [&](unsigned I1, unsigned I2) { Value *V1 = TE.Scalars[I1]; Value *V2 = TE.Scalars[I2]; - if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0)) + if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0) || + isa(V1) || isa(V2)) return false; if (V1->getNumUses() < V2->getNumUses()) return true; @@ -7352,8 +7419,14 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S, for (unsigned I : seq(0, S.MainOp->getNumOperands())) { Operands.emplace_back(); // Prepare the operand vector. 
- for (Value *V : VL) + for (Value *V : VL) { + if (isa(V)) { + Operands.back().push_back( + PoisonValue::get(S.MainOp->getOperand(I)->getType())); + continue; + } Operands.back().push_back(cast(V)->getOperand(I)); + } } if (Operands.size() == 2) { // Try find best operands candidates. @@ -7460,8 +7533,11 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( if (VL0->getNumOperands() > MaxPHINumOperands) return TreeEntry::NeedToGather; // Check for terminator values (e.g. invoke). - for (Value *V : VL) - for (Value *Incoming : cast(V)->incoming_values()) { + for (Value *V : VL) { + auto *PHI = dyn_cast(V); + if (!PHI) + continue; + for (Value *Incoming : PHI->incoming_values()) { Instruction *Term = dyn_cast(Incoming); if (Term && Term->isTerminator()) { LLVM_DEBUG(dbgs() @@ -7469,6 +7545,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( return TreeEntry::NeedToGather; } } + } return TreeEntry::Vectorize; } @@ -7544,8 +7621,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); - else if (any_of(VL, - [](Value *V) { return !cast(V)->isSimple(); })) + else if (any_of(VL, [](Value *V) { + auto *LI = dyn_cast(V); + return !LI || !LI->isSimple(); + })) LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); else LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n"); @@ -7569,6 +7648,8 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( case Instruction::BitCast: { Type *SrcTy = VL0->getOperand(0)->getType(); for (Value *V : VL) { + if (isa(V)) + continue; Type *Ty = cast(V)->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { LLVM_DEBUG( @@ -7585,7 +7666,9 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0); Type *ComparedTy = 
VL0->getOperand(0)->getType(); for (Value *V : VL) { - CmpInst *Cmp = cast(V); + if (isa(V)) + continue; + auto *Cmp = cast(V); if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) || Cmp->getOperand(0)->getType() != ComparedTy) { LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); @@ -7828,7 +7911,13 @@ class PHIHandler { } // Prepare the operand vector. for (auto [Idx, V] : enumerate(Phis)) { - auto *P = cast(V); + auto *P = dyn_cast(V); + if (!P) { + assert(isa(V) && + "Expected isa instruction or poison value."); + Operands[I][Idx] = V; + continue; + } if (P->getIncomingBlock(I) == InBB) Operands[I][Idx] = P->getIncomingValue(I); else @@ -7847,6 +7936,11 @@ class PHIHandler { Blocks.try_emplace(InBB).first->second.push_back(I); } for (auto [Idx, V] : enumerate(Phis)) { + if (isa(V)) { + for (unsigned I : seq(Main->getNumIncomingValues())) + Operands[I][Idx] = V; + continue; + } auto *P = cast(V); for (unsigned I : seq(0, P->getNumIncomingValues())) { BasicBlock *InBB = P->getIncomingBlock(I); @@ -7896,7 +7990,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (Value *V : VL) { if (isConstant(V)) { ReuseShuffleIndices.emplace_back( - isa(V) ? PoisonMaskElem : UniqueValues.size()); + isa(V) ? PoisonMaskElem : UniqueValues.size()); UniqueValues.emplace_back(V); continue; } @@ -7928,11 +8022,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, }))) { if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() && - all_of(UniqueValues, [=](Value *V) { - return isa(V) || - areAllUsersVectorized(cast(V), - UserIgnoreList); - })) { + all_of(UniqueValues, IsaPred)) { // Find the number of elements, which forms full vectors. 
unsigned PWSz = getFullVectorNumberOfElements( *TTI, UniqueValues.front()->getType(), UniqueValues.size()); @@ -7940,8 +8030,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, ReuseShuffleIndices.clear(); } else { NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end()); - NonUniqueValueVL.append(PWSz - UniqueValues.size(), - UniqueValues.back()); + NonUniqueValueVL.append( + PWSz - UniqueValues.size(), + PoisonValue::get(UniqueValues.front()->getType())); VL = NonUniqueValueVL; } return true; @@ -8076,7 +8167,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, return true; // Check if all operands are extracts, part of vector node or can build a // regular vectorize node. - SmallVector InstsCount(VL.size(), 0); + SmallVector InstsCount; for (Value *V : VL) { auto *I = cast(V); InstsCount.push_back(count_if(I->operand_values(), [](Value *Op) { @@ -8470,6 +8561,11 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } else { // Collect operands - commute if it uses the swapped predicate. for (Value *V : VL) { + if (isa(V)) { + Left.push_back(PoisonValue::get(VL0->getOperand(0)->getType())); + Right.push_back(PoisonValue::get(VL0->getOperand(1)->getType())); + continue; + } auto *Cmp = cast(V); Value *LHS = Cmp->getOperand(0); Value *RHS = Cmp->getOperand(1); @@ -8669,7 +8765,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (isa(VL0) || CI) { ValueList Left, Right; if (!CI || all_of(VL, [](Value *V) { - return cast(V)->isCommutative(); + return isa(V) || cast(V)->isCommutative(); })) { reorderInputsAccordingToOpcode(VL, Left, Right, *this); } else { @@ -8682,6 +8778,13 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Collect operands - commute if it uses the swapped predicate or // alternate operation. 
for (Value *V : VL) { + if (isa(V)) { + Left.push_back( + PoisonValue::get(MainCI->getOperand(0)->getType())); + Right.push_back( + PoisonValue::get(MainCI->getOperand(1)->getType())); + continue; + } auto *Cmp = cast(V); Value *LHS = Cmp->getOperand(0); Value *RHS = Cmp->getOperand(1); @@ -8886,6 +8989,8 @@ void BoUpSLP::TreeEntry::buildAltOpShuffleMask( unsigned Idx = I; if (!ReorderIndices.empty()) Idx = OrderMask[I]; + if (isa(Scalars[Idx])) + continue; auto *OpInst = cast(Scalars[Idx]); if (IsAltOp(OpInst)) { Mask[I] = Sz + Idx; @@ -9660,9 +9765,11 @@ void BoUpSLP::transformNodes() { // Try to vectorize reduced values or if all users are vectorized. // For expensive instructions extra extracts might be profitable. if ((!UserIgnoreList || E.Idx != 0) && - TTI->getInstructionCost(cast(Slice.front()), - CostKind) < TTI::TCC_Expensive && + TTI->getInstructionCost(S.MainOp, CostKind) < + TTI::TCC_Expensive && !all_of(Slice, [&](Value *V) { + if (isa(V)) + return true; return areAllUsersVectorized(cast(V), UserIgnoreList); })) @@ -9685,12 +9792,13 @@ void BoUpSLP::transformNodes() { continue; } } else if (S.getOpcode() == Instruction::ExtractElement || - (TTI->getInstructionCost( - cast(Slice.front()), CostKind) < + (TTI->getInstructionCost(S.MainOp, CostKind) < TTI::TCC_Expensive && !CheckOperandsProfitability( - cast(Slice.front()), - cast(Slice.back()), S))) { + S.MainOp, + cast(*find_if(reverse(Slice), + IsaPred)), + S))) { // Do not vectorize extractelements (handled effectively // alread). Do not vectorize non-profitable instructions (with // low cost and non-vectorizable operands.) 
@@ -10958,7 +11066,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, const unsigned Sz = UniqueValues.size(); SmallBitVector UsedScalars(Sz, false); for (unsigned I = 0; I < Sz; ++I) { - if (getTreeEntry(UniqueValues[I]) == E) + if (isa(UniqueValues[I]) && getTreeEntry(UniqueValues[I]) == E) continue; UsedScalars.set(I); } @@ -11097,6 +11205,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, case Instruction::ExtractValue: case Instruction::ExtractElement: { auto GetScalarCost = [&](unsigned Idx) { + if (isa(UniqueValues[Idx])) + return InstructionCost(TTI::TCC_Free); + auto *I = cast(UniqueValues[Idx]); VectorType *SrcVecTy; if (ShuffleOrOp == Instruction::ExtractElement) { @@ -11285,10 +11396,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, VecOpcode = Instruction::UIToFP; } auto GetScalarCost = [&](unsigned Idx) -> InstructionCost { - auto *VI = cast(UniqueValues[Idx]); + assert(Idx == 0 && "Expected 0 index only"); return TTI->getCastInstrCost(Opcode, VL0->getType(), VL0->getOperand(0)->getType(), - TTI::getCastContextHint(VI), CostKind, VI); + TTI::getCastContextHint(VL0), CostKind, VL0); }; auto GetVectorCost = [=](InstructionCost CommonCost) { // Do not count cost here if minimum bitwidth is in effect and it is just @@ -11297,6 +11408,20 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, return CommonCost; auto *VI = VL0->getOpcode() == Opcode ? 
VL0 : nullptr; TTI::CastContextHint CCH = GetCastContextHint(VL0->getOperand(0)); + + bool IsArithmeticExtendedReduction = + E->Idx == 0 && UserIgnoreList && + all_of(*UserIgnoreList, [](Value *V) { + auto *I = cast(V); + return is_contained({Instruction::Add, Instruction::FAdd, + Instruction::Mul, Instruction::FMul, + Instruction::And, Instruction::Or, + Instruction::Xor}, + I->getOpcode()); + }); + if (IsArithmeticExtendedReduction && + (VecOpcode == Instruction::ZExt || VecOpcode == Instruction::SExt)) + return CommonCost; return CommonCost + TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH, CostKind, VecOpcode == Opcode ? VI : nullptr); @@ -11316,6 +11441,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, ? CmpInst::BAD_FCMP_PREDICATE : CmpInst::BAD_ICMP_PREDICATE; auto GetScalarCost = [&](unsigned Idx) { + if (isa(UniqueValues[Idx])) + return InstructionCost(TTI::TCC_Free); + auto *VI = cast(UniqueValues[Idx]); CmpInst::Predicate CurrentPred = ScalarTy->isFloatingPointTy() ? CmpInst::BAD_FCMP_PREDICATE @@ -11396,6 +11524,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, case Instruction::Or: case Instruction::Xor: { auto GetScalarCost = [&](unsigned Idx) { + if (isa(UniqueValues[Idx])) + return InstructionCost(TTI::TCC_Free); + auto *VI = cast(UniqueValues[Idx]); unsigned OpIdx = isa(VI) ? 
0 : 1; TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0)); @@ -11583,6 +11714,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, return false; }; auto GetScalarCost = [&](unsigned Idx) { + if (isa(UniqueValues[Idx])) + return InstructionCost(TTI::TCC_Free); + auto *VI = cast(UniqueValues[Idx]); assert(E->isOpcodeOrAlt(VI) && "Unexpected main/alternate opcode"); (void)E; @@ -12652,32 +12786,48 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { unsigned SrcSize = It->second.first; unsigned DstSize = ReductionBitWidth; unsigned Opcode = Instruction::Trunc; - if (SrcSize < DstSize) - Opcode = It->second.second ? Instruction::SExt : Instruction::ZExt; - auto *SrcVecTy = - getWidenedType(Builder.getIntNTy(SrcSize), E.getVectorFactor()); - auto *DstVecTy = - getWidenedType(Builder.getIntNTy(DstSize), E.getVectorFactor()); - TTI::CastContextHint CCH = getCastContextHint(E); - InstructionCost CastCost; - switch (E.getOpcode()) { - case Instruction::SExt: - case Instruction::ZExt: - case Instruction::Trunc: { - const TreeEntry *OpTE = getOperandEntry(&E, 0); - CCH = getCastContextHint(*OpTE); - break; - } - default: - break; + if (SrcSize < DstSize) { + bool IsArithmeticExtendedReduction = + all_of(*UserIgnoreList, [](Value *V) { + auto *I = cast(V); + return is_contained({Instruction::Add, Instruction::FAdd, + Instruction::Mul, Instruction::FMul, + Instruction::And, Instruction::Or, + Instruction::Xor}, + I->getOpcode()); + }); + if (IsArithmeticExtendedReduction) + Opcode = + Instruction::BitCast; // Handle it by getExtendedReductionCost + else + Opcode = It->second.second ? 
Instruction::SExt : Instruction::ZExt; + } + if (Opcode != Instruction::BitCast) { + auto *SrcVecTy = + getWidenedType(Builder.getIntNTy(SrcSize), E.getVectorFactor()); + auto *DstVecTy = + getWidenedType(Builder.getIntNTy(DstSize), E.getVectorFactor()); + TTI::CastContextHint CCH = getCastContextHint(E); + InstructionCost CastCost; + switch (E.getOpcode()) { + case Instruction::SExt: + case Instruction::ZExt: + case Instruction::Trunc: { + const TreeEntry *OpTE = getOperandEntry(&E, 0); + CCH = getCastContextHint(*OpTE); + break; + } + default: + break; + } + CastCost += TTI->getCastInstrCost(Opcode, DstVecTy, SrcVecTy, CCH, + TTI::TCK_RecipThroughput); + Cost += CastCost; + LLVM_DEBUG(dbgs() << "SLP: Adding cost " << CastCost + << " for final resize for reduction from " << SrcVecTy + << " to " << DstVecTy << "\n"; + dbgs() << "SLP: Current total cost = " << Cost << "\n"); } - CastCost += TTI->getCastInstrCost(Opcode, DstVecTy, SrcVecTy, CCH, - TTI::TCK_RecipThroughput); - Cost += CastCost; - LLVM_DEBUG(dbgs() << "SLP: Adding cost " << CastCost - << " for final resize for reduction from " << SrcVecTy - << " to " << DstVecTy << "\n"; - dbgs() << "SLP: Current total cost = " << Cost << "\n"); } } @@ -13373,8 +13523,8 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { if (E->getOpcode() == Instruction::GetElementPtr && !isa(V)) return true; - auto *I = cast(V); - return !E->isOpcodeOrAlt(I) || I->getParent() == BB || + auto *I = dyn_cast(V); + return !I || !E->isOpcodeOrAlt(I) || I->getParent() == BB || isVectorLikeInstWithConstOps(I); })) && "Expected gathered loads or GEPs or instructions from same basic " @@ -13473,8 +13623,9 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { })) || all_of(E->Scalars, [](Value *V) { - return !isVectorLikeInstWithConstOps(V) && - isUsedOutsideBlock(V); + return isa(V) || + (!isVectorLikeInstWithConstOps(V) && + isUsedOutsideBlock(V)); }) || (E->isGather() && E->Idx == 0 && 
all_of(E->Scalars, [](Value *V) { return isa(V) || @@ -14002,12 +14153,16 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { Value *V1 = E1.VectorizedValue; if (V1->getType()->isIntOrIntVectorTy()) V1 = castToScalarTyElem(V1, any_of(E1.Scalars, [&](Value *V) { + if (isa(V)) + return false; return !isKnownNonNegative( V, SimplifyQuery(*R.DL)); })); Value *V2 = E2.VectorizedValue; if (V2->getType()->isIntOrIntVectorTy()) V2 = castToScalarTyElem(V2, any_of(E2.Scalars, [&](Value *V) { + if (isa(V)) + return false; return !isKnownNonNegative( V, SimplifyQuery(*R.DL)); })); @@ -14019,6 +14174,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { Value *V1 = E1.VectorizedValue; if (V1->getType()->isIntOrIntVectorTy()) V1 = castToScalarTyElem(V1, any_of(E1.Scalars, [&](Value *V) { + if (isa(V)) + return false; return !isKnownNonNegative( V, SimplifyQuery(*R.DL)); })); @@ -14181,6 +14338,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { Value *V = E->VectorizedValue; if (V->getType()->isIntOrIntVectorTy()) V = castToScalarTyElem(V, any_of(E->Scalars, [&](Value *V) { + if (isa(V)) + return false; return !isKnownNonNegative( V, SimplifyQuery(*R.DL)); })); @@ -14897,6 +15056,16 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy, Builder, *this); } +/// \returns \p Inst after propagating metadata from \p VL only for instructions +/// in \p VL.
+static Instruction *propagateMetadata(Instruction *Inst, ArrayRef VL) { + SmallVector Insts; + for (Value *V : VL) + if (isa(V)) + Insts.push_back(V); + return llvm::propagateMetadata(Inst, Insts); +} + Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { IRBuilderBase::InsertPointGuard Guard(Builder); @@ -14966,6 +15135,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { IsSigned = It->second.second; else IsSigned = any_of(OpE->Scalars, [&](Value *R) { + if (isa(R)) + return false; return !isKnownNonNegative(R, SimplifyQuery(*DL)); }); return IsSigned; @@ -15054,7 +15225,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Builder.SetInsertPoint(LI); Value *Ptr = LI->getPointerOperand(); LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign()); - Value *NewV = propagateMetadata(V, E->Scalars); + Value *NewV = ::propagateMetadata(V, E->Scalars); NewV = FinalShuffle(NewV, E); E->VectorizedValue = NewV; return NewV; @@ -15387,7 +15558,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { static_cast(E->getOpcode()), Op); propagateIRFlags(V, E->Scalars, VL0); if (auto *I = dyn_cast(V)) - V = propagateMetadata(I, E->Scalars); + V = ::propagateMetadata(I, E->Scalars); V = FinalShuffle(V, E); @@ -15481,11 +15652,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { RHS); propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end()); if (auto *I = dyn_cast(V)) { - V = propagateMetadata(I, E->Scalars); + V = ::propagateMetadata(I, E->Scalars); // Drop nuw flags for abs(sub(commutative), true).
if (!MinBWs.contains(E) && ShuffleOrOp == Instruction::Sub && any_of(E->Scalars, [](Value *V) { - return isCommutative(cast(V)); + return isa(V) || isCommutative(cast(V)); })) I->setHasNoUnsignedWrap(/*b=*/false); } @@ -15580,7 +15751,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Align CommonAlignment = computeCommonAlignment(E->Scalars); NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment); } - Value *V = propagateMetadata(NewLI, E->Scalars); + Value *V = ::propagateMetadata(NewLI, E->Scalars); V = FinalShuffle(V, E); E->VectorizedValue = V; @@ -15625,7 +15796,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { ST = Inst; } - Value *V = propagateMetadata(ST, E->Scalars); + Value *V = ::propagateMetadata(ST, E->Scalars); E->VectorizedValue = V; ++NumVectorInstructions; @@ -15658,7 +15829,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (isa(V)) GEPs.push_back(V); } - V = propagateMetadata(I, GEPs); + V = ::propagateMetadata(I, GEPs); } V = FinalShuffle(V, E); @@ -15772,7 +15943,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } propagateIRFlags(V, E->Scalars, VL0); if (auto *I = dyn_cast(V)) - V = propagateMetadata(I, E->Scalars); + V = ::propagateMetadata(I, E->Scalars); V = FinalShuffle(V, E); } else { assert(E->isAltShuffle() && @@ -15849,7 +16020,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { assert(LHS->getType() == VecTy && "Expected same type as operand."); if (auto *I = dyn_cast(LHS)) - LHS = propagateMetadata(I, E->Scalars); + LHS = ::propagateMetadata(I, E->Scalars); LHS = FinalShuffle(LHS, E); E->VectorizedValue = LHS; ++NumVectorInstructions; @@ -15890,9 +16061,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (auto *I = dyn_cast(Vec); I && Opcode == Instruction::Sub && !MinBWs.contains(E) && any_of(E->Scalars, [](Value *V) { + if (isa(V)) + return false; auto *IV = cast(V); - return IV->getOpcode() == 
Instruction::Sub && - isCommutative(cast(IV)); + return IV->getOpcode() == Instruction::Sub && isCommutative(IV); })) I->setHasNoUnsignedWrap(/*b=*/false); }; @@ -15905,7 +16077,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } V = Builder.CreateShuffleVector(V0, V1, Mask); if (auto *I = dyn_cast(V)) { - V = propagateMetadata(I, E->Scalars); + V = ::propagateMetadata(I, E->Scalars); GatherShuffleExtractSeq.insert(I); CSEBlocks.insert(I->getParent()); } @@ -16483,6 +16655,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, if (auto *EE = dyn_cast(Scalar); EE && IgnoredExtracts.contains(EE)) continue; + if (isa(Scalar)) + continue; #ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { @@ -17381,9 +17555,13 @@ bool BoUpSLP::collectValuesToDemote( // by the insertelement instruction and not used in multiple vector nodes, it // cannot be demoted. bool IsSignedNode = any_of(E.Scalars, [&](Value *R) { + if (isa(R)) + return false; return !isKnownNonNegative(R, SimplifyQuery(*DL)); }); auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool { + if (isa(V)) + return true; if (MultiNodeScalars.contains(V)) return false; // For lat shuffle of sext/zext with many uses need to check the extra bit @@ -17566,6 +17744,8 @@ bool BoUpSLP::collectValuesToDemote( // inrange amount, we can always perform a SHL in a smaller type. auto ShlChecker = [&](unsigned BitWidth, unsigned) { return all_of(E.Scalars, [&](Value *V) { + if (isa(V)) + return true; auto *I = cast(V); KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); return AmtKnownBits.getMaxValue().ult(BitWidth); @@ -17580,6 +17760,8 @@ bool BoUpSLP::collectValuesToDemote( // already zeros. 
auto LShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { return all_of(E.Scalars, [&](Value *V) { + if (isa(V)) + return true; auto *I = cast(V); KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); @@ -17849,6 +18031,8 @@ void BoUpSLP::computeMinimumValueSizes() { // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. bool IsKnownPositive = !IsSignedCmp && all_of(E.Scalars, [&](Value *R) { + if (isa(R)) + return true; KnownBits Known = computeKnownBits(R, *DL); return Known.isNonNegative(); }); @@ -17856,6 +18040,8 @@ void BoUpSLP::computeMinimumValueSizes() { // We first check if all the bits of the roots are demanded. If they're not, // we can truncate the roots to this narrower type. for (Value *Root : E.Scalars) { + if (isa(Root)) + continue; unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT); TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType()->getScalarType()); @@ -17912,9 +18098,8 @@ void BoUpSLP::computeMinimumValueSizes() { !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) && (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || DL->getTypeSizeInBits(TreeRootIT) / - DL->getTypeSizeInBits(cast(E.Scalars.front()) - ->getOperand(0) - ->getType()) > + DL->getTypeSizeInBits( + E.getMainOp()->getOperand(0)->getType()) > 2))))) return 0u; // Round MaxBitWidth up to the next power-of-two. @@ -17933,7 +18118,8 @@ void BoUpSLP::computeMinimumValueSizes() { // x i1> to in)). 
if (all_of(*UserIgnoreList, [](Value *V) { - return cast(V)->getOpcode() == Instruction::Add; + return isa(V) || + cast(V)->getOpcode() == Instruction::Add; }) && VectorizableTree.front()->State == TreeEntry::Vectorize && VectorizableTree.front()->getOpcode() == Instruction::ZExt && @@ -17942,6 +18128,8 @@ void BoUpSLP::computeMinimumValueSizes() { ReductionBitWidth = 1; } else { for (Value *V : *UserIgnoreList) { + if (isa(V)) + continue; unsigned NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT); TypeSize NumTypeBits = DL->getTypeSizeInBits(V->getType()); unsigned BitWidth1 = NumTypeBits - NumSignBits; @@ -18057,8 +18245,10 @@ void BoUpSLP::computeMinimumValueSizes() { if (MinBWs.contains(TE)) continue; bool IsSigned = any_of(TE->Scalars, [&](Value *R) { - return !isKnownNonNegative(R, SimplifyQuery(*DL)); - }); + if (isa(R)) + return false; + return !isKnownNonNegative(R, SimplifyQuery(*DL)); + }); MinBWs.try_emplace(TE, MaxBitWidth, IsSigned); } } @@ -19815,8 +20005,8 @@ class HorizontalReduction { // Estimate cost. InstructionCost TreeCost = V.getTreeCost(VL); - InstructionCost ReductionCost = getReductionCost( - TTI, VL, IsCmpSelMinMax, RdxFMF, V.getReductionBitWidthAndSign()); + InstructionCost ReductionCost = + getReductionCost(TTI, VL, IsCmpSelMinMax, RdxFMF, V); InstructionCost Cost = TreeCost + ReductionCost; LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for reduction\n"); @@ -20107,14 +20297,14 @@ class HorizontalReduction { private: /// Calculate the cost of a reduction. 
- InstructionCost getReductionCost( - TargetTransformInfo *TTI, ArrayRef ReducedVals, - bool IsCmpSelMinMax, FastMathFlags FMF, - const std::optional> BitwidthAndSign) { + InstructionCost getReductionCost(TargetTransformInfo *TTI, + ArrayRef ReducedVals, + bool IsCmpSelMinMax, FastMathFlags FMF, + const BoUpSLP &R) { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; Type *ScalarTy = ReducedVals.front()->getType(); unsigned ReduxWidth = ReducedVals.size(); - FixedVectorType *VectorTy = getWidenedType(ScalarTy, ReduxWidth); + FixedVectorType *VectorTy = R.getReductionType(); InstructionCost VectorCost = 0, ScalarCost; // If all of the reduced values are constant, the vector cost is 0, since // the reduction value can be calculated at the compile time. @@ -20172,21 +20362,16 @@ class HorizontalReduction { VecTy, APInt::getAllOnes(ScalarTyNumElements), /*Insert*/ true, /*Extract*/ false, TTI::TCK_RecipThroughput); } else { - auto [Bitwidth, IsSigned] = - BitwidthAndSign.value_or(std::make_pair(0u, false)); - if (RdxKind == RecurKind::Add && Bitwidth == 1) { - // Represent vector_reduce_add(ZExt()) to - // ZExtOrTrunc(ctpop(bitcast to in)). 
- auto *IntTy = IntegerType::get(ScalarTy->getContext(), ReduxWidth); - IntrinsicCostAttributes ICA(Intrinsic::ctpop, IntTy, {IntTy}, FMF); - VectorCost = - TTI->getCastInstrCost(Instruction::BitCast, IntTy, - getWidenedType(ScalarTy, ReduxWidth), - TTI::CastContextHint::None, CostKind) + - TTI->getIntrinsicInstrCost(ICA, CostKind); - } else { + Type *RedTy = VectorTy->getElementType(); + auto [RType, IsSigned] = R.getRootNodeTypeWithNoCast().value_or( + std::make_pair(RedTy, true)); + if (RType == RedTy) { VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind); + } else { + VectorCost = TTI->getExtendedReductionCost( + RdxOpcode, !IsSigned, RedTy, getWidenedType(RType, ReduxWidth), + FMF, CostKind); } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a24a86b4201c31..529108a5aaa97f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -478,32 +478,23 @@ void VPIRBasicBlock::execute(VPTransformState *State) { void VPBasicBlock::execute(VPTransformState *State) { bool Replica = bool(State->Lane); - VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; - VPBlockBase *SingleHPred = nullptr; BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. - auto IsLoopRegion = [](VPBlockBase *BB) { - auto *R = dyn_cast(BB); - return R && !R->isReplicator(); + auto IsReplicateRegion = [](VPBlockBase *BB) { + auto *R = dyn_cast_or_null(BB); + return R && R->isReplicator(); }; // 1. Create an IR basic block. - if (PrevVPBB && /* A */ - !((SingleHPred = getSingleHierarchicalPredecessor()) && - SingleHPred->getExitingBasicBlock() == PrevVPBB && - PrevVPBB->getSingleHierarchicalSuccessor() && - (SingleHPred->getParent() == getEnclosingLoopRegion() && - !IsLoopRegion(SingleHPred))) && /* B */ - !(Replica && getPredecessors().empty())) { /* C */ - // The last IR basic block is reused, as an optimization, in three cases: - // A. 
the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; - // B. when the current VPBB has a single (hierarchical) predecessor which - // is PrevVPBB and the latter has a single (hierarchical) successor which - // both are in the same non-replicator region; and - // C. when the current VPBB is an entry of a region replica - where PrevVPBB - // is the exiting VPBB of this region from a previous instance, or the - // predecessor of this region. - + if (this == getPlan()->getVectorPreheader() || + (Replica && this == getParent()->getEntry()) || + IsReplicateRegion(getSingleHierarchicalPredecessor())) { + // Reuse the previous basic block if the current VPBB is either + // * the vector preheader, + // * the entry to a replicate region, or + // * the exit of a replicate region. + State->CFG.VPBB2IRBB[this] = NewBB; + } else { NewBB = createEmptyBasicBlock(State->CFG); State->Builder.SetInsertPoint(NewBB); @@ -518,8 +509,6 @@ void VPBasicBlock::execute(VPTransformState *State) { State->CFG.PrevBB = NewBB; State->CFG.VPBB2IRBB[this] = NewBB; connectToPredecessors(State->CFG); - } else { - State->CFG.VPBB2IRBB[this] = NewBB; } // 2. Fill the IR basic block with IR instructions. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 70221e7af7dbbe..747fcb068cfb34 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -643,7 +643,7 @@ class VPBlockBase { virtual void dropAllReferences(VPValue *NewValue) = 0; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void printAsOperand(raw_ostream &OS, bool PrintType) const { + void printAsOperand(raw_ostream &OS, bool PrintType = false) const { OS << getName(); } @@ -3301,19 +3301,23 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { /// for floating point inductions. const FPMathOperator *FPBinOp; + /// Name to use for the generated IR instruction for the derived IV. 
+ std::string Name; + public: VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, - VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) + VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, + const Twine &Name = "") : VPDerivedIVRecipe( IndDesc.getKind(), dyn_cast_or_null(IndDesc.getInductionBinOp()), - Start, CanonicalIV, Step) {} + Start, CanonicalIV, Step, Name) {} VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, - VPValue *Step) + VPValue *Step, const Twine &Name = "") : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind), - FPBinOp(FPBinOp) {} + FPBinOp(FPBinOp), Name(Name.str()) {} ~VPDerivedIVRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 8b8ab6be99b0d5..cb42cfe8159b04 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -93,34 +93,19 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) { unsigned Opcode = R->getOpcode(); - switch (Opcode) { - case Instruction::ICmp: - case Instruction::FCmp: - return IntegerType::get(Ctx, 1); - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::URem: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { + if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || + Instruction::isBitwiseLogicOp(Opcode)) { Type *ResTy = inferScalarType(R->getOperand(0)); assert(ResTy == inferScalarType(R->getOperand(1)) && "types 
for both operands must match for binary op"); CachedTypes[R->getOperand(1)] = ResTy; return ResTy; } + + switch (Opcode) { + case Instruction::ICmp: + case Instruction::FCmp: + return IntegerType::get(Ctx, 1); case Instruction::FNeg: case Instruction::Freeze: return inferScalarType(R->getOperand(0)); @@ -157,36 +142,26 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) { } Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) { - switch (R->getUnderlyingInstr()->getOpcode()) { - case Instruction::Call: { - unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1); - return cast(R->getOperand(CallIdx)->getLiveInIRValue()) - ->getReturnType(); - } - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::URem: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { + unsigned Opcode = R->getUnderlyingInstr()->getOpcode(); + + if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || + Instruction::isBitwiseLogicOp(Opcode)) { Type *ResTy = inferScalarType(R->getOperand(0)); assert(ResTy == inferScalarType(R->getOperand(1)) && "inferred types for operands of binary op don't match"); CachedTypes[R->getOperand(1)] = ResTy; return ResTy; } + + if (Instruction::isCast(Opcode)) + return R->getUnderlyingInstr()->getType(); + + switch (Opcode) { + case Instruction::Call: { + unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 
2 : 1); + return cast(R->getOperand(CallIdx)->getLiveInIRValue()) + ->getReturnType(); + } case Instruction::Select: { Type *ResTy = inferScalarType(R->getOperand(1)); assert(ResTy == inferScalarType(R->getOperand(2)) && @@ -197,21 +172,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) { case Instruction::ICmp: case Instruction::FCmp: return IntegerType::get(Ctx, 1); - case Instruction::AddrSpaceCast: case Instruction::Alloca: - case Instruction::BitCast: - case Instruction::Trunc: - case Instruction::SExt: - case Instruction::ZExt: - case Instruction::FPExt: - case Instruction::FPTrunc: case Instruction::ExtractValue: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::FPToSI: - case Instruction::FPToUI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: return R->getUnderlyingInstr()->getType(); case Instruction::Freeze: case Instruction::FNeg: diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 24cf4666c62ce3..b2ee31c3e240a1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -859,7 +859,9 @@ void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, if (getNumOperands() != 0) { assert(getNumOperands() == 1 && "can have at most 1 operand"); O << " (extra operand: "; - printOperands(O, SlotTracker); + getOperand(0)->printAsOperand(O, SlotTracker); + O << " from "; + getParent()->getPredecessors()[0]->printAsOperand(O); O << ")"; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ad609da210fd10..1d1029710c7095 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -528,7 +528,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); VPSingleDefRecipe 
*BaseIV = CanonicalIV; if (!CanonicalIV->isCanonical(Kind, StartV, Step)) { - BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step); + BaseIV = Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step, + "offset.idx"); } // Truncate base induction if needed. diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 89f74540669e45..71c7d547ac7d91 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -134,52 +134,43 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { } return true; }; - for (const VPUser *U : EVL.users()) { - if (!TypeSwitch(U) - .Case( - [&](const VPWidenIntrinsicRecipe *S) { - return VerifyEVLUse(*S, S->getNumOperands() - 1); - }) - .Case([&](const VPWidenStoreEVLRecipe *S) { - return VerifyEVLUse(*S, 2); - }) - .Case( - [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) - .Case([&](const VPWidenEVLRecipe *W) { - return VerifyEVLUse( - *W, Instruction::isUnaryOp(W->getOpcode()) ? 
1 : 2); - }) - .Case([&](const VPReductionEVLRecipe *R) { - return VerifyEVLUse(*R, 2); - }) - .Case( - [&](const VPScalarCastRecipe *S) { return true; }) - .Case([&](const VPInstruction *I) { - if (I->getOpcode() != Instruction::Add) { - errs() - << "EVL is used as an operand in non-VPInstruction::Add\n"; - return false; - } - if (I->getNumUsers() != 1) { - errs() << "EVL is used in VPInstruction:Add with multiple " - "users\n"; - return false; - } - if (!isa(*I->users().begin())) { - errs() << "Result of VPInstruction::Add with EVL operand is " - "not used by VPEVLBasedIVPHIRecipe\n"; - return false; - } - return true; - }) - .Default([&](const VPUser *U) { - errs() << "EVL has unexpected user\n"; - return false; - })) { - return false; - } - } - return true; + return all_of(EVL.users(), [&VerifyEVLUse](VPUser *U) { + return TypeSwitch(U) + .Case([&](const VPWidenIntrinsicRecipe *S) { + return VerifyEVLUse(*S, S->getNumOperands() - 1); + }) + .Case( + [&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); }) + .Case( + [&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); }) + .Case([&](const VPWidenEVLRecipe *W) { + return VerifyEVLUse(*W, + Instruction::isUnaryOp(W->getOpcode()) ? 
1 : 2); + }) + .Case( + [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); }) + .Case([&](const VPInstruction *I) { + if (I->getOpcode() != Instruction::Add) { + errs() << "EVL is used as an operand in non-VPInstruction::Add\n"; + return false; + } + if (I->getNumUsers() != 1) { + errs() << "EVL is used in VPInstruction:Add with multiple " + "users\n"; + return false; + } + if (!isa(*I->users().begin())) { + errs() << "Result of VPInstruction::Add with EVL operand is " + "not used by VPEVLBasedIVPHIRecipe\n"; + return false; + } + return true; + }) + .Default([&](const VPUser *U) { + errs() << "EVL has unexpected user\n"; + return false; + }); + }); } bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { diff --git a/llvm/test/Analysis/CostModel/RISCV/abs.ll b/llvm/test/Analysis/CostModel/RISCV/abs.ll index 8f0dd7b0aefe97..7252716af86054 100644 --- a/llvm/test/Analysis/CostModel/RISCV/abs.ll +++ b/llvm/test/Analysis/CostModel/RISCV/abs.ll @@ -44,37 +44,37 @@ declare @llvm.abs.nxv64i8(, i1) define i32 @abs(i32 %arg) { ; CHECK-LABEL: 'abs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call @llvm.abs.nxv2i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call @llvm.abs.nxv4i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call @llvm.abs.nxv8i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %3 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = call @llvm.abs.nxv2i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = call @llvm.abs.nxv4i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = call @llvm.abs.nxv8i64( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %10 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call @llvm.abs.nxv2i32( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call @llvm.abs.nxv4i32( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call @llvm.abs.nxv8i32( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call @llvm.abs.nxv16i32( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call @llvm.abs.nxv4i32( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = call @llvm.abs.nxv8i32( 
undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = call @llvm.abs.nxv16i32( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.abs.nxv2i16( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call @llvm.abs.nxv4i16( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call @llvm.abs.nxv8i16( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call @llvm.abs.nxv16i16( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call @llvm.abs.nxv32i16( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = call @llvm.abs.nxv8i16( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %23 = call @llvm.abs.nxv16i16( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 16 for instruction: %24 = call @llvm.abs.nxv32i16( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %27 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %28 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call @llvm.abs.nxv8i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call @llvm.abs.nxv16i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call @llvm.abs.nxv32i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = call @llvm.abs.nxv64i8( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %30 = call @llvm.abs.nxv16i8( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %31 = call @llvm.abs.nxv32i8( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %32 = call @llvm.abs.nxv64i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) diff --git a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll index 
ea05464b084086..911993caba4d65 100644 --- a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll +++ b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll @@ -157,6 +157,260 @@ define void @bitreverse() { ret void } +define void @ctlz() { +; NOZVBB-LABEL: 'ctlz' +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call @llvm.ctlz.nxv1i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call @llvm.ctlz.nxv2i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call @llvm.ctlz.nxv4i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call @llvm.ctlz.nxv8i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.ctlz.nxv16i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.ctlz.nxv32i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %11 = call @llvm.ctlz.nxv64i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i16> 
@llvm.ctlz.v8i16(<8 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call @llvm.ctlz.nxv1i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.ctlz.nxv2i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call @llvm.ctlz.nxv4i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.ctlz.nxv8i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.ctlz.nxv16i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %21 = call @llvm.ctlz.nxv32i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call @llvm.ctlz.nxv1i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call @llvm.ctlz.nxv2i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.ctlz.nxv4i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call @llvm.ctlz.nxv8i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %30 = call @llvm.ctlz.nxv16i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = call @llvm.ctlz.nxv1i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %36 = call @llvm.ctlz.nxv2i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = call @llvm.ctlz.nxv4i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = call @llvm.ctlz.nxv8i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = call @llvm.ctlz.nxv16i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVBB-LABEL: 'ctlz' +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.ctlz.nxv1i8( undef, i1 
false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.ctlz.nxv2i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.ctlz.nxv4i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.ctlz.nxv8i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.ctlz.nxv16i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.ctlz.nxv32i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call @llvm.ctlz.nxv64i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.ctlz.nxv1i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.ctlz.nxv2i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.ctlz.nxv4i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.ctlz.nxv8i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.ctlz.nxv16i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call @llvm.ctlz.nxv32i16( undef, i1 false) +; 
ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.ctlz.nxv1i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.ctlz.nxv2i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.ctlz.nxv4i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.ctlz.nxv8i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.ctlz.nxv16i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.ctlz.nxv1i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.ctlz.nxv2i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %37 = call @llvm.ctlz.nxv4i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.ctlz.nxv8i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = call @llvm.ctlz.nxv16i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false) + call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false) + call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false) + call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false) + call @llvm.ctlz.nxv1i8( undef, i1 false) + call @llvm.ctlz.nxv2i8( undef, i1 false) + call @llvm.ctlz.nxv4i8( undef, i1 false) + call @llvm.ctlz.nxv8i8( undef, i1 false) + call @llvm.ctlz.nxv16i8( undef, i1 false) + call @llvm.ctlz.nxv32i8( undef, i1 false) + call @llvm.ctlz.nxv64i8( undef, i1 false) + call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false) + call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false) + call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false) + call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false) + call @llvm.ctlz.nxv1i16( undef, i1 false) + call @llvm.ctlz.nxv2i16( undef, i1 false) + call @llvm.ctlz.nxv4i16( undef, i1 false) + call @llvm.ctlz.nxv8i16( undef, i1 false) + call @llvm.ctlz.nxv16i16( undef, i1 false) + call @llvm.ctlz.nxv32i16( undef, i1 false) + call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false) + call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false) + call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false) + call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false) + call @llvm.ctlz.nxv1i32( undef, i1 false) + call @llvm.ctlz.nxv2i32( undef, i1 false) + call @llvm.ctlz.nxv4i32( undef, i1 false) + call @llvm.ctlz.nxv8i32( undef, i1 false) + call @llvm.ctlz.nxv16i32( undef, i1 false) + call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false) + call <4 x i64> 
@llvm.ctlz.v4i64(<4 x i64> undef, i1 false) + call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false) + call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false) + call @llvm.ctlz.nxv1i64( undef, i1 false) + call @llvm.ctlz.nxv2i64( undef, i1 false) + call @llvm.ctlz.nxv4i64( undef, i1 false) + call @llvm.ctlz.nxv8i64( undef, i1 false) + call @llvm.ctlz.nxv16i64( undef, i1 false) + ret void +} + +define void @cttz() { +; NOZVBB-LABEL: 'cttz' +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %1 = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %2 = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %3 = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %4 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %5 = call @llvm.cttz.nxv1i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.cttz.nxv2i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.cttz.nxv4i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.cttz.nxv8i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.cttz.nxv16i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.cttz.nxv32i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %11 = call @llvm.cttz.nxv64i8( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> 
undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.cttz.nxv1i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %17 = call @llvm.cttz.nxv2i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %18 = call @llvm.cttz.nxv4i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %19 = call @llvm.cttz.nxv8i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %20 = call @llvm.cttz.nxv16i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %21 = call @llvm.cttz.nxv32i16( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %26 = call @llvm.cttz.nxv1i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %27 = call @llvm.cttz.nxv2i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %28 = call @llvm.cttz.nxv4i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %29 = call @llvm.cttz.nxv8i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %30 = call 
@llvm.cttz.nxv16i32( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %35 = call @llvm.cttz.nxv1i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %36 = call @llvm.cttz.nxv2i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %37 = call @llvm.cttz.nxv4i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %38 = call @llvm.cttz.nxv8i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Invalid cost for instruction: %39 = call @llvm.cttz.nxv16i64( undef, i1 false) +; NOZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVBB-LABEL: 'cttz' +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.cttz.nxv1i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.cttz.nxv2i8( 
undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.cttz.nxv4i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.cttz.nxv8i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.cttz.nxv16i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.cttz.nxv32i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call @llvm.cttz.nxv64i8( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.cttz.nxv1i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.cttz.nxv2i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.cttz.nxv4i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.cttz.nxv8i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.cttz.nxv16i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call @llvm.cttz.nxv32i16( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x 
i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.cttz.nxv1i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.cttz.nxv2i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.cttz.nxv4i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.cttz.nxv8i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.cttz.nxv16i32( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.cttz.nxv1i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.cttz.nxv2i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.cttz.nxv4i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %38 = call @llvm.cttz.nxv8i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = call @llvm.cttz.nxv16i64( undef, i1 false) +; ZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false) + call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false) + call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false) + call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false) + call @llvm.cttz.nxv1i8( undef, i1 false) + call @llvm.cttz.nxv2i8( undef, i1 false) + call @llvm.cttz.nxv4i8( undef, i1 false) + call @llvm.cttz.nxv8i8( undef, i1 false) + call @llvm.cttz.nxv16i8( undef, i1 false) + call @llvm.cttz.nxv32i8( undef, i1 false) + call @llvm.cttz.nxv64i8( undef, i1 false) + call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false) + call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false) + call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false) + call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false) + call @llvm.cttz.nxv1i16( undef, i1 false) + call @llvm.cttz.nxv2i16( undef, i1 false) + call @llvm.cttz.nxv4i16( undef, i1 false) + call @llvm.cttz.nxv8i16( undef, i1 false) + call @llvm.cttz.nxv16i16( undef, i1 false) + call @llvm.cttz.nxv32i16( undef, i1 false) + call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false) + call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false) + call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false) + call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false) + call @llvm.cttz.nxv1i32( undef, i1 false) + call @llvm.cttz.nxv2i32( undef, i1 false) + call @llvm.cttz.nxv4i32( undef, i1 false) + call @llvm.cttz.nxv8i32( undef, i1 false) + call @llvm.cttz.nxv16i32( undef, i1 false) + call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false) + call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false) + call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false) + call <16 x i64> 
@llvm.cttz.v16i64(<16 x i64> undef, i1 false) + call @llvm.cttz.nxv1i64( undef, i1 false) + call @llvm.cttz.nxv2i64( undef, i1 false) + call @llvm.cttz.nxv4i64( undef, i1 false) + call @llvm.cttz.nxv8i64( undef, i1 false) + call @llvm.cttz.nxv16i64( undef, i1 false) + ret void +} + define void @ctpop() { ; NOZVBB-LABEL: 'ctpop' ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef) @@ -778,6 +1032,86 @@ declare @llvm.vp.bswap.nxv4i64(, @llvm.vp.bswap.nxv8i64(, , i32) declare @llvm.vp.bswap.nxv16i64(, , i32) +declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) +declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>, i1) +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) +declare @llvm.ctlz.nxv1i8(, i1) +declare @llvm.ctlz.nxv2i8(, i1) +declare @llvm.ctlz.nxv4i8(, i1) +declare @llvm.ctlz.nxv8i8(, i1) +declare @llvm.ctlz.nxv16i8(, i1) +declare @llvm.ctlz.nxv32i8(, i1) +declare @llvm.ctlz.nxv64i8(, i1) +declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>, i1) +declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) +declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) +declare @llvm.ctlz.nxv1i16(, i1) +declare @llvm.ctlz.nxv2i16(, i1) +declare @llvm.ctlz.nxv4i16(, i1) +declare @llvm.ctlz.nxv8i16(, i1) +declare @llvm.ctlz.nxv16i16(, i1) +declare @llvm.ctlz.nxv32i16(, i1) +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) +declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) +declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) +declare @llvm.ctlz.nxv1i32(, i1) +declare @llvm.ctlz.nxv2i32(, i1) +declare @llvm.ctlz.nxv4i32(, i1) +declare @llvm.ctlz.nxv8i32(, i1) +declare @llvm.ctlz.nxv16i32(, i1) +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) +declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) +declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) +declare <16 x i64> @llvm.ctlz.v16i64(<16 x i64>, i1) 
+declare @llvm.ctlz.nxv1i64(, i1) +declare @llvm.ctlz.nxv2i64(, i1) +declare @llvm.ctlz.nxv4i64(, i1) +declare @llvm.ctlz.nxv8i64(, i1) +declare @llvm.ctlz.nxv16i64(, i1) + +declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) +declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1) +declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1) +declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1) +declare @llvm.cttz.nxv1i8(, i1) +declare @llvm.cttz.nxv2i8(, i1) +declare @llvm.cttz.nxv4i8(, i1) +declare @llvm.cttz.nxv8i8(, i1) +declare @llvm.cttz.nxv16i8(, i1) +declare @llvm.cttz.nxv32i8(, i1) +declare @llvm.cttz.nxv64i8(, i1) +declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1) +declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1) +declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1) +declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1) +declare @llvm.cttz.nxv1i16(, i1) +declare @llvm.cttz.nxv2i16(, i1) +declare @llvm.cttz.nxv4i16(, i1) +declare @llvm.cttz.nxv8i16(, i1) +declare @llvm.cttz.nxv16i16(, i1) +declare @llvm.cttz.nxv32i16(, i1) +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) +declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1) +declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) +declare @llvm.cttz.nxv1i32(, i1) +declare @llvm.cttz.nxv2i32(, i1) +declare @llvm.cttz.nxv4i32(, i1) +declare @llvm.cttz.nxv8i32(, i1) +declare @llvm.cttz.nxv16i32(, i1) +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) +declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) +declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1) +declare <16 x i64> @llvm.cttz.v16i64(<16 x i64>, i1) +declare @llvm.cttz.nxv1i64(, i1) +declare @llvm.cttz.nxv2i64(, i1) +declare @llvm.cttz.nxv4i64(, i1) +declare @llvm.cttz.nxv8i64(, i1) +declare @llvm.cttz.nxv16i64(, i1) + declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32) declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32) declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32) diff --git 
a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll index 800ea223850d31..c7cd845a0a03f8 100644 --- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=TYPEBASED +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=TYPEBASED define void @unsupported_fp_ops( %vec, i32 %extraarg) { ; CHECK-LABEL: 'unsupported_fp_ops' @@ -1147,28 +1147,28 @@ define void @abs() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.vp.abs.nxv2i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call @llvm.abs.nxv2i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.vp.abs.nxv4i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.abs.nxv4i8( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call @llvm.vp.abs.nxv8i8( undef, i1 false, undef, i32 undef) ; CHECK-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %22 = call @llvm.abs.nxv8i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'abs' @@ -1182,28 +1182,28 @@ define void @abs() { ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.vp.abs.nxv2i8( undef, i1 false, undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call @llvm.abs.nxv2i8( undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.vp.abs.nxv4i8( undef, i1 false, undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.abs.nxv4i8( undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call @llvm.vp.abs.nxv8i8( undef, i1 false, undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = call 
@llvm.abs.nxv8i8( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call @llvm.vp.abs.nxv16i8( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %24 = call @llvm.abs.nxv16i8( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call @llvm.vp.abs.nxv2i64( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call @llvm.abs.nxv2i64( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%27 = call @llvm.vp.abs.nxv4i64( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %28 = call @llvm.abs.nxv4i64( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %29 = call @llvm.vp.abs.nxv8i64( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %30 = call @llvm.abs.nxv8i64( undef, i1 false) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %31 = call @llvm.vp.abs.nxv16i64( undef, i1 false, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %32 = call @llvm.abs.nxv16i64( undef, i1 false) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 0, <2 x i1> undef, i32 undef) @@ -2125,6 +2125,232 @@ define void @vp_fdiv(){ ret void } +define void @splat() { +; CHECK-LABEL: 'splat' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %1 = call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = 
call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = call 
<8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %31 = call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %37 = call @llvm.experimental.vp.splat.nxv2i1(i1 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %38 = call @llvm.experimental.vp.splat.nxv4i1(i1 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %39 = call @llvm.experimental.vp.splat.nxv8i1(i1 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %40 = call @llvm.experimental.vp.splat.nxv16i1(i1 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = call @llvm.experimental.vp.splat.nxv2i8(i8 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = call @llvm.experimental.vp.splat.nxv4i8(i8 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = call @llvm.experimental.vp.splat.nxv8i8(i8 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %44 = call @llvm.experimental.vp.splat.nxv16i8(i8 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = call @llvm.experimental.vp.splat.nxv2i16(i16 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = call @llvm.experimental.vp.splat.nxv4i16(i16 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call @llvm.experimental.vp.splat.nxv8i16(i16 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call @llvm.experimental.vp.splat.nxv16i16(i16 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = call @llvm.experimental.vp.splat.nxv2i32(i32 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = call @llvm.experimental.vp.splat.nxv4i32(i32 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = call @llvm.experimental.vp.splat.nxv8i32(i32 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %52 = call @llvm.experimental.vp.splat.nxv16i32(i32 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = call @llvm.experimental.vp.splat.nxv2i64(i64 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %54 = call @llvm.experimental.vp.splat.nxv4i64(i64 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = call @llvm.experimental.vp.splat.nxv8i64(i64 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %56 = call @llvm.experimental.vp.splat.nxv16i64(i64 undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %57 = call 
@llvm.experimental.vp.splat.nxv2bf16(bfloat undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %58 = call @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %59 = call @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = call @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = call @llvm.experimental.vp.splat.nxv2f16(half undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %62 = call @llvm.experimental.vp.splat.nxv4f16(half undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %63 = call @llvm.experimental.vp.splat.nxv8f16(half undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = call @llvm.experimental.vp.splat.nxv16f16(half undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %65 = call @llvm.experimental.vp.splat.nxv2f32(float undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %66 = call @llvm.experimental.vp.splat.nxv4f32(float undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %67 = call @llvm.experimental.vp.splat.nxv8f32(float undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %68 = call @llvm.experimental.vp.splat.nxv16f32(float undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %69 = call @llvm.experimental.vp.splat.nxv2f64(double undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %70 = call 
@llvm.experimental.vp.splat.nxv4f64(double undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %71 = call @llvm.experimental.vp.splat.nxv8f64(double undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %72 = call @llvm.experimental.vp.splat.nxv16f64(double undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; TYPEBASED-LABEL: 'splat' +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %1 = call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %2 = call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %3 = call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %4 = call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %10 = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) +; 
TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%33 = call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %37 = call @llvm.experimental.vp.splat.nxv2i1(i1 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %38 = call @llvm.experimental.vp.splat.nxv4i1(i1 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %39 = call @llvm.experimental.vp.splat.nxv8i1(i1 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %40 = call @llvm.experimental.vp.splat.nxv16i1(i1 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = call @llvm.experimental.vp.splat.nxv2i8(i8 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = call @llvm.experimental.vp.splat.nxv4i8(i8 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = call @llvm.experimental.vp.splat.nxv8i8(i8 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = call @llvm.experimental.vp.splat.nxv16i8(i8 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = call @llvm.experimental.vp.splat.nxv2i16(i16 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %46 = call @llvm.experimental.vp.splat.nxv4i16(i16 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = call @llvm.experimental.vp.splat.nxv8i16(i16 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = call @llvm.experimental.vp.splat.nxv16i16(i16 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = call @llvm.experimental.vp.splat.nxv2i32(i32 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = call @llvm.experimental.vp.splat.nxv4i32(i32 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = call @llvm.experimental.vp.splat.nxv8i32(i32 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %52 = call @llvm.experimental.vp.splat.nxv16i32(i32 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = call @llvm.experimental.vp.splat.nxv2i64(i64 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %54 = call @llvm.experimental.vp.splat.nxv4i64(i64 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = call @llvm.experimental.vp.splat.nxv8i64(i64 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %56 = call @llvm.experimental.vp.splat.nxv16i64(i64 undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %57 = call @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %58 = call @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %59 = call @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = call @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = call @llvm.experimental.vp.splat.nxv2f16(half undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %62 = call @llvm.experimental.vp.splat.nxv4f16(half undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %63 = call @llvm.experimental.vp.splat.nxv8f16(half undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = call @llvm.experimental.vp.splat.nxv16f16(half undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %65 = call @llvm.experimental.vp.splat.nxv2f32(float undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %66 = call @llvm.experimental.vp.splat.nxv4f32(float undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %67 = call @llvm.experimental.vp.splat.nxv8f32(float undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %68 = call @llvm.experimental.vp.splat.nxv16f32(float undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %69 = call @llvm.experimental.vp.splat.nxv2f64(double undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %70 = call @llvm.experimental.vp.splat.nxv4f64(double undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %71 = call @llvm.experimental.vp.splat.nxv8f64(double undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found 
an estimated cost of 16 for instruction: %72 = call @llvm.experimental.vp.splat.nxv16f64(double undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef) + call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef) + call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef) + call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef) + call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef) + call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef) + call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef) + call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef) + call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef) + call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef) + call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef) + call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef) + call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef) + call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef) + call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef) + call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef) + call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef) + call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef) + call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef) + call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef) + call <2 x 
bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef) + call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef) + call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef) + call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef) + call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef) + call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef) + call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef) + call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef) + call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef) + call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef) + call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef) + call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef) + call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i1(i1 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i1(i1 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i1(i1 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i1(i1 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i8(i8 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i8(i8 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i8(i8 undef, undef, i32 undef) + call 
@llvm.experimental.vp.splat.nxv16i8(i8 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i16(i16 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i16(i16 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i16(i16 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i16(i16 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i32(i32 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i32(i32 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i32(i32 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i32(i32 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2i64(i64 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4i64(i64 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8i64(i64 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16i64(i64 undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2f16(half undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4f16(half undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8f16(half undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16f16(half undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2f32(float undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4f32(float undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv8f32(float undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16f32(float undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv2f64(double undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv4f64(double undef, undef, i32 undef) + call 
@llvm.experimental.vp.splat.nxv8f64(double undef, undef, i32 undef) + call @llvm.experimental.vp.splat.nxv16f64(double undef, undef, i32 undef) + ret void +} + declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) declare <4 x i8> @llvm.vp.add.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) declare <8 x i8> @llvm.vp.add.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll index d8c87e2809cfa5..0dfd1d880f9cf2 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll @@ -404,6 +404,58 @@ define amdgpu_kernel void @smfmac_f32_16x16x128_fp8_fp8(<4 x i32> %arg0, <8 x i3 ret void } +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32>, <8 x i32>, <16 x float>, i32, i32, i32) + +; CHECK: DIVERGENT: %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 1, i32 2) +define amdgpu_kernel void @smfmac_f32_32x32x64_bf8_bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, ptr addrspace(1) %out) { + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 1, i32 2) + store <16 x float> %result, ptr addrspace(1) %out + ret void +} + +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32>, <8 x i32>, <16 x float>, i32, i32, i32) + +; CHECK: DIVERGENT: %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 1, i32 2) +define amdgpu_kernel void @smfmac_f32_32x32x64_bf8_fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, ptr addrspace(1) %out) { + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 
%arg3, i32 1, i32 2) + store <16 x float> %result, ptr addrspace(1) %out + ret void +} + +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32>, <8 x i32>, <16 x float>, i32, i32, i32) + +; CHECK: DIVERGENT: %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 1, i32 2) +define amdgpu_kernel void @smfmac_f32_32x32x64_fp8_bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, ptr addrspace(1) %out) { + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 1, i32 2) + store <16 x float> %result, ptr addrspace(1) %out + ret void +} + +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32>, <8 x i32>, <16 x float>, i32, i32, i32) + +; CHECK: DIVERGENT: %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 1, i32 2) +define amdgpu_kernel void @smfmac_f32_32x32x64_fp8_fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, ptr addrspace(1) %out) { + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 1, i32 2) + store <16 x float> %result, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %src0, i32 %src1, i1 false, i1 false) +define amdgpu_kernel void @v_permlane16_swap(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %src0, i32 %src1, i1 false, i1 false) + store { i32, i32 } %v, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %src0, i32 %src1, i1 false, i1 false) +define amdgpu_kernel void @v_permlane32_swap(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { + %v = call { i32, i32 } 
@llvm.amdgcn.permlane32.swap(i32 %src0, i32 %src1, i1 false, i1 false) + store { i32, i32 } %v, ptr addrspace(1) %out + ret void +} + + + declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1 declare i32 @llvm.amdgcn.permlane16.i32(i32, i32, i32, i32, i1, i1) #1 declare i32 @llvm.amdgcn.permlanex16.i32(i32, i32, i32, i32, i1, i1) #1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir index c92718f9e9b3c7..2464026aa125b5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -59,8 +59,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0) - ; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY]](<2 x p0>) + ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[PTRTOINT]](<2 x s64>), [[PTRTOINT1]], shufflemask(0, 0) + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[SHUF]](<2 x s64>) + ; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x p0>) = COPY $q0 %1:_(<2 x p0>) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir index 216f94b2b51e3f..f1d1b691fe1aa5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir @@ -57,11 +57,11 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_reg_output - ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def 
%0 + ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2490378 /* regdef:GPR32common */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 - INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0:gpr32common + INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 2490378 /* regdef:GPR32common */, def %0:gpr32common %1:_(s32) = COPY %0 $w0 = COPY %1(s32) RET_ReallyLR implicit $w0 @@ -75,12 +75,12 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_mixed_types - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0, 3735562 /* regdef:FPR64 */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2490378 /* regdef:GPR32common */, def %0, 3342346 /* regdef:FPR64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1 ; CHECK-NEXT: $d0 = COPY [[COPY1]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2883594 /* regdef:GPR32common */, def %0:gpr32common, 3735562 /* regdef:FPR64 */, def %1:fpr64 + INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 2490378 /* regdef:GPR32common */, def %0:gpr32common, 3342346 /* regdef:FPR64 */, def %1:fpr64 %3:_(s32) = COPY %0 %4:_(s64) = COPY %1 $d0 = COPY %4(s64) diff --git a/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll index 0e8465e7419936..ff66206228a4aa 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll @@ -13,7 +13,7 @@ define @test_svadd_i8( %Zn, asm "add $0.b, $1.b, $2.b", "=w,w,y"( %Zn, %Zm) @@ -29,7 +29,7 @@ define @test_svsub_i64( %Zn, asm "sub $0.d, $1.d, $2.d", "=w,w,x"( %Zn, %Zm) @@ -45,7 +45,7 @@ define @test_svfmul_f16( %Zn, asm "fmul $0.h, $1.h, $2.h", "=w,w,y"( %Zn, %Zm) @@ -61,7 +61,7 
@@ define @test_svfmul_f( %Zn, asm "fmul $0.s, $1.s, $2.s", "=w,w,x"( %Zn, %Zm) @@ -79,7 +79,7 @@ define @test_svfadd_f16( %Pg, asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Upl,w,w"( %Pg, %Zn, %Zm) @@ -95,7 +95,7 @@ define @test_incp( %Pg, ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ppr = COPY $p0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[COPY1]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:zpr = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"incp $0.s, $1", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %2, {{[0-9]+}} /* reguse:PPR */, [[COPY2]], {{[0-9]+}} /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"incp $0.s, $1", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %2, 458761 /* reguse:PPR */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) ; CHECK-NEXT: $z0 = COPY %2 ; CHECK-NEXT: RET_ReallyLR implicit $z0 %1 = tail call asm "incp $0.s, $1", "=w,@3Upa,0"( %Pg, %Zn) @@ -113,7 +113,7 @@ define @test_svfadd_f16_Uph_constraint( %P ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr_p8to15 = COPY [[COPY2]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:zpr = COPY [[COPY1]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:zpr = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"fadd $0.h, $1/m, $2.h, $3.h", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %3, {{[0-9]+}} /* reguse:PPR_p8to15 */, [[COPY3]], {{[0-9]+}} /* reguse:ZPR */, [[COPY4]], {{[0-9]+}} /* reguse:ZPR */, [[COPY5]] + ; CHECK-NEXT: INLINEASM &"fadd $0.h, $1/m, $2.h, $3.h", 0 /* attdialect */, 5767178 /* regdef:ZPR */, def %3, 786441 /* reguse:PPR_p8to15 */, [[COPY3]], 5767177 /* reguse:ZPR */, [[COPY4]], 5767177 /* reguse:ZPR */, [[COPY5]] ; CHECK-NEXT: $z0 = COPY %3 ; CHECK-NEXT: RET_ReallyLR implicit $z0 %1 = tail call asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Uph,w,w"( %Pg, %Zn, %Zm) @@ -129,7 +129,7 @@ define void @explicit_p0(ptr %p) { ; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg ; CHECK-NEXT: $p0 = COPY [[PTRUE_B]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM 
&"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %1, 9 /* reguse */, $p0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $p0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: RET_ReallyLR %1 = tail call @llvm.aarch64.sve.ptrue.b8(i32 31) %2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p0},0"( %1, ptr %p) @@ -145,7 +145,7 @@ define void @explicit_p8_invalid(ptr %p) { ; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg ; CHECK-NEXT: $p8 = COPY [[PTRUE_B]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %1, 9 /* reguse */, $p8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $p8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: RET_ReallyLR %1 = tail call @llvm.aarch64.sve.ptrue.b8(i32 31) %2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p8},0"( %1, ptr %p) @@ -161,7 +161,7 @@ define void @explicit_pn8(ptr %p) { ; CHECK-NEXT: [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg ; CHECK-NEXT: $pn8 = COPY [[PTRUE_C_B]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, 
[$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: RET_ReallyLR %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() %2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn8},0"(target("aarch64.svcount") %1, ptr %p) @@ -177,7 +177,7 @@ define void @explicit_pn0_invalid(ptr %p) { ; CHECK-NEXT: [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg ; CHECK-NEXT: $pn0 = COPY [[PTRUE_C_B]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]] - ; CHECK-NEXT: INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) ; CHECK-NEXT: RET_ReallyLR %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() %2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn0},0"(target("aarch64.svcount") %1, ptr %p) diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll index 25a14ef9a49ee8..d501d9ed24547a 100644 --- a/llvm/test/CodeGen/AArch64/abs.ll +++ b/llvm/test/CodeGen/AArch64/abs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; ===== Legal 
Scalars ===== diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll index 412c2b00a5ac09..9c54238c68e2c6 100644 --- a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll +++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll @@ -1,78 +1,68 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 | FileCheck %s -; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -O0 -pass-remarks-missed=gisel* -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL,FALLBACK +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" -; Function Attrs: nounwind readnone declare i32 @llvm.ctlz.i32(i32, i1) #0 declare i64 @llvm.ctlz.i64(i64, i1) #1 -; Function Attrs: nounwind ssp -; FALLBACK-NOT: remark{{.*}}clrsb32 define i32 @clrsb32(i32 %x) #2 { +; CHECK-LABEL: clrsb32: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls w0, w0 +; CHECK-NEXT: ret entry: %shr = ashr i32 %x, 31 %xor = xor i32 %shr, %x %mul = shl i32 %xor, 1 %add = or i32 %mul, 1 %0 = tail call i32 @llvm.ctlz.i32(i32 %add, i1 false) - ret i32 %0 -; CHECK-LABEL: clrsb32 -; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]] - -; GISEL-LABEL: clrsb32 -; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]] } -; Function Attrs: nounwind ssp -; FALLBACK-NOT: remark{{.*}}clrsb64 define i64 @clrsb64(i64 %x) #3 { +; CHECK-LABEL: clrsb64: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls x0, x0 +; CHECK-NEXT: ret entry: %shr = ashr i64 %x, 63 %xor = xor i64 %shr, %x %mul = shl nsw i64 %xor, 1 %add = or i64 %mul, 1 %0 = tail call i64 @llvm.ctlz.i64(i64 %add, i1 false) - ret i64 %0 -; CHECK-LABEL: clrsb64 -; CHECK: cls [[TEMP:x[0-9]+]], [[TEMP]] -; GISEL-LABEL: clrsb64 -; GISEL: cls [[TEMP:x[0-9]+]], [[TEMP]] } -; Function Attrs: nounwind ssp -; FALLBACK-NOT: 
remark{{.*}}clrsb32_zeroundef define i32 @clrsb32_zeroundef(i32 %x) #2 { +; CHECK-LABEL: clrsb32_zeroundef: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls w0, w0 +; CHECK-NEXT: ret entry: %shr = ashr i32 %x, 31 %xor = xor i32 %shr, %x %mul = shl i32 %xor, 1 %add = or i32 %mul, 1 %0 = tail call i32 @llvm.ctlz.i32(i32 %add, i1 true) - ret i32 %0 -; CHECK-LABEL: clrsb32_zeroundef -; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]] - -; GISEL-LABEL: clrsb32_zeroundef -; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]] } -; Function Attrs: nounwind ssp -; FALLBACK-NOT: remark{{.*}}clrsb64 define i64 @clrsb64_zeroundef(i64 %x) #3 { +; CHECK-LABEL: clrsb64_zeroundef: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls x0, x0 +; CHECK-NEXT: ret entry: %shr = ashr i64 %x, 63 %xor = xor i64 %shr, %x %mul = shl nsw i64 %xor, 1 %add = or i64 %mul, 1 %0 = tail call i64 @llvm.ctlz.i64(i64 %add, i1 true) - ret i64 %0 -; CHECK-LABEL: clrsb64_zeroundef -; CHECK: cls [[TEMP:x[0-9]+]], [[TEMP]] -; GISEL-LABEL: clrsb64_zeroundef -; GISEL: cls [[TEMP:x[0-9]+]], [[TEMP]] } + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll index e32d83327fe424..50df6a0388587b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ext.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for test_v2p0 +; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i8> @test_vextd(<8 x i8> %tmp1, <8 x i8> %tmp2) { ; CHECK-LABEL: test_vextd: diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll index 475affa358bd15..0e1e15f9b6b912 100644 --- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -1,12 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define void @testLeftGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood8x8: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.8b v0, v1, #3 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood8x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.8b v0, v1, #3 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood8x8: +; CHECK-GI: // 
%bb.0: +; CHECK-GI-NEXT: movi.8b v2, #7 +; CHECK-GI-NEXT: shl.8b v1, v1, #3 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i8> %src1, %vshl_n = shl <8 x i8> %src2, %result = or <8 x i8> %and.i, %vshl_n @@ -15,14 +25,23 @@ define void @testLeftGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind } define void @testLeftBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad8x8: -; CHECK: // %bb.0: -; CHECK-NEXT: movi.8b v2, #165 -; CHECK-NEXT: add.8b v1, v1, v1 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad8x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi.8b v2, #165 +; CHECK-SD-NEXT: add.8b v1, v1, v1 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad8x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.8b v2, #165 +; CHECK-GI-NEXT: shl.8b v1, v1, #1 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i8> %src1, %vshl_n = shl <8 x i8> %src2, %result = or <8 x i8> %and.i, %vshl_n @@ -31,11 +50,20 @@ define void @testLeftBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind } define void @testRightGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood8x8: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.8b v0, v1, #3 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood8x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.8b v0, v1, #3 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood8x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.8b v2, #224 +; CHECK-GI-NEXT: ushr.8b v1, v1, #3 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; 
CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i8> %src1, %vshl_n = lshr <8 x i8> %src2, %result = or <8 x i8> %and.i, %vshl_n @@ -60,11 +88,20 @@ define void @testRightBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind } define void @testLeftGood16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood16x8: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.16b v0, v1, #3 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood16x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.16b v0, v1, #3 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood16x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.16b v2, #7 +; CHECK-GI-NEXT: shl.16b v1, v1, #3 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <16 x i8> %src1, %vshl_n = shl <16 x i8> %src2, %result = or <16 x i8> %and.i, %vshl_n @@ -73,14 +110,23 @@ define void @testLeftGood16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounw } define void @testLeftBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad16x8: -; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v2, #165 -; CHECK-NEXT: add.16b v1, v1, v1 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad16x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi.16b v2, #165 +; CHECK-SD-NEXT: add.16b v1, v1, v1 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad16x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.16b v2, #165 +; CHECK-GI-NEXT: shl.16b v1, v1, #1 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <16 x i8> %src1, %vshl_n = shl <16 x i8> %src2, %result = 
or <16 x i8> %and.i, %vshl_n @@ -89,11 +135,20 @@ define void @testLeftBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwi } define void @testRightGood16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood16x8: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.16b v0, v1, #3 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood16x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.16b v0, v1, #3 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood16x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.16b v2, #224 +; CHECK-GI-NEXT: ushr.16b v1, v1, #3 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <16 x i8> %src1, %vshl_n = lshr <16 x i8> %src2, %result = or <16 x i8> %and.i, %vshl_n @@ -118,11 +173,20 @@ define void @testRightBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounw } define void @testLeftGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood4x16: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.4h v0, v1, #14 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood4x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.4h v0, v1, #14 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood4x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.4h v2, #192, lsl #8 +; CHECK-GI-NEXT: shl.4h v1, v1, #14 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i16> %src1, %vshl_n = shl <4 x i16> %src2, %result = or <4 x i16> %and.i, %vshl_n @@ -131,15 +195,25 @@ define void @testLeftGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounw } define void @testLeftBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad4x16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16500 -; 
CHECK-NEXT: shl.4h v1, v1, #14 -; CHECK-NEXT: dup.4h v2, w8 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad4x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16500 // =0x4074 +; CHECK-SD-NEXT: shl.4h v1, v1, #14 +; CHECK-SD-NEXT: dup.4h v2, w8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad4x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI9_0 +; CHECK-GI-NEXT: shl.4h v1, v1, #14 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI9_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i16> %src1, %vshl_n = shl <4 x i16> %src2, %result = or <4 x i16> %and.i, %vshl_n @@ -148,11 +222,20 @@ define void @testLeftBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwi } define void @testRightGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood4x16: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.4h v0, v1, #14 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood4x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.4h v0, v1, #14 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood4x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.4h v2, #3 +; CHECK-GI-NEXT: ushr.4h v1, v1, #14 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i16> %src1, %vshl_n = lshr <4 x i16> %src2, %result = or <4 x i16> %and.i, %vshl_n @@ -161,14 +244,24 @@ define void @testRightGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) noun } define void @testRightBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad4x16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16500 -; 
CHECK-NEXT: dup.4h v2, w8 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: usra.4h v0, v1, #14 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad4x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16500 // =0x4074 +; CHECK-SD-NEXT: dup.4h v2, w8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: usra.4h v0, v1, #14 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad4x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI11_0 +; CHECK-GI-NEXT: ushr.4h v1, v1, #14 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI11_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i16> %src1, %vshl_n = lshr <4 x i16> %src2, %result = or <4 x i16> %and.i, %vshl_n @@ -177,11 +270,20 @@ define void @testRightBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounw } define void @testLeftGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood8x16: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.8h v0, v1, #14 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood8x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.8h v0, v1, #14 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood8x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.8h v2, #192, lsl #8 +; CHECK-GI-NEXT: shl.8h v1, v1, #14 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i16> %src1, %vshl_n = shl <8 x i16> %src2, %result = or <8 x i16> %and.i, %vshl_n @@ -190,15 +292,25 @@ define void @testLeftGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounw } define void @testLeftBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad8x16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16500 -; CHECK-NEXT: shl.8h v1, v1, #14 -; CHECK-NEXT: dup.8h v2, w8 
-; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad8x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16500 // =0x4074 +; CHECK-SD-NEXT: shl.8h v1, v1, #14 +; CHECK-SD-NEXT: dup.8h v2, w8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad8x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI13_0 +; CHECK-GI-NEXT: shl.8h v1, v1, #14 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i16> %src1, %vshl_n = shl <8 x i16> %src2, %result = or <8 x i16> %and.i, %vshl_n @@ -207,11 +319,20 @@ define void @testLeftBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwi } define void @testRightGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood8x16: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.8h v0, v1, #14 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood8x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.8h v0, v1, #14 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood8x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.8h v2, #3 +; CHECK-GI-NEXT: ushr.8h v1, v1, #14 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i16> %src1, %vshl_n = lshr <8 x i16> %src2, %result = or <8 x i16> %and.i, %vshl_n @@ -220,14 +341,24 @@ define void @testRightGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) noun } define void @testRightBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad8x16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16500 -; CHECK-NEXT: dup.8h v2, w8 -; CHECK-NEXT: and.16b v0, 
v0, v2 -; CHECK-NEXT: usra.8h v0, v1, #14 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad8x16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #16500 // =0x4074 +; CHECK-SD-NEXT: dup.8h v2, w8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: usra.8h v0, v1, #14 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad8x16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI15_0 +; CHECK-GI-NEXT: ushr.8h v1, v1, #14 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i16> %src1, %vshl_n = lshr <8 x i16> %src2, %result = or <8 x i16> %and.i, %vshl_n @@ -236,11 +367,20 @@ define void @testRightBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounw } define void @testLeftGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood2x32: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.2s v0, v1, #22 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood2x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.2s v0, v1, #22 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood2x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.2s v2, #63, msl #16 +; CHECK-GI-NEXT: shl.2s v1, v1, #22 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i32> %src1, %vshl_n = shl <2 x i32> %src2, %result = or <2 x i32> %and.i, %vshl_n @@ -249,15 +389,25 @@ define void @testLeftGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounw } define void @testLeftBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad2x32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4194300 -; CHECK-NEXT: shl.2s v1, v1, #22 -; CHECK-NEXT: dup.2s v2, w8 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: 
orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad2x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4194300 // =0x3ffffc +; CHECK-SD-NEXT: shl.2s v1, v1, #22 +; CHECK-SD-NEXT: dup.2s v2, w8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad2x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI17_0 +; CHECK-GI-NEXT: shl.2s v1, v1, #22 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI17_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i32> %src1, %vshl_n = shl <2 x i32> %src2, %result = or <2 x i32> %and.i, %vshl_n @@ -266,11 +416,20 @@ define void @testLeftBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwi } define void @testRightGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood2x32: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.2s v0, v1, #22 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood2x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.2s v0, v1, #22 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood2x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.2s v2, #3, msl #8 +; CHECK-GI-NEXT: ushr.2s v1, v1, #22 +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i32> %src1, %vshl_n = lshr <2 x i32> %src2, %result = or <2 x i32> %and.i, %vshl_n @@ -279,15 +438,25 @@ define void @testRightGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) noun } define void @testRightBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad2x32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4194300 -; CHECK-NEXT: ushr.2s v1, v1, #22 -; CHECK-NEXT: dup.2s v2, w8 -; CHECK-NEXT: and.8b v0, v0, v2 -; 
CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad2x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4194300 // =0x3ffffc +; CHECK-SD-NEXT: ushr.2s v1, v1, #22 +; CHECK-SD-NEXT: dup.2s v2, w8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad2x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI19_0 +; CHECK-GI-NEXT: ushr.2s v1, v1, #22 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI19_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i32> %src1, %vshl_n = lshr <2 x i32> %src2, %result = or <2 x i32> %and.i, %vshl_n @@ -296,11 +465,20 @@ define void @testRightBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounw } define void @testLeftGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood4x32: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.4s v0, v1, #22 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood4x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.4s v0, v1, #22 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood4x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.4s v2, #63, msl #16 +; CHECK-GI-NEXT: shl.4s v1, v1, #22 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i32> %src1, %vshl_n = shl <4 x i32> %src2, %result = or <4 x i32> %and.i, %vshl_n @@ -309,15 +487,25 @@ define void @testLeftGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounw } define void @testLeftBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad4x32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4194300 -; CHECK-NEXT: shl.4s v1, v1, #22 -; CHECK-NEXT: dup.4s v2, w8 -; CHECK-NEXT: and.16b 
v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad4x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4194300 // =0x3ffffc +; CHECK-SD-NEXT: shl.4s v1, v1, #22 +; CHECK-SD-NEXT: dup.4s v2, w8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad4x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI21_0 +; CHECK-GI-NEXT: shl.4s v1, v1, #22 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i32> %src1, %vshl_n = shl <4 x i32> %src2, %result = or <4 x i32> %and.i, %vshl_n @@ -326,11 +514,20 @@ define void @testLeftBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwi } define void @testRightGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood4x32: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.4s v0, v1, #22 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood4x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.4s v0, v1, #22 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood4x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni.4s v2, #3, msl #8 +; CHECK-GI-NEXT: ushr.4s v1, v1, #22 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i32> %src1, %vshl_n = lshr <4 x i32> %src2, %result = or <4 x i32> %and.i, %vshl_n @@ -339,15 +536,25 @@ define void @testRightGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) noun } define void @testRightBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightBad4x32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4194300 -; CHECK-NEXT: ushr.4s v1, v1, #22 -; CHECK-NEXT: dup.4s v2, w8 -; 
CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad4x32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #4194300 // =0x3ffffc +; CHECK-SD-NEXT: ushr.4s v1, v1, #22 +; CHECK-SD-NEXT: dup.4s v2, w8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad4x32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI23_0 +; CHECK-GI-NEXT: ushr.4s v1, v1, #22 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <4 x i32> %src1, %vshl_n = lshr <4 x i32> %src2, %result = or <4 x i32> %and.i, %vshl_n @@ -356,11 +563,20 @@ define void @testRightBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounw } define void @testLeftGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftGood2x64: -; CHECK: // %bb.0: -; CHECK-NEXT: sli.2d v0, v1, #48 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftGood2x64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sli.2d v0, v1, #48 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftGood2x64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.2d v2, #0x00ffffffffffff +; CHECK-GI-NEXT: shl.2d v1, v1, #48 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i64> %src1, %vshl_n = shl <2 x i64> %src2, %result = or <2 x i64> %and.i, %vshl_n @@ -369,16 +585,26 @@ define void @testLeftGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounw } define void @testLeftBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftBad2x64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: shl.2d v1, v1, #48 -; 
CHECK-NEXT: movk x8, #1, lsl #48 -; CHECK-NEXT: dup.2d v2, x8 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftBad2x64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x8, #10 // =0xa +; CHECK-SD-NEXT: shl.2d v1, v1, #48 +; CHECK-SD-NEXT: movk x8, #1, lsl #48 +; CHECK-SD-NEXT: dup.2d v2, x8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftBad2x64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI25_0 +; CHECK-GI-NEXT: shl.2d v1, v1, #48 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i64> %src1, %vshl_n = shl <2 x i64> %src2, %result = or <2 x i64> %and.i, %vshl_n @@ -387,11 +613,20 @@ define void @testLeftBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwi } define void @testRightGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testRightGood2x64: -; CHECK: // %bb.0: -; CHECK-NEXT: sri.2d v0, v1, #48 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightGood2x64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sri.2d v0, v1, #48 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightGood2x64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi.2d v2, #0xffffffffffff0000 +; CHECK-GI-NEXT: ushr.2d v1, v1, #48 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i64> %src1, %vshl_n = lshr <2 x i64> %src2, %result = or <2 x i64> %and.i, %vshl_n @@ -400,16 +635,26 @@ define void @testRightGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) noun } define void @testRightBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind { -; CHECK-LABEL: 
testRightBad2x64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: ushr.2d v1, v1, #48 -; CHECK-NEXT: movk x8, #1, lsl #48 -; CHECK-NEXT: dup.2d v2, x8 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testRightBad2x64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x8, #10 // =0xa +; CHECK-SD-NEXT: ushr.2d v1, v1, #48 +; CHECK-SD-NEXT: movk x8, #1, lsl #48 +; CHECK-SD-NEXT: dup.2d v2, x8 +; CHECK-SD-NEXT: and.16b v0, v0, v2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testRightBad2x64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI27_0 +; CHECK-GI-NEXT: ushr.2d v1, v1, #48 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI27_0] +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: orr.16b v0, v0, v1 +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <2 x i64> %src1, %vshl_n = lshr <2 x i64> %src2, %result = or <2 x i64> %and.i, %vshl_n @@ -418,11 +663,19 @@ define void @testRightBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounw } define void @testLeftShouldNotCreateSLI1x128(<1 x i128> %src1, <1 x i128> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftShouldNotCreateSLI1x128: -; CHECK: // %bb.0: -; CHECK-NEXT: bfi x1, x2, #6, #58 -; CHECK-NEXT: stp x0, x1, [x4] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftShouldNotCreateSLI1x128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: bfi x1, x2, #6, #58 +; CHECK-SD-NEXT: stp x0, x1, [x4] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftShouldNotCreateSLI1x128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov.d v0[0], x0 +; CHECK-GI-NEXT: bfi x1, x2, #6, #58 +; CHECK-GI-NEXT: mov.d v0[1], x1 +; CHECK-GI-NEXT: str q0, [x4] +; CHECK-GI-NEXT: ret %and.i = and <1 x i128> %src1, %vshl_n = shl <1 x i128> %src2, %result = or <1 x i128> %and.i, %vshl_n diff --git a/llvm/test/CodeGen/AArch64/arm64-vclz.ll b/llvm/test/CodeGen/AArch64/arm64-vclz.ll 
index 38c0572e23f890..c65e75c89e8da9 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vclz.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vclz.ll @@ -1,154 +1,254 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; FALLBACK-NOT: remark{{.*}}test_vclz_u8 define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_u8: - ; CHECK: clz.8b v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.8b v0, v0 +; CHECK-NEXT: ret %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind ret <8 x i8> %vclz.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_s8 define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_s8: - ; CHECK: clz.8b v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.8b v0, v0 +; CHECK-NEXT: ret %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind ret <8 x i8> %vclz.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_u16 define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_u16: - ; CHECK: clz.4h v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.4h v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind ret <4 x i16> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_s16 define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp { - ; 
CHECK-LABEL: test_vclz_s16: - ; CHECK: clz.4h v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.4h v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind ret <4 x i16> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_u32 define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_u32: - ; CHECK: clz.2s v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.2s v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind ret <2 x i32> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_s32 define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_s32: - ; CHECK: clz.2s v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclz_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.2s v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind ret <2 x i32> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_u64 define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_u64: +; CHECK-SD-LABEL: test_vclz_u64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushr d1, d0, #1 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #4 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #8 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #16 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #32 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: mvn.8b v0, v0 +; CHECK-SD-NEXT: cnt.8b v0, v0 +; CHECK-SD-NEXT: uaddlp.4h v0, v0 +; CHECK-SD-NEXT: uaddlp.2s v0, v0 +; CHECK-SD-NEXT: uaddlp.1d v0, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vclz_u64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: clz x8, 
x8 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind ret <1 x i64> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclz_s64 define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclz_s64: +; CHECK-SD-LABEL: test_vclz_s64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushr d1, d0, #1 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #4 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #8 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #16 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: ushr d1, d0, #32 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: mvn.8b v0, v0 +; CHECK-SD-NEXT: cnt.8b v0, v0 +; CHECK-SD-NEXT: uaddlp.4h v0, v0 +; CHECK-SD-NEXT: uaddlp.2s v0, v0 +; CHECK-SD-NEXT: uaddlp.1d v0, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vclz_s64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: clz x8, x8 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind ret <1 x i64> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_u8 define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_u8: - ; CHECK: clz.16b v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.16b v0, v0 +; CHECK-NEXT: ret %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind ret <16 x i8> %vclz.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_s8 define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_s8: - ; CHECK: clz.16b v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.16b v0, v0 +; CHECK-NEXT: ret %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, 
i1 false) nounwind ret <16 x i8> %vclz.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_u16 define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_u16: - ; CHECK: clz.8h v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.8h v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind ret <8 x i16> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_s16 define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_s16: - ; CHECK: clz.8h v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.8h v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind ret <8 x i16> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_u32 define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_u32: - ; CHECK: clz.4s v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.4s v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind ret <4 x i32> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_s32 define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_s32: - ; CHECK: clz.4s v0, v0 - ; CHECK-NEXT: ret +; CHECK-LABEL: test_vclzq_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: clz.4s v0, v0 +; CHECK-NEXT: ret %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind ret <4 x i32> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_u64 define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_u64: +; CHECK-SD-LABEL: test_vclzq_u64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushr.2d v1, v0, #1 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; 
CHECK-SD-NEXT: ushr.2d v1, v0, #4 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #8 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #16 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #32 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: mvn.16b v0, v0 +; CHECK-SD-NEXT: cnt.16b v0, v0 +; CHECK-SD-NEXT: uaddlp.8h v0, v0 +; CHECK-SD-NEXT: uaddlp.4s v0, v0 +; CHECK-SD-NEXT: uaddlp.2d v0, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vclzq_u64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: mov.d x9, v0[1] +; CHECK-GI-NEXT: clz x8, x8 +; CHECK-GI-NEXT: mov.d v0[0], x8 +; CHECK-GI-NEXT: clz x8, x9 +; CHECK-GI-NEXT: mov.d v0[1], x8 +; CHECK-GI-NEXT: ret %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind ret <2 x i64> %vclz1.i } -; FALLBACK-NOT: remark{{.*}}test_vclzq_s64 define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp { - ; CHECK-LABEL: test_vclzq_s64: +; CHECK-SD-LABEL: test_vclzq_s64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushr.2d v1, v0, #1 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #2 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #4 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #8 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #16 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: ushr.2d v1, v0, #32 +; CHECK-SD-NEXT: orr.16b v0, v0, v1 +; CHECK-SD-NEXT: mvn.16b v0, v0 +; CHECK-SD-NEXT: cnt.16b v0, v0 +; CHECK-SD-NEXT: uaddlp.8h v0, v0 +; CHECK-SD-NEXT: uaddlp.4s v0, v0 +; CHECK-SD-NEXT: uaddlp.2d v0, v0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vclzq_s64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: mov.d x9, v0[1] +; CHECK-GI-NEXT: clz x8, x8 +; CHECK-GI-NEXT: mov.d v0[0], x8 +; CHECK-GI-NEXT: clz x8, x9 +; CHECK-GI-NEXT: mov.d v0[1], x8 +; CHECK-GI-NEXT: ret %vclz1.i = 
tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind ret <2 x i64> %vclz1.i } declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone - declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone - declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone - declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone - declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>, i1) nounwind readnone - declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone - declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone - declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index 7af7c235f9ac16..2f543cc324bc22 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -1,12 +1,114 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for sqshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path 
for uqshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshl_scalar_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for urshr_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srshr_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2s 
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu1d_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i64_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_i32_constant +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrn4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshrun4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4h +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrn4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrshrun4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqrshrn4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uqshrn8h +; CHECK-GI-NEXT: warning: 
Instruction selection used fallback path for uqshrn4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_vscalar_constant_shift +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_ushl_scalar_constant_shift +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_vscalar_constant_shift +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for neon_sshll_scalar_constant_shift_m1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ursra_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for srsra_scalar +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli16b +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8h +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqshlu_zero_shift_amount +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_trunc_v2i64_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_trunc_v2i64_v2i8 define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sqshl8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqshl.8b v0, 
v0, v1 +; CHECK-NEXT: sqshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -19,7 +121,7 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqshl.4h v0, v0, v1 +; CHECK-NEXT: sqshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -32,7 +134,7 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqshl.2s v0, v0, v1 +; CHECK-NEXT: sqshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -97,7 +199,7 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqshl.8b v0, v0, v1 +; CHECK-NEXT: uqshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -110,7 +212,7 @@ define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqshl.4h v0, v0, v1 +; CHECK-NEXT: uqshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -123,7 +225,7 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqshl.2s v0, v0, v1 +; CHECK-NEXT: uqshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -136,7 +238,7 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshl.16b v0, v0, v1 +; CHECK-NEXT: sqshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -149,7 +251,7 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) 
nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshl.8h v0, v0, v1 +; CHECK-NEXT: sqshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -162,7 +264,7 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshl.4s v0, v0, v1 +; CHECK-NEXT: sqshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -175,7 +277,7 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshl.2d v0, v0, v1 +; CHECK-NEXT: sqshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -188,7 +290,7 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshl.16b v0, v0, v1 +; CHECK-NEXT: uqshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -201,7 +303,7 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshl.8h v0, v0, v1 +; CHECK-NEXT: uqshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -214,7 +316,7 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshl.4s v0, v0, v1 +; CHECK-NEXT: uqshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -227,7 +329,7 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshl.2d v0, v0, v1 +; CHECK-NEXT: uqshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret 
%tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -315,7 +417,7 @@ define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: srshl.8b v0, v0, v1 +; CHECK-NEXT: srshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -328,7 +430,7 @@ define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: srshl.4h v0, v0, v1 +; CHECK-NEXT: srshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -341,7 +443,7 @@ define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: srshl.2s v0, v0, v1 +; CHECK-NEXT: srshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -394,10 +496,10 @@ define i64 @srshl_scalar(ptr %A, ptr %B) nounwind { define i64 @srshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: srshl_scalar_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: srshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -411,7 +513,7 @@ define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: urshl.8b v0, v0, v1 +; CHECK-NEXT: urshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -424,7 +526,7 @@ define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: urshl.4h v0, v0, v1 +; CHECK-NEXT: urshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret 
%tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -437,7 +539,7 @@ define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: urshl.2s v0, v0, v1 +; CHECK-NEXT: urshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -490,10 +592,10 @@ define i64 @urshl_scalar(ptr %A, ptr %B) nounwind { define i64 @urshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: urshl_scalar_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: urshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -507,7 +609,7 @@ define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: srshl.16b v0, v0, v1 +; CHECK-NEXT: srshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -520,7 +622,7 @@ define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: srshl.8h v0, v0, v1 +; CHECK-NEXT: srshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -533,7 +635,7 @@ define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: srshl.4s v0, v0, v1 +; CHECK-NEXT: srshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -546,7 +648,7 @@ define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: srshl.2d v0, v0, v1 +; CHECK-NEXT: srshl v0.2d, v0.2d, v1.2d ; 
CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -559,7 +661,7 @@ define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: urshl.16b v0, v0, v1 +; CHECK-NEXT: urshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -572,7 +674,7 @@ define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: urshl.8h v0, v0, v1 +; CHECK-NEXT: urshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -585,7 +687,7 @@ define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: urshl.4s v0, v0, v1 +; CHECK-NEXT: urshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -598,7 +700,7 @@ define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: urshl.2d v0, v0, v1 +; CHECK-NEXT: urshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -633,7 +735,7 @@ define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqrshl.8b v0, v0, v1 +; CHECK-NEXT: sqrshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -646,7 +748,7 @@ define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqrshl.4h v0, v0, v1 +; CHECK-NEXT: sqrshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -659,7 +761,7 @@ define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; 
CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqrshl.2s v0, v0, v1 +; CHECK-NEXT: sqrshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -672,7 +774,7 @@ define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqrshl.8b v0, v0, v1 +; CHECK-NEXT: uqrshl v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -685,7 +787,7 @@ define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqrshl.4h v0, v0, v1 +; CHECK-NEXT: uqrshl v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -698,7 +800,7 @@ define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqrshl.2s v0, v0, v1 +; CHECK-NEXT: uqrshl v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -711,7 +813,7 @@ define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshl.16b v0, v0, v1 +; CHECK-NEXT: sqrshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -724,7 +826,7 @@ define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshl.8h v0, v0, v1 +; CHECK-NEXT: sqrshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -737,7 +839,7 @@ define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshl.4s v0, v0, v1 +; CHECK-NEXT: sqrshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x 
i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -750,7 +852,7 @@ define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshl.2d v0, v0, v1 +; CHECK-NEXT: sqrshl v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -803,10 +905,10 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind { define i64 @sqrshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: sqrshl_scalar_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: sqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -820,7 +922,7 @@ define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshl.16b v0, v0, v1 +; CHECK-NEXT: uqrshl v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -833,7 +935,7 @@ define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshl.8h v0, v0, v1 +; CHECK-NEXT: uqrshl v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -846,7 +948,7 @@ define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshl.4s v0, v0, v1 +; CHECK-NEXT: uqrshl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -859,7 +961,7 @@ define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshl.2d v0, v0, v1 +; CHECK-NEXT: uqrshl v0.2d, v0.2d, v1.2d ; 
CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -912,10 +1014,10 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind { define i64 @uqrshl_scalar_constant(ptr %A) nounwind { ; CHECK-LABEL: uqrshl_scalar_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: uqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -947,77 +1049,126 @@ declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i8> @urshr8b(ptr %A) nounwind { -; CHECK-LABEL: urshr8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: urshr.8b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: urshr v0.8b, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @urshr4h(ptr %A) nounwind { -; CHECK-LABEL: urshr4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: urshr.4h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: urshr v0.4h, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> 
@llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @urshr2s(ptr %A) nounwind { -; CHECK-LABEL: urshr2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: urshr.2s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: urshr v0.2s, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @urshr16b(ptr %A) nounwind { -; CHECK-LABEL: urshr16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: urshr.16b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: urshr v0.16b, v0.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @urshr8h(ptr %A) nounwind { -; CHECK-LABEL: urshr8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: urshr.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: urshr v0.8h, v0.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> 
@llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @urshr4s(ptr %A) nounwind { -; CHECK-LABEL: urshr4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: urshr.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: urshr v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @urshr2d(ptr %A) nounwind { -; CHECK-LABEL: urshr2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: urshr.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshr2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: urshr v0.2d, v0.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshr2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 @@ -1047,77 +1198,126 @@ define i64 @urshr_scalar(ptr %A) nounwind { } define <8 x i8> @srshr8b(ptr %A) nounwind { -; CHECK-LABEL: srshr8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: srshr.8b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: srshr v0.8b, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr 
%A %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @srshr4h(ptr %A) nounwind { -; CHECK-LABEL: srshr4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: srshr.4h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: srshr v0.4h, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @srshr2s(ptr %A) nounwind { -; CHECK-LABEL: srshr2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: srshr.2s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: srshr v0.2s, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @srshr16b(ptr %A) nounwind { -; CHECK-LABEL: srshr16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: srshr.16b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: srshr v0.16b, v0.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> 
@llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @srshr8h(ptr %A) nounwind { -; CHECK-LABEL: srshr8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: srshr.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: srshr v0.8h, v0.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @srshr4s(ptr %A) nounwind { -; CHECK-LABEL: srshr4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: srshr.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: srshr v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @srshr2d(ptr %A) nounwind { -; CHECK-LABEL: srshr2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: srshr.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshr2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: srshr v0.2d, v0.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshr2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x 
i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 @@ -1150,7 +1350,7 @@ define <8 x i8> @sqshlu8b(ptr %A) nounwind { ; CHECK-LABEL: sqshlu8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshlu.8b v0, v0, #1 +; CHECK-NEXT: sqshlu v0.8b, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> ) @@ -1161,7 +1361,7 @@ define <4 x i16> @sqshlu4h(ptr %A) nounwind { ; CHECK-LABEL: sqshlu4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshlu.4h v0, v0, #1 +; CHECK-NEXT: sqshlu v0.4h, v0.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> ) @@ -1172,7 +1372,7 @@ define <2 x i32> @sqshlu2s(ptr %A) nounwind { ; CHECK-LABEL: sqshlu2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshlu.2s v0, v0, #1 +; CHECK-NEXT: sqshlu v0.2s, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> ) @@ -1183,7 +1383,7 @@ define <16 x i8> @sqshlu16b(ptr %A) nounwind { ; CHECK-LABEL: sqshlu16b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshlu.16b v0, v0, #1 +; CHECK-NEXT: sqshlu v0.16b, v0.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> ) @@ -1194,7 +1394,7 @@ define <8 x i16> @sqshlu8h(ptr %A) nounwind { ; CHECK-LABEL: sqshlu8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshlu.8h v0, v0, #1 +; CHECK-NEXT: sqshlu v0.8h, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> ) @@ -1205,7 +1405,7 @@ define <4 x i32> @sqshlu4s(ptr %A) nounwind { ; CHECK-LABEL: sqshlu4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshlu.4s v0, v0, #1 +; CHECK-NEXT: sqshlu v0.4s, v0.4s, #1 ; 
CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> ) @@ -1216,7 +1416,7 @@ define <2 x i64> @sqshlu2d(ptr %A) nounwind { ; CHECK-LABEL: sqshlu2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshlu.2d v0, v0, #1 +; CHECK-NEXT: sqshlu v0.2d, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> ) @@ -1275,7 +1475,7 @@ define <8 x i8> @rshrn8b(ptr %A) nounwind { ; CHECK-LABEL: rshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: rshrn.8b v0, v0, #1 +; CHECK-NEXT: rshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1286,7 +1486,7 @@ define <4 x i16> @rshrn4h(ptr %A) nounwind { ; CHECK-LABEL: rshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: rshrn.4h v0, v0, #1 +; CHECK-NEXT: rshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1297,7 +1497,7 @@ define <2 x i32> @rshrn2s(ptr %A) nounwind { ; CHECK-LABEL: rshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: rshrn.2s v0, v0, #1 +; CHECK-NEXT: rshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1309,7 +1509,7 @@ define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: rshrn2.16b v0, v1, #1 +; CHECK-NEXT: rshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1323,7 +1523,7 @@ define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: rshrn2.8h v0, v1, #1 +; CHECK-NEXT: 
rshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1337,7 +1537,7 @@ define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: rshrn2.4s v0, v1, #1 +; CHECK-NEXT: rshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1354,7 +1554,7 @@ define <8 x i8> @shrn8b(ptr %A) nounwind { ; CHECK-LABEL: shrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: shrn.8b v0, v0, #1 +; CHECK-NEXT: shrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = lshr <8 x i16> %tmp1, @@ -1366,7 +1566,7 @@ define <4 x i16> @shrn4h(ptr %A) nounwind { ; CHECK-LABEL: shrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: shrn.4h v0, v0, #1 +; CHECK-NEXT: shrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = lshr <4 x i32> %tmp1, @@ -1378,7 +1578,7 @@ define <2 x i32> @shrn2s(ptr %A) nounwind { ; CHECK-LABEL: shrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: shrn.2s v0, v0, #1 +; CHECK-NEXT: shrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = lshr <2 x i64> %tmp1, @@ -1391,7 +1591,7 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: shrn2.16b v0, v1, #1 +; CHECK-NEXT: shrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1406,7 +1606,7 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: shrn2.8h v0, v1, #1 +; CHECK-NEXT: shrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1421,7 +1621,7 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; 
CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: shrn2.4s v0, v1, #1 +; CHECK-NEXT: shrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1450,7 +1650,7 @@ define <8 x i8> @sqshrn8b(ptr %A) nounwind { ; CHECK-LABEL: sqshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrn.8b v0, v0, #1 +; CHECK-NEXT: sqshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1461,7 +1661,7 @@ define <4 x i16> @sqshrn4h(ptr %A) nounwind { ; CHECK-LABEL: sqshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrn.4h v0, v0, #1 +; CHECK-NEXT: sqshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1472,7 +1672,7 @@ define <2 x i32> @sqshrn2s(ptr %A) nounwind { ; CHECK-LABEL: sqshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrn.2s v0, v0, #1 +; CHECK-NEXT: sqshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1485,7 +1685,7 @@ define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrn2.16b v0, v1, #1 +; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1499,7 +1699,7 @@ define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrn2.8h v0, v1, #1 +; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1513,7 +1713,7 @@ define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrn2.4s 
v0, v1, #1 +; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1542,7 +1742,7 @@ define <8 x i8> @sqshrun8b(ptr %A) nounwind { ; CHECK-LABEL: sqshrun8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrun.8b v0, v0, #1 +; CHECK-NEXT: sqshrun v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) @@ -1553,7 +1753,7 @@ define <4 x i16> @sqshrun4h(ptr %A) nounwind { ; CHECK-LABEL: sqshrun4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrun.4h v0, v0, #1 +; CHECK-NEXT: sqshrun v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) @@ -1564,7 +1764,7 @@ define <2 x i32> @sqshrun2s(ptr %A) nounwind { ; CHECK-LABEL: sqshrun2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshrun.2s v0, v0, #1 +; CHECK-NEXT: sqshrun v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) @@ -1576,7 +1776,7 @@ define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrun2.16b v0, v1, #1 +; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1590,7 +1790,7 @@ define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrun2.8h v0, v1, #1 +; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1604,7 +1804,7 @@ define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqshrun2.4s v0, v1, #1 +; 
CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1633,7 +1833,7 @@ define <8 x i8> @sqrshrn8b(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrn.8b v0, v0, #1 +; CHECK-NEXT: sqrshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1644,7 +1844,7 @@ define <4 x i16> @sqrshrn4h(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrn.4h v0, v0, #1 +; CHECK-NEXT: sqrshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1655,7 +1855,7 @@ define <2 x i32> @sqrshrn2s(ptr %A) nounwind { ; CHECK-LABEL: sqrshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrn.2s v0, v0, #1 +; CHECK-NEXT: sqrshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1667,7 +1867,7 @@ define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrn2.16b v0, v1, #1 +; CHECK-NEXT: sqrshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1681,7 +1881,7 @@ define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrn2.8h v0, v1, #1 +; CHECK-NEXT: sqrshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1695,7 +1895,7 @@ define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrn2.4s v0, v1, #1 +; CHECK-NEXT: sqrshrn2 
v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1724,7 +1924,7 @@ define <8 x i8> @sqrshrun8b(ptr %A) nounwind { ; CHECK-LABEL: sqrshrun8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrun.8b v0, v0, #1 +; CHECK-NEXT: sqrshrun v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) @@ -1735,7 +1935,7 @@ define <4 x i16> @sqrshrun4h(ptr %A) nounwind { ; CHECK-LABEL: sqrshrun4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrun.4h v0, v0, #1 +; CHECK-NEXT: sqrshrun v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) @@ -1746,7 +1946,7 @@ define <2 x i32> @sqrshrun2s(ptr %A) nounwind { ; CHECK-LABEL: sqrshrun2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqrshrun.2s v0, v0, #1 +; CHECK-NEXT: sqrshrun v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) @@ -1758,7 +1958,7 @@ define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrun2.16b v0, v1, #1 +; CHECK-NEXT: sqrshrun2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1772,7 +1972,7 @@ define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrun2.8h v0, v1, #1 +; CHECK-NEXT: sqrshrun2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1786,7 +1986,7 @@ define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sqrshrun2.4s v0, v1, #1 +; CHECK-NEXT: sqrshrun2 
v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1815,7 +2015,7 @@ define <8 x i8> @uqrshrn8b(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqrshrn.8b v0, v0, #1 +; CHECK-NEXT: uqrshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1826,7 +2026,7 @@ define <4 x i16> @uqrshrn4h(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqrshrn.4h v0, v0, #1 +; CHECK-NEXT: uqrshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1837,7 +2037,7 @@ define <2 x i32> @uqrshrn2s(ptr %A) nounwind { ; CHECK-LABEL: uqrshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqrshrn.2s v0, v0, #1 +; CHECK-NEXT: uqrshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1849,7 +2049,7 @@ define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshrn2.16b v0, v1, #1 +; CHECK-NEXT: uqrshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1863,7 +2063,7 @@ define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshrn2.8h v0, v1, #1 +; CHECK-NEXT: uqrshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1877,7 +2077,7 @@ define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqrshrn2.4s v0, v1, #1 +; CHECK-NEXT: uqrshrn2 v0.4s, v1.2d, #1 ; 
CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret %tmp1 = load <2 x i64>, ptr %A @@ -1906,7 +2106,7 @@ define <8 x i8> @uqshrn8b(ptr %A) nounwind { ; CHECK-LABEL: uqshrn8b: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshrn.8b v0, v0, #1 +; CHECK-NEXT: uqshrn v0.8b, v0.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) @@ -1917,7 +2117,7 @@ define <4 x i16> @uqshrn4h(ptr %A) nounwind { ; CHECK-LABEL: uqshrn4h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshrn.4h v0, v0, #1 +; CHECK-NEXT: uqshrn v0.4h, v0.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) @@ -1928,7 +2128,7 @@ define <2 x i32> @uqshrn2s(ptr %A) nounwind { ; CHECK-LABEL: uqshrn2s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshrn.2s v0, v0, #1 +; CHECK-NEXT: uqshrn v0.2s, v0.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) @@ -1940,7 +2140,7 @@ define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshrn2.16b v0, v1, #1 +; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #1 ; CHECK-NEXT: ret %out = load <8 x i8>, ptr %ret %tmp1 = load <8 x i16>, ptr %A @@ -1954,7 +2154,7 @@ define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshrn2.8h v0, v1, #1 +; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #1 ; CHECK-NEXT: ret %out = load <4 x i16>, ptr %ret %tmp1 = load <4 x i32>, ptr %A @@ -1968,7 +2168,7 @@ define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uqshrn2.4s v0, v1, #1 +; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #1 ; CHECK-NEXT: ret %out = load <2 x i32>, ptr %ret 
%tmp1 = load <2 x i64>, ptr %A @@ -1986,7 +2186,7 @@ define <8 x i16> @ushll8h(ptr %A) nounwind { ; CHECK-LABEL: ushll8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #1 +; CHECK-NEXT: ushll v0.8h, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> @@ -1998,7 +2198,7 @@ define <4 x i32> @ushll4s(ptr %A) nounwind { ; CHECK-LABEL: ushll4s: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.4s v0, v0, #1 +; CHECK-NEXT: ushll v0.4s, v0.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> @@ -2010,7 +2210,7 @@ define <2 x i64> @ushll2d(ptr %A) nounwind { ; CHECK-LABEL: ushll2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.2d v0, v0, #1 +; CHECK-NEXT: ushll v0.2d, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> @@ -2019,11 +2219,18 @@ define <2 x i64> @ushll2d(ptr %A) nounwind { } define <8 x i16> @ushll2_8h(ptr %A) nounwind { -; CHECK-LABEL: ushll2_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ushll.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ushll2_8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ushll2_8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #1 +; CHECK-GI-NEXT: ret %load1 = load <16 x i8>, ptr %A %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> @@ -2032,11 +2239,18 @@ define <8 x i16> @ushll2_8h(ptr %A) nounwind { } define <4 x i32> @ushll2_4s(ptr %A) nounwind { -; CHECK-LABEL: ushll2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ushll.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ushll2_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] 
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ushll2_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #1 +; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> @@ -2045,11 +2259,18 @@ define <4 x i32> @ushll2_4s(ptr %A) nounwind { } define <2 x i64> @ushll2_2d(ptr %A) nounwind { -; CHECK-LABEL: ushll2_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: ushll.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ushll2_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ushll2_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #1 +; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> @@ -2064,24 +2285,32 @@ declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>) declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>) declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64) -define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushll8h_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #1 -; CHECK-NEXT: ret +define <8 x i16> @neon_ushll8h_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushll8h_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll8h_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.8h, #1 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushl v0.8h, v0.8h, v1.8h +; 
CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> ) ret <8 x i16> %tmp3 } -define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl8h_no_constant_shift: +define <8 x i16> @neon_ushl8h_no_constant_shift(ptr %A) nounwind { +; CHECK-LABEL: neon_ushl8h_no_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushl.8h v0, v0, v0 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushl v0.8h, v0.8h, v0.8h ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> @@ -2089,36 +2318,76 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind { ret <8 x i16> %tmp3 } -define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushl8h_constant_shift_extend_not_2x: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushl8h_constant_shift_extend_not_2x: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: uxtb w8, w8 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v1.b[3] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: fmov w10, s3 +; CHECK-GI-NEXT: fmov w11, s4 +; CHECK-GI-NEXT: uxtb w9, w9 +; CHECK-GI-NEXT: uxtb w10, w10 +; CHECK-GI-NEXT: uxtb w11, w11 +; CHECK-GI-NEXT: fmov s2, w9 +; CHECK-GI-NEXT: mov v1.h[1], w10 +; 
CHECK-GI-NEXT: mov v2.h[1], w11 +; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i8>, ptr %A %tmp2 = zext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } -define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl8_noext_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: add.8h v0, v0, v0 -; CHECK-NEXT: ret +define <8 x i16> @neon_ushl8_noext_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushl8_noext_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushl8_noext_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8h, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ushl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } -define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushll4s_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_ushll4s_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushll4s_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll4s_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.4s, #1 +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> 
@llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) @@ -2126,13 +2395,21 @@ define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind { } ; FIXME: unnecessary ushll.4s v0, v0, #0? -define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushll4s_neg_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushr.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_ushll4s_neg_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushll4s_neg_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll4s_neg_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) @@ -2140,35 +2417,52 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind { } ; FIXME: should be constant folded. 
-define <4 x i32> @neon.ushll4s_constant_fold() nounwind { -; CHECK-LABEL: neon.ushll4s_constant_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI160_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI160_0] -; CHECK-NEXT: add.4s v0, v0, v0 -; CHECK-NEXT: ret +define <4 x i32> @neon_ushll4s_constant_fold() nounwind { +; CHECK-SD-LABEL: neon_ushll4s_constant_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI160_0 +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI160_0] +; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll4s_constant_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: adrp x8, .LCPI160_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI160_0] +; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> ) ret <4 x i32> %tmp3 } -define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushll2d_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ushll.2d v0, v0, #1 -; CHECK-NEXT: ret +define <2 x i64> @neon_ushll2d_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_ushll2d_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_ushll2d_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: adrp x8, .LCPI161_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI161_0] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } -define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl_vscalar_constant_shift: +define <1 x i64> @neon_ushl_vscalar_constant_shift(ptr %A) 
nounwind { +; CHECK-LABEL: neon_ushl_vscalar_constant_shift: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: zip1.2s v0, v0, v1 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s ; CHECK-NEXT: shl d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i32>, ptr %A @@ -2177,8 +2471,8 @@ define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind { ret <1 x i64> %tmp3 } -define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.ushl_scalar_constant_shift: +define i64 @neon_ushl_scalar_constant_shift(ptr %A) nounwind { +; CHECK-LABEL: neon_ushl_scalar_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 @@ -2195,7 +2489,7 @@ define <8 x i16> @sshll8h(ptr %A) nounwind { ; CHECK-LABEL: sshll8h: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.8h v0, v0, #1 +; CHECK-NEXT: sshll v0.8h, v0.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> @@ -2207,7 +2501,7 @@ define <2 x i64> @sshll2d(ptr %A) nounwind { ; CHECK-LABEL: sshll2d: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.2d v0, v0, #1 +; CHECK-NEXT: sshll v0.2d, v0.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> @@ -2222,85 +2516,156 @@ declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>) declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>) declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64) -define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl16b_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: add.16b v0, v0, v0 -; CHECK-NEXT: ret +define <16 x i8> @neon_sshl16b_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl16b_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; 
CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl16b_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.16b, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } -define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl16b_non_splat_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI167_0 -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI167_0] -; CHECK-NEXT: sshl.16b v0, v0, v1 -; CHECK-NEXT: ret +define <16 x i8> @neon_sshl16b_non_splat_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl16b_non_splat_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI167_0 +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI167_0] +; CHECK-SD-NEXT: sshl v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl16b_non_splat_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI167_0 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI167_0] +; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } -define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl16b_neg_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sshr.16b v0, v0, #2 -; CHECK-NEXT: ret +define <16 x i8> @neon_sshl16b_neg_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl16b_neg_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl16b_neg_constant_shift: +; CHECK-GI: // 
%bb.0: +; CHECK-GI-NEXT: movi v0.16b, #254 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp2 } -define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll8h_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.8h v0, v0, #1 -; CHECK-NEXT: ret +define <8 x i16> @neon_sshll8h_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshll8h_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshll8h_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.8h, #1 +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> ) ret <8 x i16> %tmp3 } -define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshl4s_wrong_ext_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl4s_wrong_ext_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl4s_wrong_ext_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: sxtb w8, w8 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, 
v1.b[3] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: fmov w10, s3 +; CHECK-GI-NEXT: fmov w11, s4 +; CHECK-GI-NEXT: sxtb w9, w9 +; CHECK-GI-NEXT: sxtb w10, w10 +; CHECK-GI-NEXT: sxtb w11, w11 +; CHECK-GI-NEXT: fmov s2, w9 +; CHECK-GI-NEXT: mov v1.h[1], w10 +; CHECK-GI-NEXT: mov v2.h[1], w11 +; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0 +; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i8>, ptr %A %tmp2 = sext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } -define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll4s_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshll4s_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshll4s_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshll4s_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.4s, #1 +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) ret <4 x i32> %tmp3 } -define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll4s_neg_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshr.4s v0, v0, #1 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshll4s_neg_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshll4s_neg_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: 
sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshll4s_neg_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) @@ -2308,46 +2673,70 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind { } ; FIXME: should be constant folded. -define <4 x i32> @neon.sshl4s_constant_fold() nounwind { -; CHECK-LABEL: neon.sshl4s_constant_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI173_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI173_0] -; CHECK-NEXT: shl.4s v0, v0, #2 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshl4s_constant_fold() nounwind { +; CHECK-SD-LABEL: neon_sshl4s_constant_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI173_0 +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI173_0] +; CHECK-SD-NEXT: shl v0.4s, v0.4s, #2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl4s_constant_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4s, #2 +; CHECK-GI-NEXT: adrp x8, .LCPI173_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI173_0] +; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> ) ret <4 x i32> %tmp3 } -define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl4s_no_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: add.4s v0, v0, v0 -; CHECK-NEXT: ret +define <4 x i32> @neon_sshl4s_no_fold(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl4s_no_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl4s_no_fold: +; CHECK-GI: // %bb.0: 
+; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } -define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll2d_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sshll.2d v0, v0, #1 -; CHECK-NEXT: ret +define <2 x i64> @neon_sshll2d_constant_shift(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshll2d_constant_shift: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshll2d_constant_shift: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: adrp x8, .LCPI175_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI175_0] +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } -define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll_vscalar_constant_shift: +define <1 x i64> @neon_sshll_vscalar_constant_shift(ptr %A) nounwind { +; CHECK-LABEL: neon_sshll_vscalar_constant_shift: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: zip1.2s v0, v0, v1 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s ; CHECK-NEXT: shl d0, d0, #1 ; CHECK-NEXT: ret %tmp1 = load <1 x i32>, ptr %A @@ -2356,8 +2745,8 @@ define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind { ret <1 x i64> %tmp3 } -define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll_scalar_constant_shift: +define i64 
@neon_sshll_scalar_constant_shift(ptr %A) nounwind { +; CHECK-LABEL: neon_sshll_scalar_constant_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 @@ -2370,8 +2759,8 @@ define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind { ret i64 %tmp3 } -define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind { -; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1: +define i64 @neon_sshll_scalar_constant_shift_m1(ptr %A) nounwind { +; CHECK-LABEL: neon_sshll_scalar_constant_shift_m1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: fmov d0, x8 @@ -2385,34 +2774,58 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind { } ; FIXME: should be constant folded. -define <2 x i64> @neon.sshl2d_constant_fold() nounwind { -; CHECK-LABEL: neon.sshl2d_constant_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI179_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI179_0] -; CHECK-NEXT: add.2d v0, v0, v0 -; CHECK-NEXT: ret +define <2 x i64> @neon_sshl2d_constant_fold() nounwind { +; CHECK-SD-LABEL: neon_sshl2d_constant_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI179_0 +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI179_0] +; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl2d_constant_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI179_1 +; CHECK-GI-NEXT: adrp x9, .LCPI179_0 +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI179_1] +; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI179_0] +; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> ) ret <2 x i64> %tmp3 } -define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind { -; CHECK-LABEL: neon.sshl2d_no_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: shl.2d v0, v0, #2 -; CHECK-NEXT: ret +define <2 x i64> @neon_sshl2d_no_fold(ptr %A) nounwind { +; CHECK-SD-LABEL: neon_sshl2d_no_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr 
q0, [x0] +; CHECK-SD-NEXT: shl v0.2d, v0.2d, #2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: neon_sshl2d_no_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI180_0 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI180_0] +; CHECK-GI-NEXT: sshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp2 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i16> @sshll2_8h(ptr %A) nounwind { -; CHECK-LABEL: sshll2_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: sshll.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sshll2_8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sshll2_8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #1 +; CHECK-GI-NEXT: ret %load1 = load <16 x i8>, ptr %A %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> @@ -2421,11 +2834,18 @@ define <8 x i16> @sshll2_8h(ptr %A) nounwind { } define <4 x i32> @sshll2_4s(ptr %A) nounwind { -; CHECK-LABEL: sshll2_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: sshll.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sshll2_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sshll2_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #1 +; CHECK-GI-NEXT: ret %load1 = load <8 x i16>, ptr %A %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> @@ -2434,11 +2854,18 @@ define <4 x i32> @sshll2_4s(ptr %A) nounwind { } define <2 x i64> @sshll2_2d(ptr %A) nounwind { -; CHECK-LABEL: sshll2_2d: -; CHECK: // 
%bb.0: -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: sshll.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sshll2_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0, #8] +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sshll2_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #1 +; CHECK-GI-NEXT: ret %load1 = load <4 x i32>, ptr %A %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> @@ -2447,88 +2874,145 @@ define <2 x i64> @sshll2_2d(ptr %A) nounwind { } define <8 x i8> @sqshli8b(ptr %A) nounwind { -; CHECK-LABEL: sqshli8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshl.8b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sqshl v0.8b, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8b, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: sqshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 } define <4 x i16> @sqshli4h(ptr %A) nounwind { -; CHECK-LABEL: sqshli4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshl.4h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sqshl v0.4h, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4h, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: sqshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @sqshli2s(ptr %A) nounwind { -; CHECK-LABEL: sqshli2s: -; CHECK: 
// %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: sqshl.2s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: sqshl v0.2s, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: sqshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @sqshli16b(ptr %A) nounwind { -; CHECK-LABEL: sqshli16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshl.16b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sqshl v0.16b, v0.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.16b, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sqshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @sqshli8h(ptr %A) nounwind { -; CHECK-LABEL: sqshli8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshl.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sqshl v0.8h, v0.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8h, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sqshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @sqshli4s(ptr %A) nounwind { -; CHECK-LABEL: sqshli4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshl.4s v0, 
v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sqshl v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: sqshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @sqshli2d(ptr %A) nounwind { -; CHECK-LABEL: sqshli2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: sqshl.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshli2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: sqshl v0.2d, v0.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshli2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI190_0 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI190_0] +; CHECK-GI-NEXT: sqshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i8> @uqshli8b(ptr %A) nounwind { -; CHECK-LABEL: uqshli8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uqshl.8b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: uqshl v0.8b, v0.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8b, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: uqshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) ret <8 x i8> %tmp3 @@ -2537,9 +3021,9 @@ define <8 x i8> @uqshli8b(ptr %A) nounwind { define <8 x i8> @uqshli8b_1(ptr %A) nounwind { ; CHECK-LABEL: uqshli8b_1: ; CHECK: // %bb.0: -; CHECK-NEXT: 
movi.8b v1, #8 -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uqshl.8b v0, v0, v1 +; CHECK-NEXT: movi v0.8b, #8 +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqshl v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) @@ -2547,78 +3031,130 @@ define <8 x i8> @uqshli8b_1(ptr %A) nounwind { } define <4 x i16> @uqshli4h(ptr %A) nounwind { -; CHECK-LABEL: uqshli4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uqshl.4h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: uqshl v0.4h, v0.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4h, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: uqshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) ret <4 x i16> %tmp3 } define <2 x i32> @uqshli2s(ptr %A) nounwind { -; CHECK-LABEL: uqshli2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uqshl.2s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: uqshl v0.2s, v0.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #1 +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: uqshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) ret <2 x i32> %tmp3 } define <16 x i8> @uqshli16b(ptr %A) nounwind { -; CHECK-LABEL: uqshli16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshl.16b v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: uqshl v0.16b, v0.16b, #1 +; CHECK-SD-NEXT: ret +; +; 
CHECK-GI-LABEL: uqshli16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.16b, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: uqshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) ret <16 x i8> %tmp3 } define <8 x i16> @uqshli8h(ptr %A) nounwind { -; CHECK-LABEL: uqshli8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshl.8h v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: uqshl v0.8h, v0.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.8h, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: uqshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) ret <8 x i16> %tmp3 } define <4 x i32> @uqshli4s(ptr %A) nounwind { -; CHECK-LABEL: uqshli4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshl.4s v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: uqshl v0.4s, v0.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.4s, #1 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: uqshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) ret <4 x i32> %tmp3 } define <2 x i64> @uqshli2d(ptr %A) nounwind { -; CHECK-LABEL: uqshli2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: uqshl.2d v0, v0, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshli2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: uqshl v0.2d, v0.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshli2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp 
x8, .LCPI198_0 +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI198_0] +; CHECK-GI-NEXT: uqshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) ret <2 x i64> %tmp3 } define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ursra.8b v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: ursra v0.8b, v1.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>, ptr %B @@ -2627,12 +3163,21 @@ define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind { } define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ursra.4h v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: ursra v0.4h, v1.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>, ptr %B @@ -2641,12 +3186,21 @@ define 
<4 x i16> @ursra4h(ptr %A, ptr %B) nounwind { } define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ursra.2s v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: ursra v0.2s, v1.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: urshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>, ptr %B @@ -2655,12 +3209,21 @@ define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind { } define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ursra.16b v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: ursra v0.16b, v1.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>, ptr %B @@ -2669,12 +3232,21 @@ define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind { } define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: 
ursra.8h v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: ursra v0.8h, v1.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>, ptr %B @@ -2683,12 +3255,21 @@ define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind { } define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ursra.4s v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: ursra v0.4s, v1.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ursra4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>, ptr %B @@ -2697,12 +3278,21 @@ define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind { } define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: ursra2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ursra.2d v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ursra2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: ursra v0.2d, v1.2d, #1 +; CHECK-SD-NEXT: ret +; +; 
CHECK-GI-LABEL: ursra2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: urshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>, ptr %B @@ -2740,12 +3330,21 @@ define i64 @ursra_scalar(ptr %A, ptr %B) nounwind { } define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: srsra.8b v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: srsra v0.8b, v1.8b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) %tmp4 = load <8 x i8>, ptr %B @@ -2754,12 +3353,21 @@ define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind { } define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: srsra.4h v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: srsra v0.4h, v1.4h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h +; 
CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) %tmp4 = load <4 x i16>, ptr %B @@ -2768,12 +3376,21 @@ define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind { } define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: srsra.2s v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: srsra v0.2s, v1.2s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr d1, [x0] +; CHECK-GI-NEXT: srshl v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) %tmp4 = load <2 x i32>, ptr %B @@ -2782,12 +3399,21 @@ define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind { } define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: srsra.16b v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: srsra v0.16b, v1.16b, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.16b, v1.16b, v0.16b +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) %tmp4 = load <16 x i8>, ptr %B @@ -2796,12 +3422,21 @@ define <16 x i8> @srsra16b(ptr %A, 
ptr %B) nounwind { } define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: srsra.8h v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: srsra v0.8h, v1.8h, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.8h, v1.8h, v0.8h +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) %tmp4 = load <8 x i16>, ptr %B @@ -2810,12 +3445,21 @@ define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind { } define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: srsra.4s v0, v1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srsra4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: srsra v0.4s, v1.4s, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) %tmp4 = load <4 x i32>, ptr %B @@ -2824,12 +3468,21 @@ define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind { } define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: srsra2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: srsra.2d v0, v1, #1 -; CHECK-NEXT: ret +; 
CHECK-SD-LABEL: srsra2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q1, [x0] +; CHECK-SD-NEXT: ldr q0, [x1] +; CHECK-SD-NEXT: srsra v0.2d, v1.2d, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srsra2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: srshl v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) %tmp4 = load <2 x i64>, ptr %B @@ -2871,7 +3524,7 @@ define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: usra.8b v0, v1, #1 +; CHECK-NEXT: usra v0.8b, v1.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = lshr <8 x i8> %tmp1, @@ -2885,7 +3538,7 @@ define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: usra.4h v0, v1, #1 +; CHECK-NEXT: usra v0.4h, v1.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = lshr <4 x i16> %tmp1, @@ -2899,7 +3552,7 @@ define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: usra.2s v0, v1, #1 +; CHECK-NEXT: usra v0.2s, v1.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = lshr <2 x i32> %tmp1, @@ -2913,7 +3566,7 @@ define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: usra.16b v0, v1, #1 +; CHECK-NEXT: usra v0.16b, v1.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = lshr <16 x i8> %tmp1, @@ -2927,7 +3580,7 @@ define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: usra.8h v0, v1, #1 +; CHECK-NEXT: usra v0.8h, v1.8h, #1 
; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = lshr <8 x i16> %tmp1, @@ -2941,7 +3594,7 @@ define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: usra.4s v0, v1, #1 +; CHECK-NEXT: usra v0.4s, v1.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = lshr <4 x i32> %tmp1, @@ -2955,7 +3608,7 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: usra.2d v0, v1, #1 +; CHECK-NEXT: usra v0.2d, v1.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = lshr <2 x i64> %tmp1, @@ -2965,12 +3618,20 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind { } define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: usra1d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: usra d0, d1, #1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: usra1d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d1, [x0] +; CHECK-SD-NEXT: ldr d0, [x1] +; CHECK-SD-NEXT: usra d0, d1, #1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: usra1d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr x8, [x0] +; CHECK-GI-NEXT: ldr x9, [x1] +; CHECK-GI-NEXT: add x8, x9, x8, lsr #1 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp3 = lshr <1 x i64> %tmp1, %tmp4 = load <1 x i64>, ptr %B @@ -2983,7 +3644,7 @@ define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ssra.8b v0, v1, #1 +; CHECK-NEXT: ssra v0.8b, v1.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp3 = ashr <8 x i8> %tmp1, @@ -2997,7 +3658,7 @@ define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ssra.4h v0, v1, #1 +; CHECK-NEXT: ssra v0.4h, v1.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp3 = ashr <4 x 
i16> %tmp1, @@ -3011,7 +3672,7 @@ define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ssra.2s v0, v1, #1 +; CHECK-NEXT: ssra v0.2s, v1.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp3 = ashr <2 x i32> %tmp1, @@ -3025,7 +3686,7 @@ define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ssra.16b v0, v1, #1 +; CHECK-NEXT: ssra v0.16b, v1.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp3 = ashr <16 x i8> %tmp1, @@ -3039,7 +3700,7 @@ define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ssra.8h v0, v1, #1 +; CHECK-NEXT: ssra v0.8h, v1.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp3 = ashr <8 x i16> %tmp1, @@ -3053,7 +3714,7 @@ define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ssra.4s v0, v1, #1 +; CHECK-NEXT: ssra v0.4s, v1.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp3 = ashr <4 x i32> %tmp1, @@ -3067,7 +3728,7 @@ define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x1] -; CHECK-NEXT: ssra.2d v0, v1, #1 +; CHECK-NEXT: ssra v0.2d, v1.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp3 = ashr <2 x i64> %tmp1, @@ -3081,8 +3742,8 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ushr.8b v0, v0, #1 -; CHECK-NEXT: orr.8b v0, v0, v1 +; CHECK-NEXT: ushr v0.8b, v0.8b, #1 +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp4 = load <8 x i8>, ptr %B @@ -3096,8 +3757,8 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; 
CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ushr.4h v0, v0, #1 -; CHECK-NEXT: orr.8b v0, v0, v1 +; CHECK-NEXT: ushr v0.4h, v0.4h, #1 +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp4 = load <4 x i16>, ptr %B @@ -3111,8 +3772,8 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ushr.2s v0, v0, #1 -; CHECK-NEXT: orr.8b v0, v0, v1 +; CHECK-NEXT: ushr v0.2s, v0.2s, #1 +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp4 = load <2 x i32>, ptr %B @@ -3126,8 +3787,8 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ushr.16b v0, v0, #1 -; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: ushr v0.16b, v0.16b, #1 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp4 = load <16 x i8>, ptr %B @@ -3141,8 +3802,8 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ushr.8h v0, v0, #1 -; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: ushr v0.8h, v0.8h, #1 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp4 = load <8 x i16>, ptr %B @@ -3156,8 +3817,8 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ushr.4s v0, v0, #1 -; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: ushr v0.4s, v0.4s, #1 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp4 = load <4 x i32>, ptr %B @@ -3171,8 +3832,8 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ushr.2d v0, v0, #1 -; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: ushr 
v0.2d, v0.2d, #1 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp4 = load <2 x i64>, ptr %B @@ -3182,13 +3843,21 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind { } define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr8b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: add.8b v0, v0, v0 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: add v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: shl v0.8b, v0.8b, #1 +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp4 = load <8 x i8>, ptr %B %tmp3 = shl <8 x i8> %tmp1, @@ -3197,13 +3866,21 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind { } define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr4h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: add.4h v0, v0, v0 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: add v0.4h, v0.4h, v0.4h +; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: shl v0.4h, v0.4h, #1 +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp4 = load <4 x i16>, ptr %B %tmp3 = shl <4 x i16> %tmp1, @@ -3212,13 +3889,21 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind { } define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: 
shl_orr2s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: add.2s v0, v0, v0 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: add v0.2s, v0.2s, v0.2s +; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: ldr d1, [x1] +; CHECK-GI-NEXT: shl v0.2s, v0.2s, #1 +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp4 = load <2 x i32>, ptr %B %tmp3 = shl <2 x i32> %tmp1, @@ -3227,13 +3912,21 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind { } define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: add.16b v0, v0, v0 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: shl v0.16b, v0.16b, #1 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp4 = load <16 x i8>, ptr %B %tmp3 = shl <16 x i8> %tmp1, @@ -3242,13 +3935,21 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind { } define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: add.8h v0, v0, v0 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; 
CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h +; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: shl v0.8h, v0.8h, #1 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp4 = load <8 x i16>, ptr %B %tmp3 = shl <8 x i16> %tmp1, @@ -3257,13 +3958,21 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind { } define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: add.4s v0, v0, v0 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: add v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: shl v0.4s, v0.4s, #1 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp4 = load <4 x i32>, ptr %B %tmp3 = shl <4 x i32> %tmp1, @@ -3272,13 +3981,21 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind { } define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: shl_orr2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: add.2d v0, v0, v0 -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shl_orr2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: add v0.2d, v0.2d, v0.2d +; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_orr2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; 
CHECK-GI-NEXT: shl v0.2d, v0.2d, #1 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp4 = load <2 x i64>, ptr %B %tmp3 = shl <2 x i64> %tmp1, @@ -3287,20 +4004,32 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind { } define <8 x i16> @shll(<8 x i8> %in) { -; CHECK-LABEL: shll: -; CHECK: // %bb.0: -; CHECK-NEXT: shll.8h v0, v0, #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shll: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: shll v0.8h, v0.8b, #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shll: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8 +; CHECK-GI-NEXT: ret %ext = zext <8 x i8> %in to <8 x i16> %res = shl <8 x i16> %ext, ret <8 x i16> %res } define <4 x i32> @shll_high(<8 x i16> %in) { -; CHECK-LABEL: shll_high: -; CHECK: // %bb.0: -; CHECK-NEXT: shll2.4s v0, v0, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shll_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: shll2 v0.4s, v0.8h, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shll_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-GI-NEXT: shl v0.4s, v0.4s, #16 +; CHECK-GI-NEXT: ret %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> %ext = zext <4 x i16> %extract to <4 x i32> %res = shl <4 x i32> %ext, @@ -3312,7 +4041,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sli.8b v0, v1, #1 +; CHECK-NEXT: sli v0.8b, v1.8b, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B @@ -3325,7 +4054,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sli.4h v0, v1, #1 +; CHECK-NEXT: sli v0.4h, v1.4h, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -3338,7 +4067,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: 
; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sli.2s v0, v1, #1 +; CHECK-NEXT: sli v0.2s, v1.2s, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -3364,7 +4093,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sli.16b v0, v1, #1 +; CHECK-NEXT: sli v0.16b, v1.16b, #1 ; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B @@ -3377,7 +4106,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sli.8h v0, v1, #1 +; CHECK-NEXT: sli v0.8h, v1.8h, #1 ; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B @@ -3390,7 +4119,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sli.4s v0, v1, #1 +; CHECK-NEXT: sli v0.4s, v1.4s, #1 ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B @@ -3403,7 +4132,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sli.2d v0, v1, #1 +; CHECK-NEXT: sli v0.2d, v1.2d, #1 ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B @@ -3422,21 +4151,37 @@ declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounw declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: ashr_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg d1, d1 -; CHECK-NEXT: sshl d0, d0, d1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ashr_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg d1, d1 +; CHECK-SD-NEXT: sshl d0, d0, d1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ashr_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; 
CHECK-GI-NEXT: fmov x9, d1 +; CHECK-GI-NEXT: asr x8, x8, x9 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %c = ashr <1 x i64> %a, %b ret <1 x i64> %c } define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: sqshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: sqshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3445,11 +4190,19 @@ entry: } define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: uqshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uqshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uqshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: uqshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3458,11 +4211,19 @@ 
entry: } define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: srshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: srshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: srshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: srshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3471,11 +4232,19 @@ entry: } define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: urshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: urshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3486,8 +4255,8 @@ entry: define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { ; CHECK-LABEL: sqshlu_zero_shift_amount: ; CHECK: // %bb.0: // 
%entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: sqshlu.2d v0, v0, #0 +; CHECK-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-NEXT: sqshlu v0.2d, v0.2d, #0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -3498,11 +4267,19 @@ entry: } define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: sshl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sshl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sshl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: sshl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3511,11 +4288,19 @@ entry: } define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { -; CHECK-LABEL: ushl_zero_shift_amount: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addp.2d v0, v0, v1 -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ushl_zero_shift_amount: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: str q0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ushl_zero_shift_amount: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: addp v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: ushl v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x 
i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) @@ -3526,8 +4311,8 @@ entry: define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) { ; CHECK-LABEL: sext_rshrn: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rshrn.4h v0, v0, #13 -; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: rshrn v0.4h, v0.4s, #13 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: ret entry: %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13) @@ -3538,8 +4323,8 @@ entry: define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) { ; CHECK-LABEL: zext_rshrn: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rshrn.4h v0, v0, #13 -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: rshrn v0.4h, v0.4s, #13 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ret entry: %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13) @@ -3550,9 +4335,9 @@ entry: define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) { ; CHECK-LABEL: mul_rshrn: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi.4s v1, #3 -; CHECK-NEXT: add.4s v0, v0, v1 -; CHECK-NEXT: rshrn.4h v0, v0, #13 +; CHECK-NEXT: movi v1.4s, #3 +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: rshrn v0.4h, v0.4s, #13 ; CHECK-NEXT: ret entry: %b = add <4 x i32> %a, @@ -3561,15 +4346,61 @@ entry: } define <8 x i16> @signbits_vashr(<8 x i16> %a) { -; CHECK-LABEL: signbits_vashr: -; CHECK: // %bb.0: -; CHECK-NEXT: sshr.8h v0, v0, #8 -; CHECK-NEXT: sshr.8h v0, v0, #9 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: signbits_vashr: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #8 +; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: signbits_vashr: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mvni v1.8h, #7 +; CHECK-GI-NEXT: mvni v2.8h, #8 +; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: sshl v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #7 +; CHECK-GI-NEXT: ret %b = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> ) %c = call <8 x i16> 
@llvm.aarch64.neon.sshl.v8i16(<8 x i16> %b, <8 x i16> ) %d = ashr <8 x i16> %c, ret <8 x i16> %d } +define <2 x i8> @lshr_trunc_v2i64_v2i8(<2 x i64> %a) { +; CHECK-LABEL: lshr_trunc_v2i64_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: shrn v0.2s, v0.2d, #16 +; CHECK-NEXT: ret + %b = lshr <2 x i64> %a, + %c = trunc <2 x i64> %b to <2 x i8> + ret <2 x i8> %c +} + +define <2 x i8> @ashr_trunc_v2i64_v2i8(<2 x i64> %a) { +; CHECK-LABEL: ashr_trunc_v2i64_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: shrn v0.2s, v0.2d, #16 +; CHECK-NEXT: ret + %b = ashr <2 x i64> %a, + %c = trunc <2 x i64> %b to <2 x i8> + ret <2 x i8> %c +} + +define <2 x i8> @shl_trunc_v2i64_v2i8(<2 x i64> %a) { +; CHECK-SD-LABEL: shl_trunc_v2i64_v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_trunc_v2i64_v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: shl v0.2d, v0.2d, #16 +; CHECK-GI-NEXT: xtn v0.2s, v0.2d +; CHECK-GI-NEXT: ret + %b = shl <2 x i64> %a, + %c = trunc <2 x i64> %b to <2 x i8> + ret <2 x i8> %c +} + declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/AArch64/blr-bti-preserves-operands.mir b/llvm/test/CodeGen/AArch64/blr-bti-preserves-operands.mir index 6d4f21fdee9507..760ae4794e3043 100644 --- a/llvm/test/CodeGen/AArch64/blr-bti-preserves-operands.mir +++ b/llvm/test/CodeGen/AArch64/blr-bti-preserves-operands.mir @@ -8,7 +8,7 @@ # The arguments to the call must become implicit arguments, because the branch # only expects to get 1 explicit operand which is the branch target. 
-# CHECK: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $w30_hi, implicit-def $sp, implicit-def $wsp, implicit-def $wsp_hi, implicit $sp, implicit $x0, implicit $w1 { +# CHECK: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit $sp, implicit $x0, implicit $w1 { # CHECK: BL @_setjmp, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $w1, implicit-def dead $lr, implicit $sp, implicit-def $sp # CHECK: HINT 36 # CHECK: } diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index d9aaae20afc69e..d4d89a7c9c22e5 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) { ; CHECK-SD-LABEL: concat1: diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll index 5a5dee0b53d439..4cb1d5b2fb345d 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll @@ -5,7 +5,7 @@ declare void @llvm.masked.scatter.nxv16i8.nxv16p0(, , i32 immarg, ) -define fastcc i8 @allocno_reload_assign() { +define fastcc i8 @allocno_reload_assign(ptr %p) { ; CHECK-LABEL: allocno_reload_assign: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, xzr @@ -14,8 +14,8 @@ define fastcc i8 @allocno_reload_assign() { ; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: uzp1 p0.s, p0.s, p0.s ; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h -; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b -; 
CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 +; CHECK-NEXT: uzp1 p8.b, p0.b, p0.b +; CHECK-NEXT: mov z0.b, p8/z, #1 // =0x1 ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: uunpklo z1.h, z0.b @@ -30,34 +30,35 @@ define fastcc i8 @allocno_reload_assign() { ; CHECK-NEXT: punpklo p1.h, p0.b ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: punpklo p2.h, p1.b -; CHECK-NEXT: punpkhi p3.h, p1.b +; CHECK-NEXT: punpkhi p4.h, p1.b ; CHECK-NEXT: uunpklo z0.d, z2.s ; CHECK-NEXT: uunpkhi z1.d, z2.s -; CHECK-NEXT: punpklo p5.h, p0.b +; CHECK-NEXT: punpklo p6.h, p0.b ; CHECK-NEXT: uunpklo z2.d, z3.s ; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: punpkhi p7.h, p0.b +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: uunpklo z4.d, z5.s ; CHECK-NEXT: uunpkhi z5.d, z5.s ; CHECK-NEXT: uunpklo z6.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: punpklo p0.h, p2.b -; CHECK-NEXT: punpkhi p1.h, p2.b -; CHECK-NEXT: punpklo p2.h, p3.b -; CHECK-NEXT: punpkhi p3.h, p3.b -; CHECK-NEXT: punpklo p4.h, p5.b -; CHECK-NEXT: punpkhi p5.h, p5.b -; CHECK-NEXT: punpklo p6.h, p7.b -; CHECK-NEXT: punpkhi p7.h, p7.b +; CHECK-NEXT: punpklo p1.h, p2.b +; CHECK-NEXT: punpkhi p2.h, p2.b +; CHECK-NEXT: punpklo p3.h, p4.b +; CHECK-NEXT: punpkhi p4.h, p4.b +; CHECK-NEXT: punpklo p5.h, p6.b +; CHECK-NEXT: punpkhi p6.h, p6.b +; CHECK-NEXT: punpklo p7.h, p0.b +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: st1b { z0.d }, p0, [z16.d] -; CHECK-NEXT: st1b { z1.d }, p1, [z16.d] -; CHECK-NEXT: st1b { z2.d }, p2, [z16.d] -; CHECK-NEXT: st1b { z3.d }, p3, [z16.d] -; CHECK-NEXT: st1b { z4.d }, p4, [z16.d] -; CHECK-NEXT: st1b { z5.d }, p5, [z16.d] -; CHECK-NEXT: st1b { z6.d }, p6, [z16.d] -; CHECK-NEXT: st1b { z7.d }, p7, [z16.d] +; CHECK-NEXT: st1b { z0.d }, p1, [z16.d] +; CHECK-NEXT: st1b { z1.d }, p2, [z16.d] +; CHECK-NEXT: st1b { z2.d }, p3, [z16.d] +; CHECK-NEXT: st1b { z3.d }, p4, [z16.d] +; CHECK-NEXT: st1b { z4.d }, p5, [z16.d] +; 
CHECK-NEXT: st1b { z5.d }, p6, [z16.d] +; CHECK-NEXT: st1b { z6.d }, p7, [z16.d] +; CHECK-NEXT: st1b { z7.d }, p0, [z16.d] +; CHECK-NEXT: str p8, [x0] ; CHECK-NEXT: b .LBB0_1 br label %1 @@ -66,6 +67,7 @@ define fastcc i8 @allocno_reload_assign() { %constexpr1 = shufflevector %constexpr, poison, zeroinitializer %constexpr2 = xor %constexpr1, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) call void @llvm.masked.scatter.nxv16i8.nxv16p0( zeroinitializer, zeroinitializer, i32 0, %constexpr2) + store %constexpr, ptr %p, align 16 br label %1 } diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir index f9878adfe5e448..ffa7453e48b4f0 100644 --- a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir +++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir @@ -91,10 +91,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3342346 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2 ; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 3342346 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) ; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2 ; CHECK-NEXT: 
nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv @@ -111,10 +111,10 @@ body: | %6:gpr64common = LOADgot target-flags(aarch64-got) @c %3:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c) - INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3) + INLINEASM &"", 1 /* sideeffect attdialect */, 3342346 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3) %0:fpr64 = COPY %2 %5:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c) - INLINEASM &"", 1 /* sideeffect attdialect */, 3735562 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3) + INLINEASM &"", 1 /* sideeffect attdialect */, 3342346 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3) %7:fpr64 = FNEGDr %2 nofpexcept FCMPDrr %4, killed %7, implicit-def $nzcv, implicit $fpcr Bcc 1, %bb.2, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/expand-blr-rvmarker-pseudo.mir b/llvm/test/CodeGen/AArch64/expand-blr-rvmarker-pseudo.mir index 89102a8c3770d7..b1e48346c27464 100644 --- a/llvm/test/CodeGen/AArch64/expand-blr-rvmarker-pseudo.mir +++ b/llvm/test/CodeGen/AArch64/expand-blr-rvmarker-pseudo.mir @@ -39,7 +39,7 @@ # CHECK: bb.0: # CHECK-NEXT: liveins: # CHECK-NEXT: {{ $}} -# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $w30_hi, implicit-def $sp, implicit-def $wsp, implicit-def $wsp_hi, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit-def $w29_hi, implicit $x0, implicit $sp, implicit $xzr, implicit $fp { +# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit $x0, implicit $sp, implicit $xzr, implicit $fp { # CHECK-NEXT: BLR $x0, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def 
$sp, implicit-def dead $x0 # CHECK-NEXT: ORRXrs $xzr, $fp, 0 # CHECK-NEXT: BL @attachedcall, implicit-def $lr, implicit internal $sp @@ -62,7 +62,7 @@ body: | # CHECK: bb.0: # CHECK-NEXT: liveins: # CHECK-NEXT: {{ $}} -# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $w30_hi, implicit-def $sp, implicit-def $wsp, implicit-def $wsp_hi, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit-def $w29_hi, implicit $sp, implicit $x0, implicit $xzr, implicit $fp { +# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit $sp, implicit $x0, implicit $xzr, implicit $fp { # CHECK-NEXT: BL @foo, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $x0 # CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 # CHECK-NEXT: BL @attachedcall, implicit-def $lr, implicit internal $sp @@ -82,7 +82,7 @@ body: | # CHECK: bb.0: # CHECK-NEXT: liveins: # CHECK-NEXT: {{ $}} -# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $w30_hi, implicit-def $sp, implicit-def $wsp, implicit-def $wsp_hi, implicit-def $x0, implicit-def $w0, implicit-def $w0_hi, implicit-def $fp, implicit-def $w29, implicit-def $w29_hi, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $xzr, implicit $fp { +# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit-def $x0, implicit-def $w0, implicit-def $fp, implicit-def $w29, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $xzr, implicit $fp { # CHECK-NEXT: BL @foo, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 # CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 # CHECK-NEXT: BL @attachedcall, implicit-def $lr, implicit internal $sp @@ -102,7 +102,7 @@ body: | 
# CHECK: bb.0: # CHECK-NEXT: liveins: # CHECK-NEXT: {{ $}} -# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $w30_hi, implicit-def $sp, implicit-def $wsp, implicit-def $wsp_hi, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit-def $w29_hi, implicit $sp, implicit $w0, implicit $w1, implicit $xzr, implicit $fp { +# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit $sp, implicit $w0, implicit $w1, implicit $xzr, implicit $fp { # CHECK-NEXT: BL @foo, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $w1, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $x0 # CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 # CHECK-NEXT: BL @attachedcall, implicit-def $lr, implicit internal $sp @@ -123,7 +123,7 @@ body: | # CHECK: bb.0: # CHECK-NEXT: liveins: # CHECK-NEXT: {{ $}} -# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $w30_hi, implicit-def $sp, implicit-def $wsp, implicit-def $wsp_hi, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit-def $w29_hi, implicit $x8, implicit $sp, implicit $w0, implicit $w1, implicit $xzr, implicit $fp { +# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit $x8, implicit $sp, implicit $w0, implicit $w1, implicit $xzr, implicit $fp { # CHECK-NEXT: BLR $x8, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $w1, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $x0 # CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 # CHECK-NEXT: BL @attachedcall, implicit-def $lr, implicit internal $sp @@ -145,7 +145,7 @@ body: | # CHECK: bb.0: # CHECK-NEXT: liveins: # CHECK-NEXT: {{ $}} -# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $w30_hi, implicit-def $sp, implicit-def 
$wsp, implicit-def $wsp_hi, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit-def $w29_hi, implicit $sp, implicit undef $x0, implicit $xzr, implicit $fp { +# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit-def dead $x0, implicit-def $fp, implicit-def $w29, implicit $sp, implicit undef $x0, implicit $xzr, implicit $fp { # CHECK-NEXT: BL @foo, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit undef $x0, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $x0 # CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 # CHECK-NEXT: BL @objc_retainAutoreleasedReturnValue, implicit-def $lr, implicit internal $sp diff --git a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll index 43c6e01911462a..75d55773b3681e 100644 --- a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll +++ b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <2 x i32> @and_extract_zext_idx0(<4 x i16> %vec) nounwind { ; CHECK-SD-LABEL: and_extract_zext_idx0: diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll new file mode 100644 index 00000000000000..2388a0f206a51c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll @@ -0,0 +1,225 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+sve < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + + +define 
i1 @extract_icmp_v4i32_const_splat_rhs(<4 x i32> %a) { +; CHECK-LABEL: extract_icmp_v4i32_const_splat_rhs: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: cmp w8, #5 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %icmp = icmp ult <4 x i32> %a, splat (i32 5) + %ext = extractelement <4 x i1> %icmp, i32 1 + ret i1 %ext +} + +define i1 @extract_icmp_v4i32_const_splat_lhs(<4 x i32> %a) { +; CHECK-LABEL: extract_icmp_v4i32_const_splat_lhs: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: cmp w8, #7 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %icmp = icmp ult <4 x i32> splat(i32 7), %a + %ext = extractelement <4 x i1> %icmp, i32 1 + ret i1 %ext +} + +define i1 @extract_icmp_v4i32_const_vec_rhs(<4 x i32> %a) { +; CHECK-LABEL: extract_icmp_v4i32_const_vec_rhs: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: cmp w8, #234 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %icmp = icmp ult <4 x i32> %a, + %ext = extractelement <4 x i1> %icmp, i32 1 + ret i1 %ext +} + +define i1 @extract_fcmp_v4f32_const_splat_rhs(<4 x float> %a) { +; CHECK-LABEL: extract_fcmp_v4f32_const_splat_rhs: +; CHECK: // %bb.0: +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: fmov s1, #4.00000000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret + %fcmp = fcmp ult <4 x float> %a, splat(float 4.0e+0) + %ext = extractelement <4 x i1> %fcmp, i32 1 + ret i1 %ext +} + +define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) { +; CHECK-LABEL: vector_loop_with_icmp: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: index z0.d, #0, #1 +; CHECK-NEXT: mov w8, #4 // =0x4 +; CHECK-NEXT: mov w9, #16 // =0x10 +; CHECK-NEXT: dup v2.2d, x8 +; CHECK-NEXT: add x8, x0, #8 +; CHECK-NEXT: mov w10, #1 // =0x1 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: add z1.d, z1.d, #2 // =0x2 +; CHECK-NEXT: b .LBB4_2 +; CHECK-NEXT: .LBB4_1: // %pred.store.continue18 +; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: add v1.2d, v1.2d, v2.2d 
+; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: subs x9, x9, #4 +; CHECK-NEXT: add x8, x8, #16 +; CHECK-NEXT: b.eq .LBB4_10 +; CHECK-NEXT: .LBB4_2: // %vector.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fmov x11, d0 +; CHECK-NEXT: cmp x11, #14 +; CHECK-NEXT: b.hi .LBB4_4 +; CHECK-NEXT: // %bb.3: // %pred.store.if +; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: stur w10, [x8, #-8] +; CHECK-NEXT: .LBB4_4: // %pred.store.continue +; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: mov x11, v0.d[1] +; CHECK-NEXT: cmp x11, #14 +; CHECK-NEXT: b.hi .LBB4_6 +; CHECK-NEXT: // %bb.5: // %pred.store.if5 +; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: stur w10, [x8, #-4] +; CHECK-NEXT: .LBB4_6: // %pred.store.continue6 +; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: cmp x11, #14 +; CHECK-NEXT: b.hi .LBB4_8 +; CHECK-NEXT: // %bb.7: // %pred.store.if7 +; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: str w10, [x8] +; CHECK-NEXT: .LBB4_8: // %pred.store.continue8 +; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: mov x11, v1.d[1] +; CHECK-NEXT: cmp x11, #14 +; CHECK-NEXT: b.hi .LBB4_1 +; CHECK-NEXT: // %bb.9: // %pred.store.if9 +; CHECK-NEXT: // in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: str w10, [x8, #4] +; CHECK-NEXT: b .LBB4_1 +; CHECK-NEXT: .LBB4_10: // %for.cond.cleanup +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: + %index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue18 ] + %vec.ind = phi <4 x i64> [ , %entry ], [ %vec.ind.next, %pred.store.continue18 ] + %0 = icmp ult <4 x i64> %vec.ind, + %1 = extractelement <4 x i1> %0, i64 0 + br i1 %1, label %pred.store.if, label %pred.store.continue + +pred.store.if: + %2 = getelementptr inbounds i32, ptr %dest, i64 %index + store i32 1, ptr %2, align 4 + br label %pred.store.continue + +pred.store.continue: + %3 = extractelement <4 x i1> %0, i64 1 + br i1 
%3, label %pred.store.if5, label %pred.store.continue6 + +pred.store.if5: + %4 = or disjoint i64 %index, 1 + %5 = getelementptr inbounds i32, ptr %dest, i64 %4 + store i32 1, ptr %5, align 4 + br label %pred.store.continue6 + +pred.store.continue6: + %6 = extractelement <4 x i1> %0, i64 2 + br i1 %6, label %pred.store.if7, label %pred.store.continue8 + +pred.store.if7: + %7 = or disjoint i64 %index, 2 + %8 = getelementptr inbounds i32, ptr %dest, i64 %7 + store i32 1, ptr %8, align 4 + br label %pred.store.continue8 + +pred.store.continue8: + %9 = extractelement <4 x i1> %0, i64 3 + br i1 %9, label %pred.store.if9, label %pred.store.continue18 + +pred.store.if9: + %10 = or disjoint i64 %index, 3 + %11 = getelementptr inbounds i32, ptr %dest, i64 %10 + store i32 1, ptr %11, align 4 + br label %pred.store.continue18 + +pred.store.continue18: + %index.next = add i64 %index, 4 + %vec.ind.next = add <4 x i64> %vec.ind, + %24 = icmp eq i64 %index.next, 16 + br i1 %24, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: + ret void +} + + +; Negative tests + +define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) { +; CHECK-LABEL: extract_icmp_v4i32_splat_rhs: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v1.4s, w0 +; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %ins = insertelement <4 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer + %icmp = icmp ult <4 x i32> %a, %splat + %ext = extractelement <4 x i1> %icmp, i32 1 + ret i1 %ext +} + +define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) { +; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v1.4s, #235 +; CHECK-NEXT: adrp x9, .LCPI6_0 +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_0] +; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-NEXT: xtn v1.4h, v0.4s +; 
CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: umov w9, v1.h[1] +; CHECK-NEXT: fmov w10, s0 +; CHECK-NEXT: and w0, w9, #0x1 +; CHECK-NEXT: strb w10, [x8] +; CHECK-NEXT: ret + %icmp = icmp ult <4 x i32> %a, splat(i32 235) + %ext = extractelement <4 x i1> %icmp, i32 1 + store <4 x i1> %icmp, ptr %p, align 4 + ret i1 %ext +} + +define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) { +; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_unknown_idx: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movi v1.4s, #127 +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: bfi x8, x0, #1, #2 +; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: str d0, [sp, #8] +; CHECK-NEXT: ldrh w8, [x8] +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %icmp = icmp ult <4 x i32> %a, splat(i32 127) + %ext = extractelement <4 x i1> %icmp, i32 %c + ret i1 %ext +} diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll index d18af3d5ae9450..7705d8949ca1ed 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll @@ -2,6 +2,13 @@ ; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for insert_vscale_8_i16_zero +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_vscale_8_i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_vscale_16_i8_zero +; CHECK-GI-NEXT: warning: Instruction selection used 
fallback path for insert_vscale_16_i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_vscale_16_i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_vscale_16_i8_zero + define @insert_vscale_2_i64_zero( %vec, i64 %elt) { ; CHECK-SD-LABEL: insert_vscale_2_i64_zero: ; CHECK-SD: // %bb.0: // %entry diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll index 7056a4d28fed39..51aad4fe25d3b8 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -1,164 +1,308 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO16 -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-NO16,CHECK-SD-NO16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-FP16,CHECK-SD-FP16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-NO16,CHECK-GI-NO16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-FP16,CHECK-GI-FP16 ; fptoui define i32 @fcvtzs_f32_i32_7(float %flt) { -; CHECK-LABEL: fcvtzs_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; 
CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = fptosi float %fix to i32 ret i32 %cvt } define i32 @fcvtzs_f32_i32_32(float %flt) { -; CHECK-LABEL: fcvtzs_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, s0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, s0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = fptosi float %fix to i32 ret i32 %cvt } define i64 @fcvtzs_f32_i64_7(float %flt) { -; CHECK-LABEL: fcvtzs_f32_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f32_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f32_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = fptosi float %fix to i64 ret i64 %cvt } define i64 @fcvtzs_f32_i64_64(float %flt) { -; CHECK-LABEL: fcvtzs_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, s0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, s0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = fptosi float %fix to i64 ret i64 %cvt } define i32 @fcvtzs_f64_i32_7(double %dbl) { -; CHECK-LABEL: fcvtzs_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, d0, #7 -; 
CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = fptosi double %fix to i32 ret i32 %cvt } define i32 @fcvtzs_f64_i32_32(double %dbl) { -; CHECK-LABEL: fcvtzs_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, d0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, d0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = fptosi double %fix to i32 ret i32 %cvt } define i64 @fcvtzs_f64_i64_7(double %dbl) { -; CHECK-LABEL: fcvtzs_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = fptosi double %fix to i64 ret i64 %cvt } define i64 @fcvtzs_f64_i64_64(double %dbl) { -; CHECK-LABEL: fcvtzs_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, d0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, d0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_f64_i64_64: +; CHECK-GI: 
// %bb.0: +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = fptosi double %fix to i64 ret i64 %cvt } define i32 @fcvtzs_f16_i32_7(half %flt) { -; CHECK-NO16-LABEL: fcvtzs_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs w0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI8_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 128.0 %cvt = fptosi half %fix to i32 ret i32 %cvt } define i32 
@fcvtzs_f16_i32_15(half %flt) { -; CHECK-NO16-LABEL: fcvtzs_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs w0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI9_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 32768.0 %cvt = fptosi half %fix to i32 ret i32 %cvt } define i64 @fcvtzs_f16_i64_7(half %flt) { -; CHECK-NO16-LABEL: fcvtzs_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs x0, s0 
-; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs x0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI10_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 128.0 %cvt = fptosi half %fix to i64 ret i64 %cvt } define i64 @fcvtzs_f16_i64_15(half %flt) { -; CHECK-NO16-LABEL: fcvtzs_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs x0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; 
CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI11_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 32768.0 %cvt = fptosi half %fix to i64 ret i64 %cvt @@ -167,160 +311,302 @@ define i64 @fcvtzs_f16_i64_15(half %flt) { ; fptoui define i32 @fcvtzu_f32_i32_7(float %flt) { -; CHECK-LABEL: fcvtzu_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = fptoui float %fix to i32 ret i32 %cvt } define i32 @fcvtzu_f32_i32_32(float %flt) { -; CHECK-LABEL: fcvtzu_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, s0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, s0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f32_i32_32: +; 
CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = fptoui float %fix to i32 ret i32 %cvt } define i64 @fcvtzu_f32_i64_7(float %flt) { -; CHECK-LABEL: fcvtzu_f32_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f32_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f32_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = fptoui float %fix to i64 ret i64 %cvt } define i64 @fcvtzu_f32_i64_64(float %flt) { -; CHECK-LABEL: fcvtzu_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, s0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, s0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = fptoui float %fix to i64 ret i64 %cvt } define i32 @fcvtzu_f64_i32_7(double %dbl) { -; CHECK-LABEL: fcvtzu_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = fptoui double %fix to i32 
ret i32 %cvt } define i32 @fcvtzu_f64_i32_32(double %dbl) { -; CHECK-LABEL: fcvtzu_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, d0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, d0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = fptoui double %fix to i32 ret i32 %cvt } define i64 @fcvtzu_f64_i64_7(double %dbl) { -; CHECK-LABEL: fcvtzu_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = fptoui double %fix to i64 ret i64 %cvt } define i64 @fcvtzu_f64_i64_64(double %dbl) { -; CHECK-LABEL: fcvtzu_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, d0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, d0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = fptoui double %fix to i64 ret i64 %cvt } define i32 @fcvtzu_f16_i32_7(half %flt) { -; CHECK-NO16-LABEL: fcvtzu_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; 
CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu w0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI20_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 128.0 %cvt = fptoui half %fix to i32 ret i32 %cvt } define i32 @fcvtzu_f16_i32_15(half %flt) { -; CHECK-NO16-LABEL: fcvtzu_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu w0, h0, #15 -; CHECK-FP16-NEXT: 
ret +; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI21_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 32768.0 %cvt = fptoui half %fix to i32 ret i32 %cvt } define i64 @fcvtzu_f16_i64_7(half %flt) { -; CHECK-NO16-LABEL: fcvtzu_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu x0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu x0, s0 +; 
CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI22_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 128.0 %cvt = fptoui half %fix to i64 ret i64 %cvt } define i64 @fcvtzu_f16_i64_15(half %flt) { -; CHECK-NO16-LABEL: fcvtzu_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu x0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; 
CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI23_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %flt, 32768.0 %cvt = fptoui half %fix to i64 ret i64 %cvt @@ -329,160 +615,302 @@ define i64 @fcvtzu_f16_i64_15(half %flt) { ; sitofp define float @scvtf_f32_i32_7(i32 %int) { -; CHECK-LABEL: scvtf_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf s0, w0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf s0, w0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-GI-NEXT: scvtf s1, w0 +; CHECK-GI-NEXT: fdiv s0, s1, s0 +; CHECK-GI-NEXT: ret %cvt = sitofp i32 %int to float %fix = fdiv float %cvt, 128.0 ret float %fix } define float @scvtf_f32_i32_32(i32 %int) { -; CHECK-LABEL: scvtf_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf s0, w0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf s0, w0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf s0, w0 +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: ret %cvt = sitofp i32 %int to float %fix = fdiv float %cvt, 4294967296.0 ret float %fix } define float @scvtf_f32_i64_7(i64 %long) { -; CHECK-LABEL: scvtf_f32_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf s0, x0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f32_i64_7: 
+; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf s0, x0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f32_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-GI-NEXT: scvtf s1, x0 +; CHECK-GI-NEXT: fdiv s0, s1, s0 +; CHECK-GI-NEXT: ret %cvt = sitofp i64 %long to float %fix = fdiv float %cvt, 128.0 ret float %fix } define float @scvtf_f32_i64_64(i64 %long) { -; CHECK-LABEL: scvtf_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf s0, x0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf s0, x0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf s0, x0 +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: ret %cvt = sitofp i64 %long to float %fix = fdiv float %cvt, 18446744073709551616.0 ret float %fix } define double @scvtf_f64_i32_7(i32 %int) { -; CHECK-LABEL: scvtf_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf d0, w0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf d0, w0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf d0, w0 +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = sitofp i32 %int to double %fix = fdiv double %cvt, 128.0 ret double %fix } define double @scvtf_f64_i32_32(i32 %int) { -; CHECK-LABEL: scvtf_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf d0, w0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf d0, w0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf d0, w0 +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov 
d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = sitofp i32 %int to double %fix = fdiv double %cvt, 4294967296.0 ret double %fix } define double @scvtf_f64_i64_7(i64 %long) { -; CHECK-LABEL: scvtf_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf d0, x0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf d0, x0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf d0, x0 +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = sitofp i64 %long to double %fix = fdiv double %cvt, 128.0 ret double %fix } define double @scvtf_f64_i64_64(i64 %long) { -; CHECK-LABEL: scvtf_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: scvtf d0, x0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: scvtf_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: scvtf d0, x0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: scvtf_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: scvtf d0, x0 +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = sitofp i64 %long to double %fix = fdiv double %cvt, 18446744073709551616.0 ret double %fix } define half @scvtf_f16_i32_7(i32 %int) { -; CHECK-NO16-LABEL: scvtf_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: scvtf_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: scvtf h0, w0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: scvtf_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: scvtf s1, w0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #60, lsl #24 +; 
CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: scvtf_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: scvtf h0, w0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: scvtf s0, w0 +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: scvtf_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: scvtf h0, w0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI32_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = sitofp i32 %int to half %fix = fdiv half %cvt, 128.0 ret half %fix } define half @scvtf_f16_i32_15(i32 %int) { -; CHECK-NO16-LABEL: scvtf_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: scvtf_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: scvtf h0, w0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: scvtf_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: scvtf s1, w0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #56, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: scvtf_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: scvtf h0, w0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; 
CHECK-GI-NO16-LABEL: scvtf_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: scvtf s0, w0 +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: scvtf_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: scvtf h0, w0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = sitofp i32 %int to half %fix = fdiv half %cvt, 32768.0 ret half %fix } define half @scvtf_f16_i64_7(i64 %long) { -; CHECK-NO16-LABEL: scvtf_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: scvtf_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: scvtf h0, x0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: scvtf_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: scvtf s1, x0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #60, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: scvtf_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: scvtf h0, x0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: scvtf s0, x0 +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, 
s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: scvtf_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: scvtf h0, x0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = sitofp i64 %long to half %fix = fdiv half %cvt, 128.0 ret half %fix } define half @scvtf_f16_i64_15(i64 %long) { -; CHECK-NO16-LABEL: scvtf_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: scvtf_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: scvtf h0, x0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: scvtf_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: scvtf s1, x0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #56, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: scvtf_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: scvtf h0, x0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: scvtf s0, x0 +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: scvtf_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: scvtf h0, x0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; 
CHECK-GI-FP16-NEXT: ret %cvt = sitofp i64 %long to half %fix = fdiv half %cvt, 32768.0 ret half %fix @@ -491,160 +919,302 @@ define half @scvtf_f16_i64_15(i64 %long) { ; uitofp define float @ucvtf_f32_i32_7(i32 %int) { -; CHECK-LABEL: ucvtf_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf s0, w0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf s0, w0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-GI-NEXT: ucvtf s1, w0 +; CHECK-GI-NEXT: fdiv s0, s1, s0 +; CHECK-GI-NEXT: ret %cvt = uitofp i32 %int to float %fix = fdiv float %cvt, 128.0 ret float %fix } define float @ucvtf_f32_i32_32(i32 %int) { -; CHECK-LABEL: ucvtf_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf s0, w0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf s0, w0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf s0, w0 +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: ret %cvt = uitofp i32 %int to float %fix = fdiv float %cvt, 4294967296.0 ret float %fix } define float @ucvtf_f32_i64_7(i64 %long) { -; CHECK-LABEL: ucvtf_f32_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf s0, x0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f32_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf s0, x0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f32_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-GI-NEXT: ucvtf s1, x0 +; CHECK-GI-NEXT: fdiv s0, s1, s0 +; CHECK-GI-NEXT: ret %cvt = uitofp i64 %long to float %fix = fdiv float %cvt, 128.0 ret float %fix } define float @ucvtf_f32_i64_64(i64 %long) { -; CHECK-LABEL: ucvtf_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf s0, x0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: 
ucvtf_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf s0, x0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf s0, x0 +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NEXT: ret %cvt = uitofp i64 %long to float %fix = fdiv float %cvt, 18446744073709551616.0 ret float %fix } define double @ucvtf_f64_i32_7(i32 %int) { -; CHECK-LABEL: ucvtf_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf d0, w0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf d0, w0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf d0, w0 +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = uitofp i32 %int to double %fix = fdiv double %cvt, 128.0 ret double %fix } define double @ucvtf_f64_i32_32(i32 %int) { -; CHECK-LABEL: ucvtf_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf d0, w0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf d0, w0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf d0, w0 +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = uitofp i32 %int to double %fix = fdiv double %cvt, 4294967296.0 ret double %fix } define double @ucvtf_f64_i64_7(i64 %long) { -; CHECK-LABEL: ucvtf_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf d0, x0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf d0, x0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf d0, x0 +; 
CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = uitofp i64 %long to double %fix = fdiv double %cvt, 128.0 ret double %fix } define double @ucvtf_f64_i64_64(i64 %long) { -; CHECK-LABEL: ucvtf_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf d0, x0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ucvtf_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ucvtf d0, x0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ucvtf_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ucvtf d0, x0 +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fdiv d0, d0, d1 +; CHECK-GI-NEXT: ret %cvt = uitofp i64 %long to double %fix = fdiv double %cvt, 18446744073709551616.0 ret double %fix } define half @ucvtf_f16_i32_7(i32 %int) { -; CHECK-NO16-LABEL: ucvtf_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: ucvtf_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ucvtf h0, w0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: ucvtf_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: ucvtf s1, w0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #60, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: ucvtf_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: ucvtf h0, w0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: ucvtf s0, w0 +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: 
fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: ucvtf h0, w0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI44_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = uitofp i32 %int to half %fix = fdiv half %cvt, 128.0 ret half %fix } define half @ucvtf_f16_i32_15(i32 %int) { -; CHECK-NO16-LABEL: ucvtf_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s1, w0 -; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: ucvtf_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ucvtf h0, w0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: ucvtf_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: ucvtf s1, w0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #56, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: ucvtf_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: ucvtf h0, w0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: ucvtf s0, w0 +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: ucvtf h0, w0 +; CHECK-GI-FP16-NEXT: adrp 
x8, .LCPI45_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = uitofp i32 %int to half %fix = fdiv half %cvt, 32768.0 ret half %fix } define half @ucvtf_f16_i64_7(i64 %long) { -; CHECK-NO16-LABEL: ucvtf_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: ucvtf_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ucvtf h0, x0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: ucvtf_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: ucvtf s1, x0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #60, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: ucvtf_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: ucvtf h0, x0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: ucvtf s0, x0 +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: ucvtf h0, x0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI46_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = uitofp i64 %long to half %fix = fdiv half %cvt, 128.0 ret half %fix } define half @ucvtf_f16_i64_15(i64 %long) { -; CHECK-NO16-LABEL: ucvtf_f16_i64_15: -; CHECK-NO16: // %bb.0: -; 
CHECK-NO16-NEXT: ucvtf s1, x0 -; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24 -; CHECK-NO16-NEXT: fcvt h1, s1 -; CHECK-NO16-NEXT: fcvt s1, h1 -; CHECK-NO16-NEXT: fmul s0, s1, s0 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: ucvtf_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ucvtf h0, x0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: ucvtf_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: ucvtf s1, x0 +; CHECK-SD-NO16-NEXT: movi v0.2s, #56, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt h1, s1 +; CHECK-SD-NO16-NEXT: fcvt s1, h1 +; CHECK-SD-NO16-NEXT: fmul s0, s1, s0 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: ucvtf_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: ucvtf h0, x0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: ucvtf s0, x0 +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: ucvtf h0, x0 +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI47_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0] +; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-GI-FP16-NEXT: ret %cvt = uitofp i64 %long to half %fix = fdiv half %cvt, 32768.0 ret half %fix @@ -661,150 +1231,285 @@ declare i32 @llvm.fptosi.sat.i32.f16(half) declare i64 @llvm.fptosi.sat.i64.f16(half) define i32 @fcvtzs_sat_f32_i32_7(float %flt) { -; CHECK-LABEL: fcvtzs_sat_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: 
fcvtzs_sat_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix) ret i32 %cvt } define i32 @fcvtzs_sat_f32_i32_32(float %flt) { -; CHECK-LABEL: fcvtzs_sat_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, s0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f32_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, s0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix) ret i32 %cvt } define i64 @fcvtzs_sat_f32_i64_64(float %flt) { -; CHECK-LABEL: fcvtzs_sat_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, s0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, s0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzs x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = call i64 @llvm.fptosi.sat.i64.f32(float %fix) ret i64 %cvt } define i32 @fcvtzs_sat_f64_i32_7(double %dbl) { -; CHECK-LABEL: fcvtzs_sat_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; 
CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix) ret i32 %cvt } define i32 @fcvtzs_sat_f64_i32_32(double %dbl) { -; CHECK-LABEL: fcvtzs_sat_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w0, d0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs w0, d0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix) ret i32 %cvt } define i64 @fcvtzs_sat_f64_i64_7(double %dbl) { -; CHECK-LABEL: fcvtzs_sat_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix) ret i64 %cvt } define i64 @fcvtzs_sat_f64_i64_64(double %dbl) { -; CHECK-LABEL: fcvtzs_sat_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x0, d0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_sat_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x0, d0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sat_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzs x0, d0 +; 
CHECK-GI-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix) ret i64 %cvt } define i32 @fcvtzs_sat_f16_i32_7(half %dbl) { -; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs w0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI55_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix) ret i32 %cvt } define i32 @fcvtzs_sat_f16_i32_15(half %dbl) { -; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_15: -; CHECK-NO16: // %bb.0: -; 
CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs w0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI56_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix) ret i32 %cvt } define i64 @fcvtzs_sat_f16_i64_7(half %dbl) { -; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: 
fcvtzs_sat_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs x0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI57_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix) ret i64 %cvt } define i64 @fcvtzs_sat_f16_i64_15(half %dbl) { -; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzs x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzs x0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, 
h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzs x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI58_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix) ret i64 %cvt @@ -820,151 +1525,290 @@ declare i32 @llvm.fptoui.sat.i32.f16(half) declare i64 @llvm.fptoui.sat.i64.f16(half) define i32 @fcvtzu_sat_f32_i32_7(float %flt) { -; CHECK-LABEL: fcvtzu_sat_f32_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, s0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f32_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, s0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f32_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix) ret i32 %cvt } define i32 @fcvtzu_sat_f32_i32_32(float %flt) { -; CHECK-LABEL: fcvtzu_sat_f32_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, s0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f32_i32_32: +; 
CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, s0, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f32_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu w0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix) ret i32 %cvt } define i64 @fcvtzu_sat_f32_i64_64(float %flt) { -; CHECK-LABEL: fcvtzu_sat_f32_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, s0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f32_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, s0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f32_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #1602224128 // =0x5f800000 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: fmul s0, s0, s1 +; CHECK-GI-NEXT: fcvtzu x0, s0 +; CHECK-GI-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = call i64 @llvm.fptoui.sat.i64.f32(float %fix) ret i64 %cvt } define i32 @fcvtzu_sat_f64_i32_7(double %dbl) { -; CHECK-LABEL: fcvtzu_sat_f64_i32_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f64_i32_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f64_i32_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix) ret i32 %cvt } define i32 @fcvtzu_sat_f64_i32_32(double %dbl) { -; CHECK-LABEL: fcvtzu_sat_f64_i32_32: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w0, d0, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f64_i32_32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu w0, d0, #32 +; CHECK-SD-NEXT: ret +; +; 
CHECK-GI-LABEL: fcvtzu_sat_f64_i32_32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4751297606875873280 // =0x41f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu w0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix) ret i32 %cvt } define i64 @fcvtzu_sat_f64_i64_7(double %dbl) { -; CHECK-LABEL: fcvtzu_sat_f64_i64_7: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, d0, #7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f64_i64_7: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, d0, #7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f64_i64_7: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4638707616191610880 // =0x4060000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix) ret i64 %cvt } define i64 @fcvtzu_sat_f64_i64_64(double %dbl) { -; CHECK-LABEL: fcvtzu_sat_f64_i64_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x0, d0, #64 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_sat_f64_i64_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x0, d0, #64 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sat_f64_i64_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #4895412794951729152 // =0x43f0000000000000 +; CHECK-GI-NEXT: fmov d1, x8 +; CHECK-GI-NEXT: fmul d0, d0, d1 +; CHECK-GI-NEXT: fcvtzu x0, d0 +; CHECK-GI-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix) ret i64 %cvt } define i32 @fcvtzu_sat_f16_i32_7(half %dbl) { -; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu w0, s0 -; CHECK-NO16-NEXT: ret 
-; -; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu w0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI66_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix) ret i32 %cvt } define i32 @fcvtzu_sat_f16_i32_15(half %dbl) { -; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu w0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu w0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; 
CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu w0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu w0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI67_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu w0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix) ret i32 %cvt } define i64 @fcvtzu_sat_f16_i64_7(half %dbl) { -; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_7: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_7: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu x0, h0, #7 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i64_7: +; 
CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #7 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI68_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix) ret i64 %cvt } define i64 @fcvtzu_sat_f16_i64_15(half %dbl) { -; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_15: -; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fcvtzu x0, s0 -; CHECK-NO16-NEXT: ret -; -; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_15: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcvtzu x0, h0, #15 -; CHECK-FP16-NEXT: ret +; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-SD-NO16: // %bb.0: +; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fmul s0, s0, s1 +; CHECK-SD-NO16-NEXT: fcvt h0, s0 +; CHECK-SD-NO16-NEXT: fcvt s0, h0 +; CHECK-SD-NO16-NEXT: fcvtzu x0, s0 +; CHECK-SD-NO16-NEXT: ret +; +; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-SD-FP16: // %bb.0: +; CHECK-SD-FP16-NEXT: fcvtzu x0, h0, #15 +; CHECK-SD-FP16-NEXT: ret +; +; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-GI-NO16: // %bb.0: +; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; 
CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: fcvt s0, h0 +; CHECK-GI-NO16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NO16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: adrp x8, .LCPI69_0 +; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0] +; CHECK-GI-FP16-NEXT: fmul h0, h0, h1 +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix) ret i64 %cvt } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} +; CHECK-FP16: {{.*}} +; CHECK-NO16: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll index bbfec8c7c33617..4ab5db450a7f32 100644 --- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) { ; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16: diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll index 4cce06dce44c9b..a80d51bac99297 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll @@ -1,11 +1,84 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 -; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 -; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Check that constrained fp intrinsics are correctly lowered. +; CHECK-GI: warning: Instruction selection used fallback path for add_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for frem_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f16_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f16_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f16_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f16_i64 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f16_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f16_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for powi_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for acos_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp2_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_f16 +; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for maxnum_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_olt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ole_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ogt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oge_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oeq_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_one_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ult_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ule_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ugt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_uge_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ueq_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_une_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_olt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ole_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ogt_f16 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oge_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oeq_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_one_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ult_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ule_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ugt_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_uge_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ueq_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_une_f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_f16_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_f32_f16 ; Half-precision intrinsics diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll index 6147afba4e603a..83e60c10897624 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll @@ -1,9 +1,86 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 %s -disable-strictnode-mutation -o - | FileCheck %s -; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64 %s -disable-strictnode-mutation -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Check that constrained fp vector intrinsics are correctly lowered. 
+; CHECK-GI: warning: Instruction selection used fallback path for add_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v4i32_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v4i32_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v4i64_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v4i64_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v4f32_v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v4f32_v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v4f32_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v4f32_v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v4f32 +; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for fcmp_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v2i32_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v2i32_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v2i64_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v2i64_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v2f64_v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v2f64_v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v2f64_v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v2f64_v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v2f64 +; CHECK-GI-NEXT: warning: 
Instruction selection used fallback path for roundeven_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v1i32_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v1i32_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_v1i64_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_v1i64_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v1f64_v1i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v1f64_v1i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_v1f64_v1i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_v1f64_v1i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v1f64 +; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for floor_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_v1f61 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_v1f61 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_v2f32_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_v2f64_v2f32 ; Single-precision intrinsics @@ -882,3 +959,7 @@ declare <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double>, <1 x d declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll index fd3a0c3207606c..f2a14a9b73fa16 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll @@ -1,543 +1,1037 @@ -; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s -; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Check that constrained fp intrinsics are correctly lowered. 
+; CHECK-GI: warning: Instruction selection used fallback path for add_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for frem_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f32_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f32_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f32_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f32_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f32_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f32_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for powi_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for acos_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_f32 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp2_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maximum_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minimum_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f32 +; CHECK-GI-NEXT: warning: 
Instruction selection used fallback path for trunc_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_olt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ole_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ogt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oge_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oeq_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_one_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ult_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ule_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ugt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_uge_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ueq_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_une_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_olt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ole_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ogt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oge_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oeq_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_one_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ult_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ule_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ugt_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_uge_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback 
path for fcmps_ueq_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_une_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for frem_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f64_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f64_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f64_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f64_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f64_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f64_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for powi_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_f64 +; CHECK-GI-NEXT: warning: Instruction selection used 
fallback path for acos_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp2_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maximum_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minimum_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_f64 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_olt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ole_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ogt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oge_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oeq_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_one_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ult_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ule_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ugt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_uge_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ueq_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_une_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_olt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ole_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ogt_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oge_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oeq_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_one_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ult_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ule_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ugt_f64 +; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for fcmps_uge_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ueq_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_une_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sub_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for mul_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for div_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for frem_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fma_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f128_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f128_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f128_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f128_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sitofp_f128_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uitofp_f128_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqrt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for powi_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_f128 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for acos_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp2_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for maxnum_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for minnum_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_f128 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_olt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ole_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ogt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oge_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_oeq_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_one_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ult_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ule_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ugt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_uge_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_ueq_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmp_une_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_olt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ole_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ogt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oge_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_oeq_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_one_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ult_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ule_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_ugt_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_uge_f128 +; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for fcmps_ueq_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fcmps_une_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_f32_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_f32_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptrunc_f64_f128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_f64_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_f128_f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fpext_f128_f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sin_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cos_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tan_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for asin_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for acos_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for atan2_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sinh_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cosh_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tanh_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pow_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log2_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for log10_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for exp_v1f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
exp2_v1f64 + ; Single-precision intrinsics -; CHECK-LABEL: add_f32: -; CHECK: fadd s0, s0, s1 define float @add_f32(float %x, float %y) #0 { +; CHECK-LABEL: add_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sub_f32: -; CHECK: fsub s0, s0, s1 define float @sub_f32(float %x, float %y) #0 { +; CHECK-LABEL: sub_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: mul_f32: -; CHECK: fmul s0, s0, s1 define float @mul_f32(float %x, float %y) #0 { +; CHECK-LABEL: mul_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: div_f32: -; CHECK: fdiv s0, s0, s1 define float @div_f32(float %x, float %y) #0 { +; CHECK-LABEL: div_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: frem_f32: -; CHECK: bl fmodf define float @frem_f32(float %x, float %y) #0 { +; CHECK-LABEL: frem_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmodf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fma_f32: -; CHECK: fmadd s0, s0, s1, s2 define float @fma_f32(float %x, float %y, float %z) #0 { +; CHECK-LABEL: fma_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmadd s0, s0, s1, s2 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fptosi_i32_f32: -; CHECK: fcvtzs w0, s0 define i32 @fptosi_i32_f32(float %x) #0 { +; CHECK-LABEL: fptosi_i32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptoui_i32_f32: -; CHECK: fcvtzu w0, s0 define i32 @fptoui_i32_f32(float %x) #0 { +; CHECK-LABEL: fptoui_i32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptosi_i64_f32: -; CHECK: fcvtzs x0, s0 define i64 @fptosi_i64_f32(float %x) #0 { +; CHECK-LABEL: fptosi_i64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: fptoui_i64_f32: -; CHECK: fcvtzu x0, s0 define i64 @fptoui_i64_f32(float %x) #0 { +; CHECK-LABEL: fptoui_i64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict") #0 ret i64 %val } 
-; CHECK-LABEL: sitofp_f32_i32: -; CHECK: scvtf s0, w0 define float @sitofp_f32_i32(i32 %x) #0 { +; CHECK-LABEL: sitofp_f32_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, w0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: uitofp_f32_i32: -; CHECK: ucvtf s0, w0 define float @uitofp_f32_i32(i32 %x) #0 { +; CHECK-LABEL: uitofp_f32_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, w0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sitofp_f32_i64: -; CHECK: scvtf s0, x0 define float @sitofp_f32_i64(i64 %x) #0 { +; CHECK-LABEL: sitofp_f32_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, x0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: uitofp_f32_i64: -; CHECK: ucvtf s0, x0 define float @uitofp_f32_i64(i64 %x) #0 { +; CHECK-LABEL: uitofp_f32_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, x0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sitofp_f32_i128: -; CHECK: bl __floattisf define float @sitofp_f32_i128(i128 %x) #0 { +; CHECK-LABEL: sitofp_f32_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floattisf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sitofp.f32.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: uitofp_f32_i128: -; CHECK: bl __floatuntisf define float @uitofp_f32_i128(i128 %x) #0 { +; CHECK-LABEL: uitofp_f32_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatuntisf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.uitofp.f32.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sqrt_f32: -; CHECK: fsqrt s0, s0 define float @sqrt_f32(float %x) #0 { +; CHECK-LABEL: sqrt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fsqrt s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: powi_f32: -; CHECK: bl __powisf2 define float @powi_f32(float %x, i32 %y) #0 { +; CHECK-LABEL: powi_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __powisf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.powi.f32(float %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sin_f32: -; CHECK: bl sinf define float @sin_f32(float %x) #0 { +; CHECK-LABEL: sin_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: cos_f32: -; CHECK: bl cosf define float @cos_f32(float %x) #0 { +; CHECK-LABEL: cos_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cosf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.cos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: tan_f32: -; CHECK: bl tanf define float @tan_f32(float %x) #0 { +; CHECK-LABEL: tan_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: asin_f32: -; CHECK: bl asinf define float @asin_f32(float %x) #0 { +; CHECK-LABEL: asin_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl asinf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: acos_f32: -; CHECK: bl acosf define float @acos_f32(float %x) #0 { +; CHECK-LABEL: acos_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl acosf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: atan_f32: -; CHECK: bl atanf define float @atan_f32(float %x) #0 { +; CHECK-LABEL: atan_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atanf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: atan2_f32: -; CHECK: bl atan2f define float @atan2_f32(float %x, float %y) #0 { +; CHECK-LABEL: atan2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan2f +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.atan2.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: sinh_f32: -; CHECK: bl sinhf define float @sinh_f32(float %x) #0 { +; CHECK-LABEL: sinh_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinhf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: cosh_f32: -; CHECK: bl coshf define float @cosh_f32(float %x) #0 { +; CHECK-LABEL: cosh_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl coshf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: tanh_f32: -; CHECK: bl tanhf define float @tanh_f32(float %x) #0 { +; CHECK-LABEL: tanh_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanhf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: pow_f32: -; CHECK: bl powf define float @pow_f32(float %x, float %y) #0 { +; CHECK-LABEL: pow_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl powf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.pow.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: log_f32: -; CHECK: bl logf define float @log_f32(float %x) #0 { +; CHECK-LABEL: log_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl logf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.log.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: log10_f32: -; CHECK: bl log10f define float @log10_f32(float %x) #0 { +; CHECK-LABEL: log10_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log10f +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.log10.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: log2_f32: -; CHECK: bl log2f define float @log2_f32(float %x) #0 { +; CHECK-LABEL: log2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log2f +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.log2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: exp_f32: -; CHECK: bl expf define float @exp_f32(float %x) #0 { +; CHECK-LABEL: exp_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl expf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.exp.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: exp2_f32: -; CHECK: bl exp2f define float @exp2_f32(float %x) #0 { +; CHECK-LABEL: exp2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp2f +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.exp2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: rint_f32: -; CHECK: frintx s0, s0 define float @rint_f32(float %x) #0 { +; CHECK-LABEL: rint_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.rint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: nearbyint_f32: -; CHECK: frinti s0, s0 define float @nearbyint_f32(float %x) #0 { +; CHECK-LABEL: nearbyint_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frinti s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.nearbyint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: lrint_f32: -; CHECK: frintx [[REG:s[0-9]+]], s0 -; CHECK: fcvtzs w0, [[REG]] define i32 @lrint_f32(float %x) #0 { +; CHECK-LABEL: lrint_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llrint_f32: -; CHECK: frintx [[REG:s[0-9]+]], s0 -; CHECK: fcvtzs x0, [[REG]] define i64 @llrint_f32(float %x) #0 { +; CHECK-LABEL: llrint_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: maxnum_f32: -; CHECK: fmaxnm s0, s0, s1 define float @maxnum_f32(float %x, float %y) #0 { +; CHECK-LABEL: maxnum_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: 
fmaxnm s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.maxnum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: minnum_f32: -; CHECK: fminnm s0, s0, s1 define float @minnum_f32(float %x, float %y) #0 { +; CHECK-LABEL: minnum_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.minnum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: maximum_f32: -; CHECK: fmax s0, s0, s1 define float @maximum_f32(float %x, float %y) #0 { +; CHECK-LABEL: maximum_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.maximum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: minimum_f32: -; CHECK: fmin s0, s0, s1 define float @minimum_f32(float %x, float %y) #0 { +; CHECK-LABEL: minimum_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin s0, s0, s1 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.minimum.f32(float %x, float %y, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: ceil_f32: -; CHECK: frintp s0, s0 define float @ceil_f32(float %x) #0 { +; CHECK-LABEL: ceil_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.ceil.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: floor_f32: -; CHECK: frintm s0, s0 define float @floor_f32(float %x) #0 { +; CHECK-LABEL: floor_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.floor.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: lround_f32: -; CHECK: fcvtas w0, s0 define i32 @lround_f32(float %x) #0 { +; CHECK-LABEL: lround_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas w0, s0 +; CHECK-NEXT: ret %val = call i32 
@llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llround_f32: -; CHECK: fcvtas x0, s0 define i64 @llround_f32(float %x) #0 { +; CHECK-LABEL: llround_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas x0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: round_f32: -; CHECK: frinta s0, s0 define float @round_f32(float %x) #0 { +; CHECK-LABEL: round_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.round.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: roundeven_f32: -; CHECK: frintn s0, s0 define float @roundeven_f32(float %x) #0 { +; CHECK-LABEL: roundeven_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: trunc_f32: -; CHECK: frintz s0, s0 define float @trunc_f32(float %x) #0 { +; CHECK-LABEL: trunc_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.trunc.f32(float %x, metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fcmp_olt_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_olt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_olt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ole_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ole_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ole_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret %cmp = call i1 
@llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ogt_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ogt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ogt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oge_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_oge_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_oge_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oeq_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_oeq_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_oeq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_one_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_one_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_one_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w8, mi +; CHECK-NEXT: csinc w0, w8, wzr, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ult_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ult_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ult_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret %cmp = call 
i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ule_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ule_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ule_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ugt_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ugt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ugt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_uge_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_uge_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_uge_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, pl +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ueq_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_ueq_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_ueq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: csinc w0, w8, wzr, vc +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_une_f32: -; CHECK: fcmp s0, s1 define i32 @fcmp_une_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmp_une_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %cmp = 
call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_olt_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_olt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_olt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ole_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ole_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ole_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ogt_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ogt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ogt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oge_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_oge_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_oge_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oeq_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_oeq_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_oeq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret %cmp = call 
i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_one_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_one_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_one_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w8, mi +; CHECK-NEXT: csinc w0, w8, wzr, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ult_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ult_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ult_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ule_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ule_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ule_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ugt_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ugt_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ugt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_uge_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_uge_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_uge_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, 
pl +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ueq_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_ueq_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_ueq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: csinc w0, w8, wzr, vc +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_une_f32: -; CHECK: fcmpe s0, s1 define i32 @fcmps_une_f32(float %a, float %b) #0 { +; CHECK-LABEL: fcmps_une_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -546,538 +1040,792 @@ define i32 @fcmps_une_f32(float %a, float %b) #0 { ; Double-precision intrinsics -; CHECK-LABEL: add_f64: -; CHECK: fadd d0, d0, d1 define double @add_f64(double %x, double %y) #0 { +; CHECK-LABEL: add_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sub_f64: -; CHECK: fsub d0, d0, d1 define double @sub_f64(double %x, double %y) #0 { +; CHECK-LABEL: sub_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: mul_f64: -; CHECK: fmul d0, d0, d1 define double @mul_f64(double %x, double %y) #0 { +; CHECK-LABEL: mul_f64: +; CHECK: // %bb.0: +; 
CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: div_f64: -; CHECK: fdiv d0, d0, d1 define double @div_f64(double %x, double %y) #0 { +; CHECK-LABEL: div_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fdiv.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: frem_f64: -; CHECK: bl fmod define double @frem_f64(double %x, double %y) #0 { +; CHECK-LABEL: frem_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmod +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.frem.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fma_f64: -; CHECK: fmadd d0, d0, d1, d2 define double @fma_f64(double %x, double %y, double %z) #0 { +; CHECK-LABEL: fma_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmadd d0, d0, d1, d2 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fptosi_i32_f64: -; CHECK: fcvtzs w0, d0 define i32 @fptosi_i32_f64(double %x) #0 { +; CHECK-LABEL: fptosi_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptoui_i32_f64: -; CHECK: fcvtzu w0, d0 define i32 @fptoui_i32_f64(double %x) #0 { +; CHECK-LABEL: fptoui_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: 
ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptosi_i64_f64: -; CHECK: fcvtzs x0, d0 define i64 @fptosi_i64_f64(double %x) #0 { +; CHECK-LABEL: fptosi_i64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: fptoui_i64_f64: -; CHECK: fcvtzu x0, d0 define i64 @fptoui_i64_f64(double %x) #0 { +; CHECK-LABEL: fptoui_i64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: sitofp_f64_i32: -; CHECK: scvtf d0, w0 define double @sitofp_f64_i32(i32 %x) #0 { +; CHECK-LABEL: sitofp_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf d0, w0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: uitofp_f64_i32: -; CHECK: ucvtf d0, w0 define double @uitofp_f64_i32(i32 %x) #0 { +; CHECK-LABEL: uitofp_f64_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf d0, w0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sitofp_f64_i64: -; CHECK: scvtf d0, x0 define double @sitofp_f64_i64(i64 %x) #0 { +; CHECK-LABEL: sitofp_f64_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf d0, x0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: uitofp_f64_i64: -; CHECK: ucvtf d0, x0 define double @uitofp_f64_i64(i64 %x) #0 { +; CHECK-LABEL: uitofp_f64_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf d0, x0 +; 
CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sitofp_f64_i128: -; CHECK: bl __floattidf define double @sitofp_f64_i128(i128 %x) #0 { +; CHECK-LABEL: sitofp_f64_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floattidf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sitofp.f64.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: uitofp_f64_i128: -; CHECK: bl __floatuntidf define double @uitofp_f64_i128(i128 %x) #0 { +; CHECK-LABEL: uitofp_f64_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatuntidf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.uitofp.f64.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sqrt_f64: -; CHECK: fsqrt d0, d0 define double @sqrt_f64(double %x) #0 { +; CHECK-LABEL: sqrt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fsqrt d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sqrt.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: powi_f64: -; CHECK: bl __powidf2 define double @powi_f64(double %x, i32 %y) #0 { +; CHECK-LABEL: powi_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __powidf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.powi.f64(double %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sin_f64: -; CHECK: bl sin define double @sin_f64(double %x) #0 { +; CHECK-LABEL: sin_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sin +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: cos_f64: -; CHECK: bl cos define double @cos_f64(double %x) #0 { +; CHECK-LABEL: cos_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cos +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.cos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: tan_f64: -; CHECK: bl tan define double @tan_f64(double %x) #0 { +; CHECK-LABEL: tan_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tan +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: asin_f64: -; CHECK: bl asin define double @asin_f64(double %x) #0 { +; CHECK-LABEL: asin_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl asin +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.asin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: acos_f64: -; CHECK: bl acos define double @acos_f64(double %x) #0 { +; CHECK-LABEL: acos_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl acos +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.acos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: atan_f64: -; CHECK: bl atan define double @atan_f64(double %x) #0 { +; CHECK-LABEL: atan_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.atan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: atan2_f64: -; CHECK: bl atan2 define double @atan2_f64(double %x, double %y) #0 { +; CHECK-LABEL: atan2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.atan2.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: sinh_f64: -; CHECK: bl sinh define double @sinh_f64(double %x) #0 { +; CHECK-LABEL: sinh_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.sinh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: cosh_f64: -; CHECK: bl cosh define double @cosh_f64(double %x) #0 { +; CHECK-LABEL: cosh_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cosh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.cosh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: tanh_f64: -; CHECK: bl tanh define double @tanh_f64(double %x) #0 { +; CHECK-LABEL: tanh_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.tanh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: pow_f64: -; CHECK: bl pow define double @pow_f64(double %x, double %y) #0 { +; CHECK-LABEL: pow_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl pow +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.pow.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: log_f64: -; CHECK: bl log define double @log_f64(double %x) #0 { +; CHECK-LABEL: log_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.log.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: log10_f64: -; CHECK: bl log10 define double @log10_f64(double %x) #0 { +; CHECK-LABEL: log10_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log10 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.log10.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: log2_f64: -; CHECK: bl log2 define double @log2_f64(double %x) #0 { +; CHECK-LABEL: log2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.log2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: exp_f64: -; CHECK: bl exp define double @exp_f64(double %x) #0 { +; CHECK-LABEL: exp_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.exp.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: exp2_f64: -; CHECK: bl exp2 define double @exp2_f64(double %x) #0 { +; CHECK-LABEL: exp2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.exp2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: rint_f64: -; CHECK: frintx d0, d0 define double @rint_f64(double %x) #0 { +; CHECK-LABEL: rint_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.rint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: nearbyint_f64: -; CHECK: frinti d0, d0 define double @nearbyint_f64(double %x) #0 { +; CHECK-LABEL: nearbyint_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frinti d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.nearbyint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: lrint_f64: -; CHECK: frintx [[REG:d[0-9]+]], d0 -; CHECK: fcvtzs w0, [[REG]] define i32 @lrint_f64(double %x) #0 { +; CHECK-LABEL: lrint_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llrint_f64: -; CHECK: frintx 
[[REG:d[0-9]+]], d0 -; CHECK: fcvtzs x0, [[REG]] define i64 @llrint_f64(double %x) #0 { +; CHECK-LABEL: llrint_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: maxnum_f64: -; CHECK: fmaxnm d0, d0, d1 define double @maxnum_f64(double %x, double %y) #0 { +; CHECK-LABEL: maxnum_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.maxnum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: minnum_f64: -; CHECK: fminnm d0, d0, d1 define double @minnum_f64(double %x, double %y) #0 { +; CHECK-LABEL: minnum_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.minnum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: maximum_f64: -; CHECK: fmax d0, d0, d1 define double @maximum_f64(double %x, double %y) #0 { +; CHECK-LABEL: maximum_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.maximum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: minimum_f64: -; CHECK: fmin d0, d0, d1 define double @minimum_f64(double %x, double %y) #0 { +; CHECK-LABEL: minimum_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin d0, d0, d1 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.minimum.f64(double %x, double %y, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: ceil_f64: -; CHECK: frintp d0, d0 define double @ceil_f64(double %x) #0 { +; CHECK-LABEL: ceil_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintp d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.ceil.f64(double %x, metadata 
!"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: floor_f64: -; CHECK: frintm d0, d0 define double @floor_f64(double %x) #0 { +; CHECK-LABEL: floor_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintm d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.floor.f64(double %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: lround_f64: -; CHECK: fcvtas w0, d0 define i32 @lround_f64(double %x) #0 { +; CHECK-LABEL: lround_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas w0, d0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llround_f64: -; CHECK: fcvtas x0, d0 define i64 @llround_f64(double %x) #0 { +; CHECK-LABEL: llround_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas x0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: round_f64: -; CHECK: frinta d0, d0 define double @round_f64(double %x) #0 { +; CHECK-LABEL: round_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frinta d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.round.f64(double %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: roundeven_f64: -; CHECK: frintn d0, d0 define double @roundeven_f64(double %x) #0 { +; CHECK-LABEL: roundeven_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintn d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: trunc_f64: -; CHECK: frintz d0, d0 define double @trunc_f64(double %x) #0 { +; CHECK-LABEL: trunc_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz d0, d0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.trunc.f64(double %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fcmp_olt_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_olt_f64(double %a, 
double %b) #0 { +; CHECK-LABEL: fcmp_olt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ole_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ole_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ole_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ogt_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ogt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ogt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oge_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_oge_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_oge_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oeq_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_oeq_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_oeq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_one_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_one_f64(double %a, double %b) #0 
{ +; CHECK-LABEL: fcmp_one_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w8, mi +; CHECK-NEXT: csinc w0, w8, wzr, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ult_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ult_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ult_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ule_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ule_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ule_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ugt_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_ugt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ugt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_uge_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_uge_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_uge_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, pl +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ueq_f64: -; CHECK: fcmp d0, d1 define i32 
@fcmp_ueq_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_ueq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: csinc w0, w8, wzr, vc +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_une_f64: -; CHECK: fcmp d0, d1 define i32 @fcmp_une_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmp_une_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_olt_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_olt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_olt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ole_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ole_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ole_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ogt_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ogt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ogt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: 
fcmps_oge_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_oge_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_oge_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oeq_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_oeq_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_oeq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_one_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_one_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_one_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w8, mi +; CHECK-NEXT: csinc w0, w8, wzr, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ult_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ult_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ult_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ule_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ule_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ule_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = 
zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ugt_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ugt_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ugt_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_uge_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_uge_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_uge_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, pl +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ueq_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_ueq_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_ueq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: csinc w0, w8, wzr, vc +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_une_f64: -; CHECK: fcmpe d0, d1 define i32 @fcmps_une_f64(double %a, double %b) #0 { +; CHECK-LABEL: fcmps_une_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmpe d0, d1 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -1086,515 +1834,1015 @@ define i32 @fcmps_une_f64(double %a, double %b) #0 { ; Long-double-precision intrinsics -; CHECK-LABEL: add_f128: -; CHECK: bl __addtf3 define fp128 @add_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: add_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __addtf3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sub_f128: -; CHECK: bl __subtf3 define fp128 @sub_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: sub_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __subtf3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: mul_f128: -; CHECK: bl __multf3 define fp128 @mul_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: mul_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __multf3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: div_f128: -; CHECK: bl __divtf3 define fp128 @div_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: div_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __divtf3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: frem_f128: -; CHECK: bl fmodl define fp128 @frem_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: frem_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmodl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: fma_f128: -; CHECK: fmal define fp128 @fma_f128(fp128 %x, fp128 %y, fp128 %z) #0 { +; CHECK-LABEL: fma_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmal +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y, fp128 %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: fptosi_i32_f128: -; CHECK: bl __fixtfsi define i32 @fptosi_i32_f128(fp128 %x) #0 { +; CHECK-LABEL: fptosi_i32_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptoui_i32_f128: -; CHECK: bl __fixunstfsi define i32 @fptoui_i32_f128(fp128 %x) #0 { +; CHECK-LABEL: fptoui_i32_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunstfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: fptosi_i64_f128: -; CHECK: bl __fixtfdi define i64 @fptosi_i64_f128(fp128 %x) #0 { +; CHECK-LABEL: fptosi_i64_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfdi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: fptoui_i64_f128: -; CHECK: bl __fixunstfdi define i64 @fptoui_i64_f128(fp128 %x) #0 { +; CHECK-LABEL: fptoui_i64_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunstfdi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: sitofp_f128_i32: -; CHECK: bl __floatsitf define fp128 @sitofp_f128_i32(i32 %x) #0 { +; CHECK-LABEL: sitofp_f128_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatsitf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: uitofp_f128_i32: -; CHECK: bl __floatunsitf define fp128 @uitofp_f128_i32(i32 %x) #0 { +; CHECK-LABEL: uitofp_f128_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatunsitf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sitofp_f128_i64: -; CHECK: bl __floatditf define fp128 @sitofp_f128_i64(i64 %x) #0 { +; CHECK-LABEL: sitofp_f128_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatditf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: uitofp_f128_i64: -; CHECK: bl __floatunditf define fp128 @uitofp_f128_i64(i64 %x) #0 { +; CHECK-LABEL: uitofp_f128_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatunditf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sitofp_f128_i128: -; CHECK: bl __floattitf define fp128 @sitofp_f128_i128(i128 %x) #0 { +; CHECK-LABEL: sitofp_f128_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floattitf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: uitofp_f128_i128: -; CHECK: bl __floatuntitf define fp128 @uitofp_f128_i128(i128 %x) #0 { +; CHECK-LABEL: uitofp_f128_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __floatuntitf +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sqrt_f128: -; CHECK: bl sqrtl define fp128 @sqrt_f128(fp128 %x) #0 { +; CHECK-LABEL: sqrt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sqrtl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: powi_f128: -; CHECK: bl __powitf2 define fp128 @powi_f128(fp128 %x, i32 %y) #0 { +; CHECK-LABEL: powi_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __powitf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sin_f128: -; CHECK: bl sinl define fp128 @sin_f128(fp128 %x) #0 { +; CHECK-LABEL: sin_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: cos_f128: -; CHECK: bl cosl define fp128 @cos_f128(fp128 %x) #0 { +; CHECK-LABEL: cos_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cosl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: tan_f128: -; CHECK: bl tanl define fp128 @tan_f128(fp128 %x) #0 { +; CHECK-LABEL: tan_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.tan.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: asin_f128: -; CHECK: bl asinl define fp128 @asin_f128(fp128 %x) #0 { +; CHECK-LABEL: asin_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl asinl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.asin.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: acos_f128: -; CHECK: bl acosl define fp128 @acos_f128(fp128 %x) #0 { +; CHECK-LABEL: acos_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl acosl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.acos.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: atan_f128: -; CHECK: bl atanl define fp128 @atan_f128(fp128 %x) #0 { +; CHECK-LABEL: atan_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atanl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.atan.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: atan2_f128: -; CHECK: bl atan2l define fp128 @atan2_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: atan2_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan2l +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sinh_f128: -; CHECK: bl sinhl define fp128 @sinh_f128(fp128 %x) #0 { +; CHECK-LABEL: sinh_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinhl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.sinh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: cosh_f128: -; CHECK: bl coshl define fp128 @cosh_f128(fp128 %x) #0 { +; CHECK-LABEL: cosh_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl coshl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.cosh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: tanh_f128: -; CHECK: bl tanhl define fp128 @tanh_f128(fp128 %x) #0 { +; CHECK-LABEL: tanh_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanhl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.tanh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: pow_f128: -; CHECK: bl powl define fp128 @pow_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: pow_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl powl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: log_f128: -; CHECK: bl logl define fp128 @log_f128(fp128 %x) #0 { +; CHECK-LABEL: log_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl logl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: log10_f128: -; CHECK: bl log10l define fp128 @log10_f128(fp128 %x) #0 { +; CHECK-LABEL: log10_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log10l +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: log2_f128: -; CHECK: bl log2l define fp128 @log2_f128(fp128 %x) #0 { +; CHECK-LABEL: log2_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log2l +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: exp_f128: -; CHECK: bl expl define fp128 @exp_f128(fp128 %x) #0 { +; CHECK-LABEL: exp_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl expl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: exp2_f128: -; CHECK: bl exp2l define fp128 @exp2_f128(fp128 %x) #0 { +; CHECK-LABEL: exp2_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp2l +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: rint_f128: -; CHECK: bl rintl define fp128 @rint_f128(fp128 %x) #0 { +; CHECK-LABEL: rint_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl rintl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: nearbyint_f128: -; CHECK: bl nearbyintl define fp128 @nearbyint_f128(fp128 %x) #0 { +; CHECK-LABEL: nearbyint_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl nearbyintl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: lrint_f128: -; CHECK: bl lrintl define i32 @lrint_f128(fp128 %x) #0 { +; CHECK-LABEL: lrint_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl lrintl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llrint_f128: -; CHECK: bl llrintl define i64 @llrint_f128(fp128 %x) #0 { +; CHECK-LABEL: llrint_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl llrintl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: maxnum_f128: -; CHECK: bl fmaxl define fp128 @maxnum_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: maxnum_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fmaxl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: minnum_f128: -; CHECK: bl fminl define fp128 @minnum_f128(fp128 %x, fp128 %y) #0 { +; CHECK-LABEL: minnum_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl fminl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: ceil_f128: -; CHECK: bl ceill define fp128 @ceil_f128(fp128 %x) #0 { +; CHECK-LABEL: ceil_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl ceill +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: floor_f128: -; CHECK: bl floorl define fp128 @floor_f128(fp128 %x) #0 { +; CHECK-LABEL: floor_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl floorl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: lround_f128: -; CHECK: bl lroundl define i32 @lround_f128(fp128 %x) #0 { +; CHECK-LABEL: lround_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl lroundl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %val } -; CHECK-LABEL: llround_f128: -; CHECK: bl llroundl define i64 @llround_f128(fp128 %x) #0 { +; CHECK-LABEL: llround_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl llroundl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %val } -; CHECK-LABEL: round_f128: -; CHECK: bl roundl define fp128 @round_f128(fp128 %x) #0 { +; CHECK-LABEL: round_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl roundl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: trunc_f128: -; CHECK: bl truncl define fp128 @trunc_f128(fp128 %x) #0 { +; CHECK-LABEL: trunc_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl truncl +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: fcmp_olt_f128: -; CHECK: bl __lttf2 define i32 @fcmp_olt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_olt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ole_f128: -; CHECK: bl __letf2 define i32 @fcmp_ole_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ole_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __letf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ogt_f128: -; CHECK: bl __gttf2 define i32 @fcmp_ogt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ogt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oge_f128: -; CHECK: bl __getf2 define i32 @fcmp_oge_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_oge_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __getf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_oeq_f128: -; CHECK: bl __eqtf2 define i32 @fcmp_oeq_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_oeq_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_one_f128: -; CHECK: bl __eqtf2 define i32 @fcmp_one_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_one_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ccmp w19, #0, #4, eq +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ult_f128: -; CHECK: 
bl __getf2 define i32 @fcmp_ult_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ult_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __getf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ule_f128: -; CHECK: bl __gttf2 define i32 @fcmp_ule_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ule_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ugt_f128: -; CHECK: bl __letf2 define i32 @fcmp_ugt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ugt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __letf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_uge_f128: -; CHECK: bl __lttf2 define i32 @fcmp_uge_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_uge_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_ueq_f128: -; CHECK: bl __eqtf2 define i32 @fcmp_ueq_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_ueq_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ccmp w19, #0, #4, eq +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmp_une_f128: -; CHECK: bl __netf2 define i32 @fcmp_une_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmp_une_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __netf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_olt_f128: -; CHECK: bl __lttf2 define i32 @fcmps_olt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_olt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"olt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ole_f128: -; CHECK: bl __letf2 define i32 @fcmps_ole_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ole_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __letf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ole", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ogt_f128: -; CHECK: bl __gttf2 define i32 @fcmps_ogt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ogt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ogt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oge_f128: -; CHECK: bl __getf2 define i32 @fcmps_oge_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_oge_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __getf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"oge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_oeq_f128: -; CHECK: bl __eqtf2 define i32 @fcmps_oeq_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_oeq_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_one_f128: -; CHECK: bl __eqtf2 define i32 @fcmps_one_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_one_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ccmp w19, #0, #4, eq +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"one", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ult_f128: -; CHECK: bl __getf2 define i32 @fcmps_ult_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ult_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __getf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, lt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ult", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ule_f128: -; CHECK: bl __gttf2 define i32 @fcmps_ule_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ule_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ule", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ugt_f128: -; CHECK: bl __letf2 define i32 @fcmps_ugt_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ugt_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __letf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ugt", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_uge_f128: -; CHECK: bl __lttf2 define i32 @fcmps_uge_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_uge_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"uge", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_ueq_f128: -; CHECK: bl __eqtf2 define i32 @fcmps_ueq_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_ueq_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ccmp w19, #0, #4, eq +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ueq", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv } -; CHECK-LABEL: fcmps_une_f128: -; CHECK: bl __netf2 define i32 @fcmps_une_f128(fp128 %a, fp128 %b) #0 { +; CHECK-LABEL: fcmps_une_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __netf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0 %conv = zext i1 %cmp to i32 ret i32 %conv @@ -1603,156 +2851,280 @@ define i32 @fcmps_une_f128(fp128 %a, fp128 %b) #0 { ; Intrinsics to convert between floating-point types -; CHECK-LABEL: fptrunc_f32_f64: -; CHECK: fcvt s0, d0 define float @fptrunc_f32_f64(double %x) #0 { +; CHECK-LABEL: fptrunc_f32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fptrunc_f32_f128: -; CHECK: bl __trunctfsf2 define float @fptrunc_f32_f128(fp128 %x) #0 { +; CHECK-LABEL: fptrunc_f32_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __trunctfsf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %val } -; CHECK-LABEL: fptrunc_f64_f128: -; CHECK: bl __trunctfdf2 define double @fptrunc_f64_f128(fp128 %x) #0 { +; CHECK-LABEL: fptrunc_f64_f128: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __trunctfdf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fpext_f64_f32: -; CHECK: fcvt d0, s0 define double @fpext_f64_f32(float %x) #0 { +; CHECK-LABEL: fpext_f64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt d0, s0 +; CHECK-NEXT: ret %val = call double @llvm.experimental.constrained.fpext.f64.f32(float %x, metadata !"fpexcept.strict") #0 ret double %val } -; CHECK-LABEL: fpext_f128_f32: -; CHECK: bl __extendsftf2 define fp128 @fpext_f128_f32(float %x) #0 { +; CHECK-LABEL: fpext_f128_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __extendsftf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: fpext_f128_f64: -; CHECK: bl __extenddftf2 define fp128 @fpext_f128_f64(double %x) #0 { +; CHECK-LABEL: fpext_f128_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __extenddftf2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %x, metadata !"fpexcept.strict") #0 ret fp128 %val } -; CHECK-LABEL: sin_v1f64: -; CHECK: bl sin define <1 x double> @sin_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: sin_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sin +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.sin.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: cos_v1f64: -; CHECK: bl cos define <1 x double> @cos_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: cos_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cos +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.cos.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: tan_v1f64: -; CHECK: bl tan define <1 x double> @tan_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: tan_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tan +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.tan.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: asin_v1f64: -; CHECK: bl asin define <1 x double> @asin_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: asin_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl asin +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.asin.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: acos_v1f64: -; CHECK: bl acos define <1 x double> @acos_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: acos_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl acos +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.acos.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: atan_v1f64: -; CHECK: bl atan define <1 x double> @atan_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: atan_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.atan.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: atan2_v1f64: -; CHECK: bl atan2 define <1 x double> @atan2_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: atan2_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl atan2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.atan2.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: sinh_v1f64: -; CHECK: bl sinh define <1 x double> @sinh_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: sinh_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl sinh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.sinh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: cosh_v1f64: -; CHECK: bl cosh define <1 x double> @cosh_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: cosh_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl cosh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.cosh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: tanh_v1f64: -; CHECK: bl tanh define <1 x double> @tanh_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: tanh_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl tanh +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.tanh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: pow_v1f64: -; CHECK: bl pow define <1 x double> @pow_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: pow_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl pow +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.pow.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: log_v1f64: -; CHECK: bl log define <1 x double> @log_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: log_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.log.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: log2_v1f64: -; CHECK: bl log2 define <1 x double> @log2_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: log2_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.log2.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: log10_v1f64: -; CHECK: bl log10 define <1 x double> @log10_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: log10_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl log10 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.log10.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: exp_v1f64: -; CHECK: bl exp define <1 x double> @exp_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: exp_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.exp.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } -; CHECK-LABEL: exp2_v1f64: -; CHECK: bl exp2 define <1 x double> @exp2_v1f64(<1 x double> %x, <1 x double> %y) #0 { +; CHECK-LABEL: exp2_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl exp2 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %val = call <1 x double> @llvm.experimental.constrained.exp2.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret <1 x double> %val } @@ -1918,3 +3290,7 @@ declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata) + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll index 17c87a5dae4199..bfb5c67801e6c2 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 ; ; 32-bit float to signed integer diff --git 
a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll index 3c19fca4a22aef..0dea7be5052d03 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 ; ; 32-bit float to unsigned integer diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll index 20a6dd0899b40a..3037a9552bc27e 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i8 @llvm.fshl.i8(i8, i8, i8) declare i16 @llvm.fshl.i16(i16, i16, i16) diff --git a/llvm/test/CodeGen/AArch64/init-undef.mir b/llvm/test/CodeGen/AArch64/init-undef.mir index c9d23006d35234..7935c09d7df5ec 100644 --- a/llvm/test/CodeGen/AArch64/init-undef.mir +++ 
b/llvm/test/CodeGen/AArch64/init-undef.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=false -run-pass=init-undef -o - %s | FileCheck %s -# RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=true -run-pass=init-undef -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-- -run-pass=init-undef -o - %s | FileCheck %s --- name: test_stxp_undef diff --git a/llvm/test/CodeGen/AArch64/itofp-bf16.ll b/llvm/test/CodeGen/AArch64/itofp-bf16.ll index 978fe0b5ba3b3c..58591b11c184fb 100644 --- a/llvm/test/CodeGen/AArch64/itofp-bf16.ll +++ b/llvm/test/CodeGen/AArch64/itofp-bf16.ll @@ -4,6 +4,63 @@ ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 +; CHECK-GI: warning: Instruction selection used fallback path for stofp_i64_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i64_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i32_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i32_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i16_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i16_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i8_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i8_bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i64_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i64_v2bf16 +; CHECK-GI-NEXT: warning: Instruction 
selection used fallback path for stofp_v3i64_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i64_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i64_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i64_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i64_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i64_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i64_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i64_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i64_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i64_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i32_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i32_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i32_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i32_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i32_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i32_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i32_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i32_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i32_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i32_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i32_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i32_v32bf16 +; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for stofp_v2i16_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i16_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i16_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i16_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i16_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i16_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i16_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i16_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i16_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i16_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i16_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i16_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i8_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i8_v2bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i8_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i8_v4bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i8_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i8_v8bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i8_v16bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i8_v16bf16 +; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i8_v32bf16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i8_v32bf16 + define bfloat @stofp_i64_bf16(i64 %a) { ; CHECK-LABEL: stofp_i64_bf16: ; CHECK: // %bb.0: // %entry diff --git a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir index a10d7588cb4429..8e29255189bf53 100644 --- a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir +++ b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir @@ -155,7 +155,7 @@ body: | ; CHECK: liveins: $q0, $q1, $x1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) - ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1, implicit $w1_hi :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load (s128)) ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK-NEXT: RET undef $lr renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) @@ -246,7 +246,7 @@ body: | ; CHECK-LABEL: name: 9-ldrspre-ldrsui-mod-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1, implicit $w1_hi :: (load (s32)) + ; CHECK-NEXT: dead early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) ; CHECK-NEXT: renamable $x1 = LDRXui renamable $x0, 1 :: (load (s64)) ; CHECK-NEXT: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) @@ -280,7 +280,7 @@ body: | ; CHECK-LABEL: name: 10-ldrspre-ldrsui-used-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1, 
implicit $w1_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) ; CHECK-NEXT: renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64)) ; CHECK-NEXT: STRXui renamable $x0, renamable $x0, 1 :: (store (s64)) ; CHECK-NEXT: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) @@ -315,12 +315,12 @@ body: | ; CHECK-LABEL: name: 11-ldrqpre-ldrqpre-no-merge ; CHECK: liveins: $q0, $q1, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1, implicit $w1_hi :: (load (s128)) - ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 1, implicit $w1, implicit $w1_hi :: (load (s128)) - ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1, implicit $w1_hi :: (load (s128)) - ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 12, implicit $w1, implicit $w1_hi :: (load (s128)) - ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1, implicit $w1_hi :: (load (s128)) - ; CHECK-NEXT: early-clobber renamable $x1, renamable $q1 = LDRQpre renamable $x1, 16, implicit $w1, implicit $w1_hi :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 1, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 12, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q1 = LDRQpre 
renamable $x1, 16, implicit $w1 :: (load (s128)) ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load (s128)) @@ -352,7 +352,7 @@ body: | ; CHECK-LABEL: name: 12-ldrspre-ldrsui-no-merge ; CHECK: liveins: $s0, $s1, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1, implicit $w1_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) ; CHECK-NEXT: renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32)) ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) ; CHECK-NEXT: RET undef $lr @@ -383,7 +383,7 @@ body: | ; CHECK-LABEL: name: 13-ldrqpre-ldrdui-no-merge ; CHECK: liveins: $d1, $q0, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1, implicit $w1_hi :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) ; CHECK-NEXT: renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64)) ; CHECK-NEXT: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) ; CHECK-NEXT: STRDui renamable $d1, renamable $x1, 1 :: (store (s64)) @@ -415,7 +415,7 @@ body: | ; CHECK-LABEL: name: 14-ldrqpre-strqui-no-merge ; CHECK: liveins: $q0, $q1, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1, implicit $w1_hi :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) ; CHECK-NEXT: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) @@ -473,7 
+473,7 @@ body: | ; CHECK-LABEL: name: 16-ldrqpre-ldrqui-diff-base-reg-no-merge ; CHECK: liveins: $q0, $q1, $x1, $x2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1, implicit $w1_hi :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) ; CHECK-NEXT: renamable $q1 = LDRQui renamable $x2, 1 :: (load (s128)) ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK-NEXT: RET undef $lr @@ -534,7 +534,7 @@ body: | ; CHECK-LABEL: name: 18-ldrqpre-ldurqi-no-merge ; CHECK: liveins: $q0, $q1, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1, implicit $w1_hi :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) ; CHECK-NEXT: renamable $q1 = LDURQi renamable $x1, 1 :: (load (s128)) ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK-NEXT: RET undef $lr @@ -587,7 +587,7 @@ body: | ; CHECK-LABEL: name: 20-ldrspre-ldrsui-unaligned-no-merge ; CHECK: liveins: $s0, $s1, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1, implicit $w1_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1 :: (load (s32)) ; CHECK-NEXT: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) ; CHECK-NEXT: RET undef $lr @@ -667,7 +667,7 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) - ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1, implicit $w1_hi :: (load 
(s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) ; CHECK-NEXT: RET undef $lr renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) @@ -694,7 +694,7 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $x2 = LDURSWi renamable $x1, 4 :: (load (s32)) - ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1, implicit $w1_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) ; CHECK-NEXT: RET undef $lr renamable $x2 = LDURSWi renamable $x1, 4 :: (load (s32)) @@ -720,12 +720,12 @@ body: | ; CHECK-LABEL: name: 25-ldrswpre-ldrswpre-no-merge ; CHECK: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x0 = LDRSWpre renamable $x1, 48, implicit $w1, implicit $w1_hi :: (load (s32)) - ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x2 = LDRSWpre renamable $x1, 1, implicit $w1, implicit $w1_hi :: (load (s32)) - ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x0 = LDRSWpre renamable $x1, 16, implicit $w1, implicit $w1_hi :: (load (s32)) - ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x2 = LDRSWpre renamable $x1, 12, implicit $w1, implicit $w1_hi :: (load (s32)) - ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 16, implicit $w1, implicit $w1_hi :: (load (s32)) - ; CHECK-NEXT: early-clobber renamable $x1, renamable $x2 = LDRSWpre renamable $x1, 16, implicit $w1, implicit $w1_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x0 = LDRSWpre renamable $x1, 48, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, 
dead renamable $x2 = LDRSWpre renamable $x1, 1, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x0 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x2 = LDRSWpre renamable $x1, 12, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x2 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 48 :: (load (s32)) @@ -755,8 +755,8 @@ body: | ; CHECK-LABEL: name: 26-ldrswpre-ldrwui-no-merge ; CHECK: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1, implicit $w1_hi :: (load (s32)) - ; CHECK-NEXT: renamable $w2 = LDRWui renamable $x1, 1, implicit-def $x2, implicit $w2_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: renamable $w2 = LDRWui renamable $x1, 1, implicit-def $x2 :: (load (s32)) ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32)) @@ -782,7 +782,7 @@ body: | ; CHECK-LABEL: name: 27-ldrwpre-ldrswui-no-merge ; CHECK: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x1, renamable $w0 = LDRWpre renamable $x1, 40, implicit $w1, implicit $w1_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $w0 = LDRWpre renamable $x1, 40, implicit $w1 :: (load (s32)) ; CHECK-NEXT: renamable $x2 = LDRSWui renamable $x1, 1 :: (load 
(s32)) ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) ; CHECK-NEXT: RET undef $lr @@ -808,9 +808,9 @@ body: | ; CHECK-LABEL: name: 28-ldrswpre-ldrwpre-no-merge ; CHECK: liveins: $x11, $x13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $x10 = LDRSWpre renamable $x11, 8, implicit $w11, implicit $w11_hi :: (load (s32), align 8) + ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $x10 = LDRSWpre renamable $x11, 8, implicit $w11 :: (load (s32), align 8) ; CHECK-NEXT: $x14 = EORXrs renamable $x11, renamable $x13, 0 - ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $w12 = LDRWpre renamable $x11, 4, implicit $w11, implicit $w11_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $w12 = LDRWpre renamable $x11, 4, implicit $w11 :: (load (s32)) ; CHECK-NEXT: $x13 = EORXrs renamable $x11, renamable $x13, 0 ; CHECK-NEXT: STPXi renamable $x13, renamable $x14, renamable $x11, 0 :: (store (s64)) ; CHECK-NEXT: RET undef $lr @@ -838,9 +838,9 @@ body: | ; CHECK-LABEL: name: 29-ldrwpre-ldrswpre-no-merge ; CHECK: liveins: $x11, $x13 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $w12 = LDRWpre renamable $x11, 8, implicit $w11, implicit $w11_hi :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $w12 = LDRWpre renamable $x11, 8, implicit $w11 :: (load (s32)) ; CHECK-NEXT: $x14 = EORXrs renamable $x11, renamable $x13, 0 - ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $x10 = LDRSWpre renamable $x11, 4, implicit $w11, implicit $w11_hi :: (load (s32), align 8) + ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $x10 = LDRSWpre renamable $x11, 4, implicit $w11 :: (load (s32), align 8) ; CHECK-NEXT: $x13 = EORXrs renamable $x11, renamable $x13, 0 ; CHECK-NEXT: STPXi renamable $x13, renamable $x14, renamable $x11, 0 :: (store (s64)) ; CHECK-NEXT: RET undef $lr diff --git 
a/llvm/test/CodeGen/AArch64/machine-outliner-calls.mir b/llvm/test/CodeGen/AArch64/machine-outliner-calls.mir index 700a5b228122fc..8abd56fa205499 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-calls.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-calls.mir @@ -57,7 +57,7 @@ body: | # CHECK: name: OUTLINED_FUNCTION_0 # CHECK: bb.0: -# CHECK: liveins: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $d8, $d9, $d10, $d11, $d12, $d13, $d14, $d15, $lr +# CHECK: liveins: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $d15, $d8, $d9, $d10, $d11, $d12, $d13, $d14, $lr # CHECK-DAG: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 # CHECK-NEXT: early-clobber $sp = STRXpre $lr, $sp, -16 diff --git a/llvm/test/CodeGen/AArch64/mingw-refptr.ll b/llvm/test/CodeGen/AArch64/mingw-refptr.ll index 306bee9f85c42e..cc9fac0506ff52 100644 --- a/llvm/test/CodeGen/AArch64/mingw-refptr.ll +++ b/llvm/test/CodeGen/AArch64/mingw-refptr.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=aarch64-w64-mingw32 | FileCheck %s -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \ -; RUN: -mtriple=aarch64-w64-mingw32 2>&1| FileCheck %s --check-prefixes=GISEL,FALLBACK +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64-w64-mingw32 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-w64-mingw32 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI @var = external local_unnamed_addr global i32, align 4 @dsolocalvar = external dso_local local_unnamed_addr global i32, align 4 @@ -10,10 +10,11 @@ define dso_local i32 @getVar() { ; CHECK-LABEL: getVar: -; CHECK: adrp x8, .refptr.var -; CHECK: ldr x8, [x8, :lo12:.refptr.var] -; CHECK: ldr w0, [x8] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .refptr.var +; CHECK-NEXT: ldr x8, [x8, :lo12:.refptr.var] +; 
CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @var, align 4 ret i32 %0 @@ -21,9 +22,10 @@ entry: define dso_local i32 @getDsoLocalVar() { ; CHECK-LABEL: getDsoLocalVar: -; CHECK: adrp x8, dsolocalvar -; CHECK: ldr w0, [x8, :lo12:dsolocalvar] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, dsolocalvar +; CHECK-NEXT: ldr w0, [x8, :lo12:dsolocalvar] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @dsolocalvar, align 4 ret i32 %0 @@ -31,9 +33,10 @@ entry: define dso_local i32 @getLocalVar() { ; CHECK-LABEL: getLocalVar: -; CHECK: adrp x8, localvar -; CHECK: ldr w0, [x8, :lo12:localvar] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, localvar +; CHECK-NEXT: ldr w0, [x8, :lo12:localvar] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @localvar, align 4 ret i32 %0 @@ -41,9 +44,10 @@ entry: define dso_local i32 @getLocalCommon() { ; CHECK-LABEL: getLocalCommon: -; CHECK: adrp x8, localcommon -; CHECK: ldr w0, [x8, :lo12:localcommon] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, localcommon +; CHECK-NEXT: ldr w0, [x8, :lo12:localcommon] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @localcommon, align 4 ret i32 %0 @@ -51,10 +55,11 @@ entry: define dso_local i32 @getExtVar() { ; CHECK-LABEL: getExtVar: -; CHECK: adrp x8, __imp_extvar -; CHECK: ldr x8, [x8, :lo12:__imp_extvar] -; CHECK: ldr w0, [x8] -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, __imp_extvar +; CHECK-NEXT: ldr x8, [x8, :lo12:__imp_extvar] +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret entry: %0 = load i32, ptr @extvar, align 4 ret i32 %0 @@ -62,7 +67,8 @@ entry: define dso_local void @callFunc() { ; CHECK-LABEL: callFunc: -; CHECK: b otherFunc +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: b otherFunc entry: tail call void @otherFunc() ret void @@ -70,16 +76,40 @@ entry: declare dso_local void @otherFunc() -; FALLBACK-NOT: remark:{{.*}}sspFunc define dso_local void @sspFunc() #0 { ; CHECK-LABEL: sspFunc: -; 
CHECK: adrp x8, .refptr.__stack_chk_guard -; CHECK: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] -; CHECK: ldr x8, [x8] -; GISEL-LABEL: sspFunc: -; GISEL: adrp x8, .refptr.__stack_chk_guard -; GISEL: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] -; GISEL: ldr x8, [x8] +; CHECK: .seh_proc sspFunc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: .seh_stackalloc 32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, .refptr.__stack_chk_guard +; CHECK-NEXT: add x0, sp, #7 +; CHECK-NEXT: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str x8, [sp, #8] +; CHECK-NEXT: bl ptrUser +; CHECK-NEXT: adrp x8, .refptr.__stack_chk_guard +; CHECK-NEXT: ldr x8, [x8, :lo12:.refptr.__stack_chk_guard] +; CHECK-NEXT: ldr x9, [sp, #8] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: b.ne .LBB6_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .seh_stackalloc 32 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_2: // %entry +; CHECK-NEXT: bl __stack_chk_fail +; CHECK-NEXT: brk #0x1 +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc entry: %c = alloca i8, align 1 call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %c) @@ -102,3 +132,7 @@ attributes #0 = { sspstrong } ; CHECK: .globl .refptr.var ; CHECK: .refptr.var: ; CHECK: .xword var + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/misched-bundle.mir b/llvm/test/CodeGen/AArch64/misched-bundle.mir index ac6112e8c60efa..9adcd2904a250a 100644 --- a/llvm/test/CodeGen/AArch64/misched-bundle.mir +++ b/llvm/test/CodeGen/AArch64/misched-bundle.mir @@ -17,18 +17,13 @@ # CHECK-NEXT: Single Issue : false; # CHECK-NEXT: SU(1): renamable $z1 = LD1H renamable $p0, renamable $x2, renamable $x10 :: (load unknown-size, align 1) # CHECK-NEXT: # preds left : 0 -# CHECK-NEXT: # succs left : 9 +# CHECK-NEXT: # succs left : 4 # CHECK-NEXT: # rdefs left : 0 # CHECK-NEXT: Latency : 3 # CHECK-NEXT: Depth : 0 # CHECK-NEXT: Height : 7 # CHECK-NEXT: Successors: # CHECK-NEXT: SU(7): Out Latency=1 -# CHECK-NEXT: SU(7): Out Latency=1 -# CHECK-NEXT: SU(7): Out Latency=1 -# CHECK-NEXT: SU(7): Out Latency=1 -# CHECK-NEXT: SU(7): Out Latency=1 -# CHECK-NEXT: SU(7): Out Latency=1 # CHECK-NEXT: SU(6): Data Latency=3 Reg=$z1 # CHECK-NEXT: SU(9): Ord Latency=0 Memory # CHECK-NEXT: SU(8): Ord Latency=0 Memory @@ -83,7 +78,7 @@ # CHECK-NEXT: Single Issue : false; # CHECK-NEXT: SU(6): $z0 = FMAD_ZPmZZ_H renamable $p0, killed $z0(tied-def 0), killed renamable $z1, killed renamable $z2 # CHECK-NEXT: # preds left : 4 -# CHECK-NEXT: # succs left : 7 +# CHECK-NEXT: # succs left : 2 # CHECK-NEXT: # rdefs left : 0 # CHECK-NEXT: Latency : 4 # CHECK-NEXT: Depth : 3 @@ -96,14 +91,9 @@ # CHECK-NEXT: Successors: # CHECK-NEXT: SU(8): Data Latency=4 Reg=$z0 # CHECK-NEXT: SU(7): Anti Latency=0 -# CHECK-NEXT: SU(7): Anti Latency=0 -# CHECK-NEXT: SU(7): Anti Latency=0 -# CHECK-NEXT: SU(7): Anti Latency=0 -# CHECK-NEXT: SU(7): Anti Latency=0 -# CHECK-NEXT: SU(7): Anti Latency=0 # CHECK-NEXT: Single Issue : false; # CHECK-NEXT: SU(7): BUNDLE implicit-def $z1, implicit-def $q1, implicit-def $d1, implicit-def $s1, implicit-def $h1, implicit-def $b1, implicit $z5, implicit $p0, implicit killed $z4, implicit killed $z3 -# CHECK-NEXT: # preds 
left : 15 +# CHECK-NEXT: # preds left : 5 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 # CHECK-NEXT: Latency : 1 @@ -111,20 +101,10 @@ # CHECK-NEXT: Height : 4 # CHECK-NEXT: Predecessors: # CHECK-NEXT: SU(6): Anti Latency=0 -# CHECK-NEXT: SU(6): Anti Latency=0 -# CHECK-NEXT: SU(6): Anti Latency=0 -# CHECK-NEXT: SU(6): Anti Latency=0 -# CHECK-NEXT: SU(6): Anti Latency=0 -# CHECK-NEXT: SU(6): Anti Latency=0 # CHECK-NEXT: SU(5): Data Latency=3 Reg=$z5 # CHECK-NEXT: SU(4): Data Latency=3 Reg=$z4 # CHECK-NEXT: SU(3): Data Latency=3 Reg=$z3 # CHECK-NEXT: SU(1): Out Latency=1 -# CHECK-NEXT: SU(1): Out Latency=1 -# CHECK-NEXT: SU(1): Out Latency=1 -# CHECK-NEXT: SU(1): Out Latency=1 -# CHECK-NEXT: SU(1): Out Latency=1 -# CHECK-NEXT: SU(1): Out Latency=1 # CHECK-NEXT: Successors: # CHECK-NEXT: SU(9): Data Latency=4 Reg=$z1 # CHECK-NEXT: Single Issue : false; diff --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir index 6fb8ba2dfc839e..6cdbbb8c53d690 100644 --- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir +++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir @@ -1583,18 +1583,6 @@ body: | # CHECK-NEXT: B0 [0B,48r:0)[192r,224r:1) 0@0B-phi 1@192r # CHECK-NEXT: B1 [0B,88r:0)[208r,224r:1) 0@0B-phi 1@208r # CHECK-NEXT: B2 [0B,96r:0) 0@0B-phi -# CHECK-NEXT: B0_HI [0B,48r:0)[192r,224r:1) 0@0B-phi 1@192r -# CHECK-NEXT: H0_HI [0B,48r:0)[192r,224r:1) 0@0B-phi 1@192r -# CHECK-NEXT: S0_HI [0B,48r:0)[192r,224r:1) 0@0B-phi 1@192r -# CHECK-NEXT: B1_HI [0B,88r:0)[208r,224r:1) 0@0B-phi 1@208r -# CHECK-NEXT: H1_HI [0B,88r:0)[208r,224r:1) 0@0B-phi 1@208r -# CHECK-NEXT: S1_HI [0B,88r:0)[208r,224r:1) 0@0B-phi 1@208r -# CHECK-NEXT: B2_HI [0B,96r:0) 0@0B-phi -# CHECK-NEXT: H2_HI [0B,96r:0) 0@0B-phi -# CHECK-NEXT: S2_HI [0B,96r:0) 0@0B-phi -# CHECK-NEXT: D0_HI [0B,48r:0)[192r,224r:1) 0@0B-phi 1@192r -# CHECK-NEXT: D1_HI [0B,88r:0)[208r,224r:1) 0@0B-phi 
1@208r -# CHECK-NEXT: D2_HI [0B,96r:0) 0@0B-phi # CHECK-NEXT: %0 [48r,168r:0) 0@48r weight:0.000000e+00 # CHECK-NEXT: %1 [88r,120r:0) 0@88r weight:0.000000e+00 # CHECK-NEXT: %2 [96r,128r:0) 0@96r weight:0.000000e+00 diff --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir index 9c9b6e281b15da..2cc63018813817 100644 --- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir +++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir @@ -466,9 +466,6 @@ body: | # CHECK-NEXT: W0 [0B,16r:0) 0@0B-phi # CHECK-NEXT: W1 [0B,32r:0) 0@0B-phi # CHECK-NEXT: W2 [0B,48r:0) 0@0B-phi -# CHECK-NEXT: W0_HI [0B,16r:0) 0@0B-phi -# CHECK-NEXT: W1_HI [0B,32r:0) 0@0B-phi -# CHECK-NEXT: W2_HI [0B,48r:0) 0@0B-phi # CHECK-NEXT: RegMasks: # CHECK-NEXT: ********** MACHINEINSTRS ********** # CHECK-NEXT: # Machine code for function f: IsSSA, NoPHIs, TracksLiveness, NoVRegs @@ -479,4 +476,4 @@ body: | # CHECK-NEXT: 32B $x4 = ADDXrr $x1, $x1 # CHECK-NEXT: 48B $x5 = ADDXrr $x2, $x2 # CHECK-EMPTY: -# CHECK-NEXT: # End machine code for function f. +# CHECK-NEXT: # End machine code for function f. 
\ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll index 32bc5c5e63b3e1..5b9f438ed1d437 100644 --- a/llvm/test/CodeGen/AArch64/mulcmle.ll +++ b/llvm/test/CodeGen/AArch64/mulcmle.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 %s -o - -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <1 x i64> @v1i64(<1 x i64> %a) { ; CHECK-SD-LABEL: v1i64: diff --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll index def0f15790a9ba..7218204ba844ca 100644 --- a/llvm/test/CodeGen/AArch64/neon-perm.ll +++ b/llvm/test/CodeGen/AArch64/neon-perm.ll @@ -1,13 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for test_vuzp1q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vuzp2q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip1q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip2q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn1q_p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn2q_p0 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI 
%struct.int8x8x2_t = type { [2 x <8 x i8>] } %struct.int16x4x2_t = type { [2 x <4 x i16>] } diff --git a/llvm/test/CodeGen/AArch64/neon-vector-splat.ll b/llvm/test/CodeGen/AArch64/neon-vector-splat.ll index 489eaf179a1bd2..d3846cab46f55d 100644 --- a/llvm/test/CodeGen/AArch64/neon-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/neon-vector-splat.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for shuffle8 +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <2 x i32> @shuffle(ptr %P) { ; CHECK-SD-LABEL: shuffle: @@ -116,10 +114,16 @@ define <2 x i64> @shuffle7(ptr %P) { } define <2 x ptr> @shuffle8(ptr %P) { -; CHECK-LABEL: shuffle8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1r { v0.2d }, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shuffle8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1r { v0.2d }, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shuffle8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: dup v0.2d, v0.d[0] +; CHECK-GI-NEXT: ret %lv2ptr = load <2 x ptr>, ptr %P %sv2ptr = shufflevector <2 x ptr> %lv2ptr, <2 x ptr> undef, <2 x i32> zeroinitializer ret <2 x ptr> %sv2ptr diff --git a/llvm/test/CodeGen/AArch64/overflow.ll b/llvm/test/CodeGen/AArch64/overflow.ll index 977141f2b84f4f..489d46f8b0e727 100644 --- a/llvm/test/CodeGen/AArch64/overflow.ll +++ b/llvm/test/CodeGen/AArch64/overflow.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s 
--check-prefixes=CHECK,SDAG -; RUN: llc < %s -mtriple=arm64-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,GISEL - +; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -global-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI define zeroext i1 @saddo1.i32.unused(i32 %v1, i32 %v2, ptr %res) { ; CHECK-LABEL: saddo1.i32.unused: @@ -105,19 +104,19 @@ entry: ret i1 %obit } define zeroext i1 @saddo.add.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) { -; SDAG-LABEL: saddo.add.i32: -; SDAG: // %bb.0: // %entry -; SDAG-NEXT: add w8, w4, #100 -; SDAG-NEXT: subs w8, w8, #100 -; SDAG-NEXT: cset w0, vs -; SDAG-NEXT: str w8, [x5] -; SDAG-NEXT: ret +; CHECK-SD-LABEL: saddo.add.i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: add w8, w4, #100 +; CHECK-SD-NEXT: subs w8, w8, #100 +; CHECK-SD-NEXT: cset w0, vs +; CHECK-SD-NEXT: str w8, [x5] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: saddo.add.i32: -; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: str w4, [x5] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: saddo.add.i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: str w4, [x5] +; CHECK-GI-NEXT: ret entry: %lhs = add nsw i32 %v5, 100 %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %lhs, i32 -100) @@ -128,20 +127,20 @@ entry: } define zeroext i1 @uaddo.add.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) { -; SDAG-LABEL: uaddo.add.i32: -; SDAG: // %bb.0: // %entry -; SDAG-NEXT: add w8, w4, #5 -; SDAG-NEXT: adds w8, w8, #5 -; SDAG-NEXT: cset w0, hs -; SDAG-NEXT: str w8, [x5] -; SDAG-NEXT: ret +; CHECK-SD-LABEL: uaddo.add.i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: add w8, w4, #5 +; CHECK-SD-NEXT: adds w8, w8, #5 +; CHECK-SD-NEXT: cset w0, hs +; CHECK-SD-NEXT: str w8, [x5] +; CHECK-SD-NEXT: ret ; -; GISEL-LABEL: uaddo.add.i32: -; 
GISEL: // %bb.0: // %entry -; GISEL-NEXT: adds w8, w4, #10 -; GISEL-NEXT: cset w0, hs -; GISEL-NEXT: str w8, [x5] -; GISEL-NEXT: ret +; CHECK-GI-LABEL: uaddo.add.i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adds w8, w4, #10 +; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: str w8, [x5] +; CHECK-GI-NEXT: ret entry: %lhs = add nuw i32 %v5, 5 %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %lhs, i32 5) diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir index 3174d3c8c1a737..f8af5b96370178 100644 --- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir +++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir @@ -487,7 +487,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[DEF]] - ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3735562 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]] + ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3342346 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]] ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF @@ -505,7 +505,7 @@ body: | %0:gpr64common = COPY $x0 %2:gpr64all = IMPLICIT_DEF %3:gpr64sp = COPY %2 - INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3735562 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3 + INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 3342346 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3 %4:fpr128 = MOVIv2d_ns 0 %5:fpr64 = COPY %4.dsub %7:fpr128 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/phi.ll b/llvm/test/CodeGen/AArch64/phi.ll index eeafbaffbcc695..55942d0e421bb9 100644 --- a/llvm/test/CodeGen/AArch64/phi.ll +++ b/llvm/test/CodeGen/AArch64/phi.ll @@ -1,6 +1,6 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=aarch64 -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i8 @ti8(i1 %c, ptr %p, i8 %a, i8 %b) { ; CHECK-SD-LABEL: ti8: diff --git a/llvm/test/CodeGen/AArch64/preserve.ll b/llvm/test/CodeGen/AArch64/preserve.ll index 49fb3685bcfc10..ffd479e1bd739e 100644 --- a/llvm/test/CodeGen/AArch64/preserve.ll +++ b/llvm/test/CodeGen/AArch64/preserve.ll @@ -8,19 +8,20 @@ target triple = "aarch64-unknown-unknown" declare void @bar1() define preserve_mostcc void @baz() #0 { -; CHECK: baz Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 
$zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $b16_hi $b17_hi $b18_hi $b19_hi $b20_hi $b21_hi $b22_hi $b23_hi $b24_hi $b25_hi $b26_hi $b27_hi $b28_hi $b29_hi $b30_hi $b31_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $d8_hi $d9_hi $d10_hi $d11_hi $d12_hi $d13_hi $d14_hi $d15_hi $d16_hi $d17_hi $d18_hi $d19_hi $d20_hi $d21_hi $d22_hi $d23_hi $d24_hi $d25_hi $d26_hi $d27_hi $d28_hi $d29_hi $d30_hi $d31_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $h16_hi $h17_hi $h18_hi $h19_hi $h20_hi $h21_hi $h22_hi $h23_hi $h24_hi $h25_hi $h26_hi $h27_hi $h28_hi $h29_hi $h30_hi $h31_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $s16_hi $s17_hi $s18_hi $s19_hi $s20_hi $s21_hi $s22_hi $s23_hi $s24_hi $s25_hi $s26_hi $s27_hi $s28_hi $s29_hi $s30_hi $s31_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 
$d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 
$x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 +; CHECK: baz Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 
$pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 
$q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 
$z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 call void @bar1() call void @bar2() ret void } define preserve_allcc void @foo() #0 { -; CHECK: foo Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $q0_hi $q1_hi $q2_hi $q3_hi 
$q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 
$q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 
$z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 +; CHECK: foo Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 
$d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 
$x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 call void @bar1() call void @bar2() ret void } define preserve_nonecc void @qux() #0 { -; CHECK: qux Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 
$p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10 $w11 $w12 $w13 $w14 $w15 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x13 $x14 $x15 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $b16_hi $b17_hi $b18_hi $b19_hi $b20_hi $b21_hi $b22_hi $b23_hi $b24_hi $b25_hi $b26_hi $b27_hi $b28_hi $b29_hi $b30_hi $b31_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $d8_hi $d9_hi $d10_hi $d11_hi $d12_hi $d13_hi $d14_hi $d15_hi $d16_hi $d17_hi $d18_hi $d19_hi $d20_hi $d21_hi $d22_hi $d23_hi $d24_hi $d25_hi $d26_hi $d27_hi $d28_hi $d29_hi $d30_hi $d31_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $h16_hi $h17_hi $h18_hi $h19_hi $h20_hi $h21_hi $h22_hi $h23_hi $h24_hi $h25_hi $h26_hi $h27_hi $h28_hi $h29_hi $h30_hi $h31_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $s16_hi $s17_hi $s18_hi $s19_hi $s20_hi $s21_hi $s22_hi $s23_hi $s24_hi $s25_hi $s26_hi $s27_hi $s28_hi $s29_hi $s30_hi $s31_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi 
$w6_hi $w7_hi $w8_hi $w9_hi $w10_hi $w11_hi $w12_hi $w13_hi $w14_hi $w15_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 
$q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 
$z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 +; CHECK: qux Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w10 $w11 $w12 $w13 $w14 $w15 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x10 $x11 $x12 $x13 $x14 $x15 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 
$d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 
$x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 + call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir deleted file mode 100644 index b61fa4be040070..00000000000000 --- 
a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir +++ /dev/null @@ -1,38 +0,0 @@ -# RUN: llc -mtriple=aarch64 -verify-machineinstrs -o - -run-pass=register-coalescer -aarch64-enable-subreg-liveness-tracking %s | FileCheck %s --check-prefix=CHECK -# RUN: llc -mtriple=aarch64 -verify-machineinstrs -o /dev/null -run-pass=register-coalescer -aarch64-enable-subreg-liveness-tracking -debug-only=regalloc %s 2>&1 | FileCheck %s --check-prefix=CHECK-DBG -# REQUIRES: asserts - -# CHECK-DBG: ********** REGISTER COALESCER ********** -# CHECK-DBG: ********** Function: test -# CHECK-DBG: ********** JOINING INTERVALS *********** -# CHECK-DBG: ********** INTERVALS ********** -# CHECK-DBG: %0 [16r,32r:0) 0@16r weight:0.000000e+00 -# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00 -# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [112e,112d:0) 0@112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00 -# CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0@112r 1@32r weight:0.000000e+00 ---- -name: test -tracksRegLiveness: true -fixedStack: [] -stack: - - { id: 0, name: '', type: default, offset: 0, size: 65, alignment: 16, - stack-id: default } -body: | - bb.0.entry: - ; CHECK-LABEL: name: test - ; CHECK: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0 - ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = nuw ADDXri [[ADDXri]], 64, 0 - ; CHECK-NEXT: undef [[MOVi32imm:%[0-9]+]].sub_32:gpr64 = MOVi32imm 64 - ; CHECK-NEXT: undef [[MOVi32imm1:%[0-9]+]].sub_32:gpr64 = MOVi32imm 64 - ; CHECK-NEXT: dead [[ADDXri1]]:gpr64common, dead early-clobber [[MOVi32imm1]]:gpr64 = MOPSMemorySetPseudo [[ADDXri1]], [[MOVi32imm1]], [[MOVi32imm]], implicit-def dead $nzcv - ; CHECK-NEXT: RET_ReallyLR - %1:gpr64sp = ADDXri %stack.0, 0, 0 - %2:gpr64common = nuw ADDXri killed %1, 64, 0 - %3:gpr32 = MOVi32imm 64 - %4:gpr64 = SUBREG_TO_REG 0, killed %3, %subreg.sub_32 - %6:gpr64 = COPY %4 - %5:gpr64common = 
COPY killed %2 - dead %5:gpr64common, dead early-clobber %6:gpr64 = MOPSMemorySetPseudo %5, %6, %4, implicit-def dead $nzcv - RET_ReallyLR - -... diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll index cb52c17e2531c8..d07fcbc29806f4 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll index f6fb4dd5e4b417..4a0e49518517bf 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 29318bd28c45d4..531562d3aa678e 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -2,6 +2,9 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s 
--check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 + declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -494,21 +497,45 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { } define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { -; CHECK-LABEL: v2i128: -; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x4 -; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, vs -; CHECK-NEXT: csel x1, x11, x9, vs -; CHECK-NEXT: adds x8, x2, x6 -; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x2, x10, x8, vs -; CHECK-NEXT: csel x3, x11, x9, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds x8, x0, x4 +; CHECK-SD-NEXT: adcs x9, x1, x5 +; CHECK-SD-NEXT: asr x10, x9, #63 +; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-SD-NEXT: csel x0, x10, x8, vs +; CHECK-SD-NEXT: csel x1, x11, x9, vs +; CHECK-SD-NEXT: adds x8, x2, x6 +; CHECK-SD-NEXT: adcs x9, x3, x7 +; CHECK-SD-NEXT: asr x10, x9, #63 +; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-SD-NEXT: csel x2, x10, x8, vs +; CHECK-SD-NEXT: csel x3, x11, x9, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adds x9, x0, x4 +; CHECK-GI-NEXT: mov w8, wzr +; CHECK-GI-NEXT: mov x13, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: adcs x10, x1, x5 +; CHECK-GI-NEXT: asr x11, x10, #63 +; CHECK-GI-NEXT: cset w12, vs +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: adc x14, x11, x13 +; CHECK-GI-NEXT: tst w12, #0x1 +; CHECK-GI-NEXT: csel x0, x11, x9, ne +; CHECK-GI-NEXT: csel x1, x14, 
x10, ne +; CHECK-GI-NEXT: adds x9, x2, x6 +; CHECK-GI-NEXT: adcs x10, x3, x7 +; CHECK-GI-NEXT: asr x11, x10, #63 +; CHECK-GI-NEXT: cset w12, vs +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: adc x8, x11, x13 +; CHECK-GI-NEXT: tst w12, #0x1 +; CHECK-GI-NEXT: csel x2, x11, x9, ne +; CHECK-GI-NEXT: csel x3, x8, x10, ne +; CHECK-GI-NEXT: ret %z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z } diff --git a/llvm/test/CodeGen/AArch64/selectopt.ll b/llvm/test/CodeGen/AArch64/selectopt.ll index 876d9e68d538aa..54309dca3b8345 100644 --- a/llvm/test/CodeGen/AArch64/selectopt.ll +++ b/llvm/test/CodeGen/AArch64/selectopt.ll @@ -343,15 +343,15 @@ define void @replace_or(ptr nocapture noundef %newst, ptr noundef %t, ptr nounde ; CHECKOO-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] ; CHECKOO-NEXT: [[ADD:%.*]] = zext i1 [[CMP84]] to i64 ; CHECKOO-NEXT: [[CMP84_FROZEN:%.*]] = freeze i1 [[CMP84]] -; CHECKOO-NEXT: [[TMP9:%.*]] = or i64 [[MUL]], 1 ; CHECKOO-NEXT: br i1 [[CMP84_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]] -; CHECKOO: select.false: -; CHECKOO-NEXT: br label [[SELECT_END]] +; CHECKOO: select.true.sink: +; CHECKOO-NEXT: [[TMP9:%.*]] = or disjoint i64 [[MUL]], 1 +; CHECKOO-NEXT: br label [[SELECT_FALSE]] ; CHECKOO: select.end: -; CHECKOO-NEXT: [[SPEC_SELECT:%.*]] = phi i64 [ [[TMP9]], [[IF_THEN]] ], [ [[MUL]], [[SELECT_FALSE]] ] +; CHECKOO-NEXT: [[SPEC_SELECT:%.*]] = phi i64 [ [[TMP9]], [[SELECT_END]] ], [ [[MUL]], [[IF_THEN]] ] ; CHECKOO-NEXT: br label [[IF_END87]] ; CHECKOO: if.end87: -; CHECKOO-NEXT: [[CMP_1]] = phi i64 [ [[MUL]], [[WHILE_BODY]] ], [ [[SPEC_SELECT]], [[SELECT_END]] ] +; CHECKOO-NEXT: [[CMP_1]] = phi i64 [ [[MUL]], [[WHILE_BODY]] ], [ [[SPEC_SELECT]], [[SELECT_FALSE]] ] ; CHECKOO-NEXT: [[CMP16_NOT:%.*]] = icmp sgt i64 [[CMP_1]], [[MA]] ; CHECKOO-NEXT: br i1 [[CMP16_NOT]], label [[WHILE_END]], label [[LAND_RHS]] ; CHECKOO: while.end: @@ -666,20 +666,15 @@ define i32 @or_samegroup(ptr 
nocapture noundef %x, i32 noundef %n, ptr nocapture ; CHECKOO-NEXT: [[CONV:%.*]] = zext i1 [[CMP5]] to i32 ; CHECKOO-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP5]] ; CHECKOO-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]] -; CHECKOO: select.false: -; CHECKOO-NEXT: br label [[SELECT_END]] +; CHECKOO: select.true.sink: +; CHECKOO-NEXT: [[TMP2:%.*]] = or i32 1, [[ADD]] +; CHECKOO-NEXT: br label [[SELECT_FALSE]] ; CHECKOO: select.end: -; CHECKOO-NEXT: [[SEL:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[SELECT_FALSE]] ] -; CHECKOO-NEXT: [[CMP5_FROZEN3:%.*]] = freeze i1 [[CMP5]] -; CHECKOO-NEXT: [[TMP2:%.*]] = or i32 [[SEL]], 1 -; CHECKOO-NEXT: br i1 [[CMP5_FROZEN3]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]] -; CHECKOO: select.false2: -; CHECKOO-NEXT: br label [[SELECT_END1]] -; CHECKOO: select.end1: -; CHECKOO-NEXT: [[OR:%.*]] = phi i32 [ [[TMP2]], [[SELECT_END]] ], [ [[SEL]], [[SELECT_FALSE2]] ] +; CHECKOO-NEXT: [[SEL:%.*]] = phi i32 [ [[ADD]], [[SELECT_END]] ], [ 1, [[IF_THEN]] ] +; CHECKOO-NEXT: [[OR:%.*]] = phi i32 [ [[TMP2]], [[SELECT_END]] ], [ 1, [[IF_THEN]] ] ; CHECKOO-NEXT: br label [[IF_END]] ; CHECKOO: if.end: -; CHECKOO-NEXT: [[Y_1]] = phi i32 [ [[SEL]], [[SELECT_END1]] ], [ 0, [[FOR_BODY]] ] +; CHECKOO-NEXT: [[Y_1]] = phi i32 [ [[SEL]], [[SELECT_FALSE]] ], [ 0, [[FOR_BODY]] ] ; CHECKOO-NEXT: store i32 [[Y_1]], ptr [[ARRAYIDX]], align 4 ; CHECKOO-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECKOO-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] @@ -790,15 +785,15 @@ define i32 @or_oneusevalues(ptr nocapture noundef %x, i32 noundef %n, ptr nocapt ; CHECKOO-NEXT: [[ADD1:%.*]] = add i32 [[ADD]], 1 ; CHECKOO-NEXT: [[ADD2:%.*]] = or i32 [[ADD1]], 1 ; CHECKOO-NEXT: [[CMP5_FROZEN:%.*]] = freeze i1 [[CMP5]] -; CHECKOO-NEXT: [[TMP2:%.*]] = or i32 [[ADD2]], 1 ; CHECKOO-NEXT: br i1 [[CMP5_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]] -; CHECKOO: 
select.false: -; CHECKOO-NEXT: br label [[SELECT_END]] +; CHECKOO: select.true.sink: +; CHECKOO-NEXT: [[TMP2:%.*]] = or i32 1, [[ADD2]] +; CHECKOO-NEXT: br label [[SELECT_FALSE]] ; CHECKOO: select.end: -; CHECKOO-NEXT: [[OR:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[ADD2]], [[SELECT_FALSE]] ] +; CHECKOO-NEXT: [[OR:%.*]] = phi i32 [ [[TMP2]], [[SELECT_END]] ], [ [[ADD2]], [[IF_THEN]] ] ; CHECKOO-NEXT: br label [[IF_END]] ; CHECKOO: if.end: -; CHECKOO-NEXT: [[Y_1]] = phi i32 [ [[OR]], [[SELECT_END]] ], [ 0, [[FOR_BODY]] ] +; CHECKOO-NEXT: [[Y_1]] = phi i32 [ [[OR]], [[SELECT_FALSE]] ], [ 0, [[FOR_BODY]] ] ; CHECKOO-NEXT: store i32 [[Y_1]], ptr [[ARRAYIDX]], align 4 ; CHECKOO-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECKOO-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll index 3604db33d5c4b3..53fbb351954fcf 100644 --- a/llvm/test/CodeGen/AArch64/sext.ll +++ b/llvm/test/CodeGen/AArch64/sext.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i16 @sext_i8_to_i16(i8 %a) { ; CHECK-LABEL: sext_i8_to_i16: diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index 69d3174581e3ef..0f5b240e387ed0 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -1,11 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc 
-mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2p0_zeroes -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0_zeroes +; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; ===== Legal Vector Types ===== @@ -392,13 +387,49 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { ret <4 x i64> %c } +define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) { +; CHECK-SD-LABEL: shufflevector_v3p0: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d2, d5 +; CHECK-SD-NEXT: fmov d0, d1 +; CHECK-SD-NEXT: fmov d1, d3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v3p0: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: fmov x9, d3 +; CHECK-GI-NEXT: mov v0.d[0], x8 +; CHECK-GI-NEXT: mov v2.d[0], x9 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: fmov x9, d4 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov v2.d[1], x9 +; CHECK-GI-NEXT: fmov x8, d5 +; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: ext v0.16b, v0.16b, v2.16b, #8 +; CHECK-GI-NEXT: fmov x10, d1 +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: fmov d1, d2 +; CHECK-GI-NEXT: fmov d2, x10 +; CHECK-GI-NEXT: ret + %c = shufflevector <3 x ptr> %a, <3 x ptr> %b, <3 x i32> + ret <3 x ptr> %c +} + define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) { -; CHECK-LABEL: shufflevector_v4p0: -; CHECK: // %bb.0: -; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d -; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d -; CHECK-NEXT: mov v1.16b, v2.16b 
-; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v4p0: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d +; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v4p0: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d +; CHECK-GI-NEXT: ret %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> ret <4 x ptr> %c } @@ -549,13 +580,13 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s0, w0 ; CHECK-GI-NEXT: fmov s1, w3 -; CHECK-GI-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NEXT: adrp x8, .LCPI35_0 ; CHECK-GI-NEXT: mov v0.b[1], w1 ; CHECK-GI-NEXT: mov v1.b[1], w4 ; CHECK-GI-NEXT: mov v0.b[2], w2 ; CHECK-GI-NEXT: mov v1.b[2], w5 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: umov w0, v0.b[0] ; CHECK-GI-NEXT: umov w1, v0.b[1] @@ -570,9 +601,9 @@ define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-SD-NEXT: adrp x8, .LCPI35_0 +; CHECK-SD-NEXT: adrp x8, .LCPI36_0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI35_0] +; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI36_0] ; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECK-SD-NEXT: ret ; @@ -580,9 +611,9 @@ define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NEXT: adrp x8, .LCPI36_0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0] +; CHECK-GI-NEXT: ldr d1, [x8, 
:lo12:.LCPI36_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -601,9 +632,9 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: adrp x8, .LCPI36_0 +; CHECK-GI-NEXT: adrp x8, .LCPI37_0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI36_0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI37_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -614,18 +645,18 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) { ; CHECK-SD-LABEL: shufflevector_v7i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI37_0 +; CHECK-SD-NEXT: adrp x8, .LCPI38_0 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v7i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI37_0 +; CHECK-GI-NEXT: adrp x8, .LCPI38_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret @@ -642,9 +673,9 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) { ; ; CHECK-GI-LABEL: shufflevector_v3i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI38_0 +; CHECK-GI-NEXT: adrp x8, .LCPI39_0 ; CHECK-GI-NEXT: // kill: 
def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI39_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll index cf201d628b7e1e..23550d3c41cc7d 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll index cabd580e20d504..f08629c15f26c6 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 30e2a70ace0722..be4a5843e8215a 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -2,6 +2,9 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s 
--check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 + declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -497,21 +500,45 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { } define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { -; CHECK-LABEL: v2i128: -; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x4 -; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, vs -; CHECK-NEXT: csel x1, x11, x9, vs -; CHECK-NEXT: subs x8, x2, x6 -; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x2, x10, x8, vs -; CHECK-NEXT: csel x3, x11, x9, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs x8, x0, x4 +; CHECK-SD-NEXT: sbcs x9, x1, x5 +; CHECK-SD-NEXT: asr x10, x9, #63 +; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-SD-NEXT: csel x0, x10, x8, vs +; CHECK-SD-NEXT: csel x1, x11, x9, vs +; CHECK-SD-NEXT: subs x8, x2, x6 +; CHECK-SD-NEXT: sbcs x9, x3, x7 +; CHECK-SD-NEXT: asr x10, x9, #63 +; CHECK-SD-NEXT: eor x11, x10, #0x8000000000000000 +; CHECK-SD-NEXT: csel x2, x10, x8, vs +; CHECK-SD-NEXT: csel x3, x11, x9, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: subs x9, x0, x4 +; CHECK-GI-NEXT: mov w8, wzr +; CHECK-GI-NEXT: mov x13, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: sbcs x10, x1, x5 +; CHECK-GI-NEXT: asr x11, x10, #63 +; CHECK-GI-NEXT: cset w12, vs +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: 
adc x14, x11, x13 +; CHECK-GI-NEXT: tst w12, #0x1 +; CHECK-GI-NEXT: csel x0, x11, x9, ne +; CHECK-GI-NEXT: csel x1, x14, x10, ne +; CHECK-GI-NEXT: subs x9, x2, x6 +; CHECK-GI-NEXT: sbcs x10, x3, x7 +; CHECK-GI-NEXT: asr x11, x10, #63 +; CHECK-GI-NEXT: cset w12, vs +; CHECK-GI-NEXT: cmp w8, #1 +; CHECK-GI-NEXT: adc x8, x11, x13 +; CHECK-GI-NEXT: tst w12, #0x1 +; CHECK-GI-NEXT: csel x2, x11, x9, ne +; CHECK-GI-NEXT: csel x3, x8, x10, ne +; CHECK-GI-NEXT: ret %z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z } diff --git a/llvm/test/CodeGen/AArch64/strpre-str-merge.mir b/llvm/test/CodeGen/AArch64/strpre-str-merge.mir index 5c1937e0d7753e..722de6bb343e2b 100644 --- a/llvm/test/CodeGen/AArch64/strpre-str-merge.mir +++ b/llvm/test/CodeGen/AArch64/strpre-str-merge.mir @@ -156,7 +156,7 @@ body: | ; CHECK-LABEL: name: 6-strqui-strqpre-no-merge ; CHECK: liveins: $q0, $q1, $x0 ; CHECK: STRQui renamable $q1, renamable $x0, 1 :: (store (s128)) - ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0, implicit $w0_hi :: (store (s128)) + ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0 :: (store (s128)) ; CHECK: RET undef $lr, implicit $x0 STRQui killed renamable $q1, renamable $x0, 1 :: (store (s128)) early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store (s128)) @@ -235,7 +235,7 @@ body: | liveins: $s0, $s1, $x0, $x1 ; CHECK-LABEL: name: 9-strspre-strsui-mod-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 - ; CHECK: dead early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0, implicit $w0_hi :: (store (s32)) + ; CHECK: dead early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store (s32)) ; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64)) ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store (s32)) ; CHECK: RET undef $lr, 
implicit $x0 @@ -265,7 +265,7 @@ body: | liveins: $s0, $s1, $x0, $x1 ; CHECK-LABEL: name: 10-strspre-strsui-used-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0, implicit $w0_hi :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store (s32)) ; CHECK: STRXui renamable $x1, renamable $x1, 1 :: (store (s32)) ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 @@ -296,12 +296,12 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 11-strspre-strspre-no-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0, implicit $w0_hi :: (store (s32)) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 16, implicit $w0, implicit $w0_hi :: (store (s32)) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0, implicit $w0_hi :: (store (s32)) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 12, implicit $w0, implicit $w0_hi :: (store (s32)) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0, implicit $w0_hi :: (store (s32)) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 4, implicit $w0, implicit $w0_hi :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 16, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 12, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable 
$s0, renamable $x0, 4, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 4, implicit $w0 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 12 :: (store (s32)) early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 16 :: (store (s32)) @@ -335,7 +335,7 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 12-strspre-strsui-no-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0, implicit $w0_hi :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store (s32)) ; CHECK: STRSui renamable $s1, renamable $x0, 2 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store (s32)) @@ -390,7 +390,7 @@ body: | liveins: $q0, $q1, $x0 ; CHECK-LABEL: name: 14-strqpre-sturqi-no-merge ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0, implicit $w0_hi :: (store (s128)) + ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0 :: (store (s128)) ; CHECK: STURQi renamable $q1, renamable $x0, 1 :: (store (s128)) ; CHECK: RET undef $lr, implicit $x0 early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store (s128)) @@ -417,7 +417,7 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 15-strspre-strsui-unaligned-no-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 251, implicit $w0, implicit $w0_hi :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 251, implicit $w0 :: (store (s32)) ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store 
(s32)) ; CHECK: RET undef $lr, implicit $x0 early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 251 :: (store (s32)) @@ -443,7 +443,7 @@ body: | liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 16-strxpre-strxui-same-reg-no-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber renamable $x0 = STRXpre renamable $x1, renamable $x0, 24, implicit $w0, implicit $w0_hi :: (store (s64)) + ; CHECK: early-clobber renamable $x0 = STRXpre renamable $x1, renamable $x0, 24, implicit $w0 :: (store (s64)) ; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store (s64)) ; CHECK: RET undef $lr, implicit $x0 early-clobber renamable $x0 = STRXpre killed renamable $x1, killed renamable $x0, 24 :: (store (s64)) @@ -470,7 +470,7 @@ body: | liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 17-strwpre-strwui-same-reg-no-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber renamable $x0 = STRWpre renamable $w1, renamable $x0, 24, implicit $w0, implicit $w0_hi, implicit-def $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRWpre renamable $w1, renamable $x0, 24, implicit $w0, implicit-def $w0 :: (store (s32)) ; CHECK: STRWui renamable $w0, renamable $x0, 1 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 early-clobber renamable $x0 = STRWpre killed renamable $w1, killed renamable $x0, 24 :: (store (s32)) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir index f85658e9a596a0..45829b3198224d 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir @@ -28,7 +28,7 @@ body: | ; CHECK-LABEL: name: bic_i16_zero ; CHECK: liveins: $p0, $z0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit-def $b0_hi, implicit-def $h0_hi, implicit-def $s0_hi, implicit-def 
$d0_hi, implicit-def $q0_hi, implicit killed $p0, implicit $z0 { + ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit killed $p0, implicit $z0 { ; CHECK-NEXT: $z0 = MOVPRFX_ZPzZ_H $p0, $z0 ; CHECK-NEXT: $z0 = LSL_ZPmI_H killed renamable $p0, internal $z0, 0 ; CHECK-NEXT: $z0 = BIC_ZPmZ_H killed renamable $p0, internal killed $z0, internal killed renamable $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryComm-merging.mir b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryComm-merging.mir index 0e2f1c3ff4f697..970077fdfea7b8 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryComm-merging.mir +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryComm-merging.mir @@ -28,7 +28,7 @@ body: | ; CHECK-LABEL: name: fmul_float_zero ; CHECK: liveins: $p0, $z0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit-def $b0_hi, implicit-def $h0_hi, implicit-def $s0_hi, implicit-def $d0_hi, implicit-def $q0_hi, implicit $p0, implicit $z0 { + ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit $p0, implicit $z0 { ; CHECK-NEXT: $z0 = MOVPRFX_ZPzZ_S $p0, $z0 ; CHECK-NEXT: $z0 = LSL_ZPmI_S renamable $p0, internal $z0, 0 ; CHECK-NEXT: $z0 = FMUL_ZPmZ_S renamable $p0, internal killed $z0, internal killed renamable $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryCommWithRev-merging.mir b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryCommWithRev-merging.mir index 548ebc26b83e80..308291fb6fe933 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryCommWithRev-merging.mir +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-binaryCommWithRev-merging.mir @@ -30,7 +30,7 @@ body: | ; CHECK-LABEL: name: fsub_s_zero ; CHECK: liveins: $p0, $z0 ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit-def $b0_hi, implicit-def $h0_hi, implicit-def $s0_hi, implicit-def $d0_hi, implicit-def $q0_hi, implicit $p0, implicit $z0 { + ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit $p0, implicit $z0 { ; CHECK-NEXT: $z0 = MOVPRFX_ZPzZ_S $p0, $z0 ; CHECK-NEXT: $z0 = LSL_ZPmI_S renamable $p0, internal $z0, 0 ; CHECK-NEXT: $z0 = FSUBR_ZPmZ_S renamable $p0, internal killed $z0, internal killed renamable $z0 diff --git a/llvm/test/CodeGen/AArch64/uadd_sat.ll b/llvm/test/CodeGen/AArch64/uadd_sat.ll index ccf46e8fce2e15..e9d22c7be52efe 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.uadd.sat.i4(i4, i4) declare i8 @llvm.uadd.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll index d29564029544c9..5c81e3f20277a7 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 
@llvm.uadd.sat.i4(i4, i4) declare i8 @llvm.uadd.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index badd31c1c561c5..924bd3981779ea 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -2,6 +2,9 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 + declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -488,17 +491,33 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { } define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { -; CHECK-LABEL: v2i128: -; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x4 -; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: csinv x0, x8, xzr, lo -; CHECK-NEXT: csinv x1, x9, xzr, lo -; CHECK-NEXT: adds x8, x2, x6 -; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: csinv x2, x8, xzr, lo -; CHECK-NEXT: csinv x3, x9, xzr, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds x8, x0, x4 +; CHECK-SD-NEXT: adcs x9, x1, x5 +; CHECK-SD-NEXT: csinv x0, x8, xzr, lo +; CHECK-SD-NEXT: csinv x1, x9, xzr, lo +; CHECK-SD-NEXT: adds x8, x2, x6 +; CHECK-SD-NEXT: adcs x9, x3, x7 +; CHECK-SD-NEXT: csinv x2, x8, xzr, lo +; CHECK-SD-NEXT: csinv x3, x9, xzr, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adds x8, x0, x4 +; CHECK-GI-NEXT: adcs x9, x1, x5 +; CHECK-GI-NEXT: cset w10, hs +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csinv x0, x8, xzr, eq +; CHECK-GI-NEXT: csinv x1, x9, xzr, eq +; CHECK-GI-NEXT: adds x8, x2, x6 
+; CHECK-GI-NEXT: adcs x9, x3, x7 +; CHECK-GI-NEXT: cset w10, hs +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csinv x2, x8, xzr, eq +; CHECK-GI-NEXT: csinv x3, x9, xzr, eq +; CHECK-GI-NEXT: ret %z = call <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z } diff --git a/llvm/test/CodeGen/AArch64/usub_sat.ll b/llvm/test/CodeGen/AArch64/usub_sat.ll index 160e7e6607cdc3..54d7fc5a63b115 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.usub.sat.i4(i4, i4) declare i8 @llvm.usub.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll index a9932216dbe34c..2793aeb163c94d 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.usub.sat.i4(i4, i4) declare i8 @llvm.usub.sat.i8(i8, i8) diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 45418b5c648fa3..a623eb554cac79 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -2,6 +2,9 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s 
--check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; CHECK-GI: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 + declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -486,17 +489,33 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { } define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind { -; CHECK-LABEL: v2i128: -; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x4 -; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: csel x0, xzr, x8, lo -; CHECK-NEXT: csel x1, xzr, x9, lo -; CHECK-NEXT: subs x8, x2, x6 -; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: csel x2, xzr, x8, lo -; CHECK-NEXT: csel x3, xzr, x9, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs x8, x0, x4 +; CHECK-SD-NEXT: sbcs x9, x1, x5 +; CHECK-SD-NEXT: csel x0, xzr, x8, lo +; CHECK-SD-NEXT: csel x1, xzr, x9, lo +; CHECK-SD-NEXT: subs x8, x2, x6 +; CHECK-SD-NEXT: sbcs x9, x3, x7 +; CHECK-SD-NEXT: csel x2, xzr, x8, lo +; CHECK-SD-NEXT: csel x3, xzr, x9, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: subs x8, x0, x4 +; CHECK-GI-NEXT: sbcs x9, x1, x5 +; CHECK-GI-NEXT: cset w10, lo +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x0, xzr, x8, ne +; CHECK-GI-NEXT: csel x1, xzr, x9, ne +; CHECK-GI-NEXT: subs x8, x2, x6 +; CHECK-GI-NEXT: sbcs x9, x3, x7 +; CHECK-GI-NEXT: cset w10, lo +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x2, xzr, x8, ne +; CHECK-GI-NEXT: csel x3, xzr, x9, ne +; CHECK-GI-NEXT: ret %z = call <2 x i128> @llvm.usub.sat.v2i128(<2 x i128> %x, <2 x i128> %y) ret <2 x i128> %z } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll 
b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll index d71aed2d17506b..809a6d6556a7be 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a) declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll index d0ae669ffb3d68..5d149f7c0c62ef 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -march=amdgcn -mcpu=gfx950 -global-isel=0 < %s | FileCheck -enable-var-scope --check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx950 -global-isel=1 < %s | FileCheck -enable-var-scope --check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx950 -global-isel=0 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,SDAG %s +; RUN: llc -march=amdgcn -mcpu=gfx950 -global-isel=1 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL %s declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half>, <8 x half>, <4 x float>, i32 immarg, i32 immarg, i32 immarg) declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half>, <8 x half>, <16 x float>, i32 immarg, i32 immarg, i32 immarg) @@ -49,52 +49,366 @@ define <4 x float> @test_mfma_f32_16x16x32_f16__flags(<8 
x half> %arg0, <8 x hal ret <4 x float> %result } -define <4 x float> @test_mfma_f32_16x16x32_f16__mac(<4 x float> %arg2, <8 x half> %arg0, <8 x half> %arg1) { -; GCN-LABEL: test_mfma_f32_16x16x32_f16__mac: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v0 -; GCN-NEXT: v_accvgpr_write_b32 a1, v1 -; GCN-NEXT: v_accvgpr_write_b32 a2, v2 -; GCN-NEXT: v_accvgpr_write_b32 a3, v3 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[4:7], v[8:11], a[0:3] -; GCN-NEXT: s_nop 6 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: s_setpc_b64 s[30:31] +define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrspace(1) %out, <8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2) #0 { +; SDAG-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; 
GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] +; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0) - ret <4 x float> %result + store <4 x float> %result, ptr addrspace(1) %out + ret void } -define <4 x float> @test_mfma_f32_16x16x32_f16___flags__mac(<4 x float> %arg2, <8 x half> %arg0, <8 x half> %arg1) { -; GCN-LABEL: test_mfma_f32_16x16x32_f16___flags__mac: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v0 -; GCN-NEXT: v_accvgpr_write_b32 a1, v1 -; GCN-NEXT: v_accvgpr_write_b32 a2, v2 -; GCN-NEXT: v_accvgpr_write_b32 a3, v3 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[4:7], v[8:11], a[0:3] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 6 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: s_setpc_b64 s[30:31] - %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 1, i32 1, i32 1) - ret <4 x float> %result +define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr addrspace(1) %out, <8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2) #0 { +; SDAG-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; 
SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] +; GISEL-NEXT: s_endpgm + %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 3, i32 2, i32 1) + store <4 x float> %result, ptr addrspace(1) %out + ret void } ; -------------------------------------------------------------------- ; llvm.amdgcn.mfma.f32.32x32x16.f16 ; -------------------------------------------------------------------- -define <16 x 
float> @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) { -; GCN-LABEL: test_mfma_f32_32x32x16_f16: +define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) #1 { +; SDAG-LABEL: test_mfma_f32_32x32x16_f16: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b64_e32 v[12:13], 48 +; SDAG-NEXT: v_mov_b64_e32 v[14:15], 32 +; SDAG-NEXT: v_mov_b64_e32 v[16:17], 16 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: v_mov_b64_e32 v[18:19], 0 +; SDAG-NEXT: v_mov_b32_e32 v8, s16 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: v_mov_b32_e32 v9, s17 +; SDAG-NEXT: v_mov_b32_e32 v10, s18 +; SDAG-NEXT: v_mov_b32_e32 v11, s19 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], 
a[24:27], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[16:17], a[20:23], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[18:19], a[16:19], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], v[8:11], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_32x32x16_f16: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 0 +; GISEL-NEXT: v_mov_b64_e32 v[26:27], 48 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 16 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; 
GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[24:25], 32 +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[24:25], a[24:27], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[26:27], a[28:31], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm + %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0) + store volatile <16 x float> %result, ptr addrspace(1) null + store volatile <16 x float> %arg2, ptr addrspace(1) null + ret void +} + +define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) #1 { +; SDAG-LABEL: 
test_mfma_f32_32x32x16_f16__flags: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b64_e32 v[12:13], 48 +; SDAG-NEXT: v_mov_b64_e32 v[14:15], 32 +; SDAG-NEXT: v_mov_b64_e32 v[16:17], 16 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: v_mov_b64_e32 v[18:19], 0 +; SDAG-NEXT: v_mov_b32_e32 v8, s16 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: v_mov_b32_e32 v9, s17 +; SDAG-NEXT: v_mov_b32_e32 v10, s18 +; SDAG-NEXT: v_mov_b32_e32 v11, s19 +; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[16:17], a[20:23], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[18:19], a[16:19], off sc0 sc1 +; SDAG-NEXT: 
s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], v[8:11], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v[18:19], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_32x32x16_f16__flags: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 0 +; GISEL-NEXT: v_mov_b64_e32 v[26:27], 48 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 16 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: 
v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[24:25], 32 +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[24:25], a[24:27], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[26:27], a[28:31], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm + %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 2, i32 3, i32 1) + store volatile <16 x float> %result, ptr addrspace(1) null + store volatile <16 x float> %arg2, ptr addrspace(1) null + ret void +} + +define <16 x float> @test_mfma_f32_32x32x16_f16__mac(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) { +; GCN-LABEL: test_mfma_f32_32x32x16_f16__mac: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_accvgpr_write_b32 a0, v8 @@ -138,8 +452,8 @@ define <16 x float> @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x half> 
%ar ret <16 x float> %result } -define <16 x float> @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) { -; GCN-LABEL: test_mfma_f32_32x32x16_f16__flags: +define <16 x float> @test_mfma_f32_32x32x16_f16__mac__flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2) { +; GCN-LABEL: test_mfma_f32_32x32x16_f16__mac__flags: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_accvgpr_write_b32 a0, v8 @@ -183,155 +497,327 @@ define <16 x float> @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <8 x ha ret <16 x float> %result } -define <16 x float> @test_mfma_f32_32x32x16_f16__mac(<16 x float> %arg2, <8 x half> %arg0, <8 x half> %arg1) { -; GCN-LABEL: test_mfma_f32_32x32x16_f16__mac: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v0 -; GCN-NEXT: v_accvgpr_write_b32 a1, v1 -; GCN-NEXT: v_accvgpr_write_b32 a2, v2 -; GCN-NEXT: v_accvgpr_write_b32 a3, v3 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_accvgpr_write_b32 a5, v5 -; GCN-NEXT: v_accvgpr_write_b32 a6, v6 -; GCN-NEXT: v_accvgpr_write_b32 a7, v7 -; GCN-NEXT: v_accvgpr_write_b32 a8, v8 -; GCN-NEXT: v_accvgpr_write_b32 a9, v9 -; GCN-NEXT: v_accvgpr_write_b32 a10, v10 -; GCN-NEXT: v_accvgpr_write_b32 a11, v11 -; GCN-NEXT: v_accvgpr_write_b32 a12, v12 -; GCN-NEXT: v_accvgpr_write_b32 a13, v13 -; GCN-NEXT: v_accvgpr_write_b32 a14, v14 -; GCN-NEXT: v_accvgpr_write_b32 a15, v15 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[16:19], v[20:23], a[0:15] -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: 
v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] +define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, ptr addrspace(1) %out) #0 { +; SDAG-LABEL: test_mfma_f32_32x32x16_f16__vgprcd: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a31, s23 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a30, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a29, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a28, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a27, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a26, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a25, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a24, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a23, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a22, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a21, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a20, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a19, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a18, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a17, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a16, s8 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 +; SDAG-NEXT: v_mov_b32_e32 v9, s21 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[16:31] +; SDAG-NEXT: v_mov_b32_e32 v10, s22 +; SDAG-NEXT: v_mov_b32_e32 v11, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: 
global_store_dwordx4 v12, v[8:11], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; GISEL-NEXT: v_mov_b32_e32 v24, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 
+; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0) - ret <16 x float> %result + store volatile <16 x float> %arg2, ptr addrspace(1) %out + store volatile <16 x float> %result, ptr addrspace(1) %out + ret void } -define <16 x 
float> @test_mfma_f32_32x32x16_f16__flags__mac(<16 x float> %arg2, <8 x half> %arg0, <8 x half> %arg1) { -; GCN-LABEL: test_mfma_f32_32x32x16_f16__flags__mac: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_accvgpr_write_b32 a0, v0 -; GCN-NEXT: v_accvgpr_write_b32 a1, v1 -; GCN-NEXT: v_accvgpr_write_b32 a2, v2 -; GCN-NEXT: v_accvgpr_write_b32 a3, v3 -; GCN-NEXT: v_accvgpr_write_b32 a4, v4 -; GCN-NEXT: v_accvgpr_write_b32 a5, v5 -; GCN-NEXT: v_accvgpr_write_b32 a6, v6 -; GCN-NEXT: v_accvgpr_write_b32 a7, v7 -; GCN-NEXT: v_accvgpr_write_b32 a8, v8 -; GCN-NEXT: v_accvgpr_write_b32 a9, v9 -; GCN-NEXT: v_accvgpr_write_b32 a10, v10 -; GCN-NEXT: v_accvgpr_write_b32 a11, v11 -; GCN-NEXT: v_accvgpr_write_b32 a12, v12 -; GCN-NEXT: v_accvgpr_write_b32 a13, v13 -; GCN-NEXT: v_accvgpr_write_b32 a14, v14 -; GCN-NEXT: v_accvgpr_write_b32 a15, v15 -; GCN-NEXT: s_nop 1 -; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[16:19], v[20:23], a[0:15] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 7 -; GCN-NEXT: s_nop 2 -; GCN-NEXT: v_accvgpr_read_b32 v0, a0 -; GCN-NEXT: v_accvgpr_read_b32 v1, a1 -; GCN-NEXT: v_accvgpr_read_b32 v2, a2 -; GCN-NEXT: v_accvgpr_read_b32 v3, a3 -; GCN-NEXT: v_accvgpr_read_b32 v4, a4 -; GCN-NEXT: v_accvgpr_read_b32 v5, a5 -; GCN-NEXT: v_accvgpr_read_b32 v6, a6 -; GCN-NEXT: v_accvgpr_read_b32 v7, a7 -; GCN-NEXT: v_accvgpr_read_b32 v8, a8 -; GCN-NEXT: v_accvgpr_read_b32 v9, a9 -; GCN-NEXT: v_accvgpr_read_b32 v10, a10 -; GCN-NEXT: v_accvgpr_read_b32 v11, a11 -; GCN-NEXT: v_accvgpr_read_b32 v12, a12 -; GCN-NEXT: v_accvgpr_read_b32 v13, a13 -; GCN-NEXT: v_accvgpr_read_b32 v14, a14 -; GCN-NEXT: v_accvgpr_read_b32 v15, a15 -; GCN-NEXT: s_setpc_b64 s[30:31] - %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 1, i32 1, i32 1) - ret <16 x float> %result +define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> 
%arg2, ptr addrspace(1) %out) #0 { +; SDAG-LABEL: test_mfma_f32_32x32x16_f16__vgprcd__flags: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a31, s23 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a30, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a29, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a28, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a27, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a26, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a25, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a24, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a23, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a22, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a21, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a20, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a19, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a18, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a17, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a16, s8 +; SDAG-NEXT: v_mov_b32_e32 v8, s20 +; SDAG-NEXT: v_mov_b32_e32 v9, s21 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:1 abid:2 blgp:3 +; SDAG-NEXT: v_mov_b32_e32 v10, s22 +; SDAG-NEXT: v_mov_b32_e32 v11, s23 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 
v12, v[0:3], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v12, a[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd__flags: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; GISEL-NEXT: v_mov_b32_e32 v24, 0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], 
s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:1 abid:2 blgp:3 +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_endpgm + %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 1, i32 2, i32 3) + store volatile <16 x float> %arg2, ptr addrspace(1) %out + store volatile <16 x float> %result, ptr addrspace(1) %out + ret void } define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[16:17], 
s[20:21] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[18:19] -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] -; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 -; SDAG-NEXT: 
global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19] -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] -; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: 
v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] -; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 -; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 -; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0) store <16 x float> %result, ptr addrspace(1) %out @@ -341,62 +827,76 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac_flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; 
SDAG-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[18:19] -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1 -; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v16, 
v[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac_flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19] -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1 -; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: 
v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] -; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 -; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 -; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half> %arg0, <8 x half> %arg1, <16 x float> %arg2, i32 3, i32 2, i32 1) store <16 x float> %result, ptr addrspace(1) %out @@ -419,7 +919,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -439,7 +939,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8__flags(<4 x i32> %arg0, <4 x i32> %a ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; 
GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -452,50 +952,48 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8__flags(<4 x i32> %arg0, <4 x i32> %a define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspace(1) %out, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2) #0 { ; SDAG-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s4 -; SDAG-NEXT: v_mov_b32_e32 v1, s5 -; SDAG-NEXT: v_mov_b32_e32 v2, s6 -; SDAG-NEXT: v_mov_b32_e32 v3, s7 -; SDAG-NEXT: v_mov_b32_e32 v4, s8 -; SDAG-NEXT: v_mov_b32_e32 v5, s9 -; SDAG-NEXT: v_mov_b32_e32 v6, s10 -; SDAG-NEXT: v_mov_b32_e32 v7, s11 -; SDAG-NEXT: v_mov_b32_e32 v8, s12 -; SDAG-NEXT: v_mov_b32_e32 v9, s13 -; SDAG-NEXT: v_mov_b32_e32 v10, s14 -; SDAG-NEXT: v_mov_b32_e32 v11, s15 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: v_mov_b32_e32 v4, s12 +; SDAG-NEXT: v_mov_b32_e32 v5, s13 +; SDAG-NEXT: v_mov_b32_e32 v6, s14 +; SDAG-NEXT: v_mov_b32_e32 v7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: 
s_nop 2 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[2:3] +; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GISEL-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] -; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 0, i32 0, i32 0) store <4 x i32> %result, ptr addrspace(1) %out @@ -505,50 +1003,48 @@ define amdgpu_kernel void 
@test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr addrspace(1) %out, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2) #0 { ; SDAG-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s4 -; SDAG-NEXT: v_mov_b32_e32 v1, s5 -; SDAG-NEXT: v_mov_b32_e32 v2, s6 -; SDAG-NEXT: v_mov_b32_e32 v3, s7 -; SDAG-NEXT: v_mov_b32_e32 v4, s8 -; SDAG-NEXT: v_mov_b32_e32 v5, s9 -; SDAG-NEXT: v_mov_b32_e32 v6, s10 -; SDAG-NEXT: v_mov_b32_e32 v7, s11 -; SDAG-NEXT: v_mov_b32_e32 v8, s12 -; SDAG-NEXT: v_mov_b32_e32 v9, s13 -; SDAG-NEXT: v_mov_b32_e32 v10, s14 -; SDAG-NEXT: v_mov_b32_e32 v11, s15 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: v_mov_b32_e32 v4, s12 +; SDAG-NEXT: v_mov_b32_e32 v5, s13 +; SDAG-NEXT: v_mov_b32_e32 v6, s14 +; SDAG-NEXT: v_mov_b32_e32 v7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[2:3] +; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; 
GISEL-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GISEL-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 -; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 3, i32 2, i32 1) store <4 x i32> %result, ptr addrspace(1) %out @@ -564,137 +1060,131 @@ declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32>, <4 x i32>, <16 x define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2) #1 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8: ; SDAG: ; %bb.0: 
-; SDAG-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[8:9], 16 -; SDAG-NEXT: v_mov_b64_e32 v[10:11], 0 +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b64_e32 v[8:9], 48 +; SDAG-NEXT: v_mov_b64_e32 v[10:11], 32 +; SDAG-NEXT: v_mov_b64_e32 v[12:13], 16 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s20 -; SDAG-NEXT: v_accvgpr_write_b32 a31, s19 -; SDAG-NEXT: v_mov_b32_e32 v1, s21 -; SDAG-NEXT: v_mov_b32_e32 v2, s22 -; SDAG-NEXT: v_mov_b32_e32 v3, s23 -; SDAG-NEXT: v_mov_b32_e32 v4, s24 -; SDAG-NEXT: v_mov_b32_e32 v5, s25 -; SDAG-NEXT: v_mov_b32_e32 v6, s26 -; SDAG-NEXT: v_mov_b32_e32 v7, s27 -; SDAG-NEXT: v_accvgpr_write_b32 a30, s18 -; SDAG-NEXT: v_accvgpr_write_b32 a29, s17 -; SDAG-NEXT: v_accvgpr_write_b32 a28, s16 -; SDAG-NEXT: v_accvgpr_write_b32 a27, s15 -; SDAG-NEXT: v_accvgpr_write_b32 a26, s14 -; SDAG-NEXT: v_accvgpr_write_b32 a25, s13 -; SDAG-NEXT: v_accvgpr_write_b32 a24, s12 -; SDAG-NEXT: v_accvgpr_write_b32 a23, s11 -; SDAG-NEXT: v_accvgpr_write_b32 a22, s10 -; SDAG-NEXT: v_accvgpr_write_b32 a21, s9 -; SDAG-NEXT: v_accvgpr_write_b32 a20, s8 -; SDAG-NEXT: v_accvgpr_write_b32 a19, s7 -; SDAG-NEXT: v_accvgpr_write_b32 a18, s6 -; SDAG-NEXT: v_accvgpr_write_b32 a17, s5 -; SDAG-NEXT: v_accvgpr_write_b32 a16, s4 -; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], 48 -; SDAG-NEXT: v_mov_b64_e32 v[6:7], 32 -; SDAG-NEXT: v_mov_b32_e32 v0, s12 -; SDAG-NEXT: v_mov_b32_e32 v1, s13 -; SDAG-NEXT: v_mov_b32_e32 v2, s14 -; SDAG-NEXT: v_mov_b32_e32 v3, s15 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 4 -; SDAG-NEXT: global_store_dwordx4 v[4:5], a[12:15], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[6:7], a[8:11], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: 
global_store_dwordx4 v[8:9], a[4:7], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[10:11], a[0:3], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v6, s30 +; SDAG-NEXT: v_mov_b32_e32 v7, s31 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: v_mov_b64_e32 v[14:15], 0 ; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] ; SDAG-NEXT: v_mov_b32_e32 v0, s16 ; SDAG-NEXT: v_mov_b32_e32 v1, s17 ; SDAG-NEXT: v_mov_b32_e32 v2, s18 ; SDAG-NEXT: v_mov_b32_e32 v3, s19 -; SDAG-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; 
SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v0, s4 -; SDAG-NEXT: v_mov_b32_e32 v1, s5 -; SDAG-NEXT: v_mov_b32_e32 v2, s6 -; SDAG-NEXT: v_mov_b32_e32 v3, s7 ; SDAG-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 ; SDAG-NEXT: v_mov_b32_e32 v3, s11 -; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 0 +; GISEL-NEXT: v_mov_b64_e32 v[26:27], 48 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 16 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; 
GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[24:25] -; GISEL-NEXT: v_accvgpr_write_b32 a31, s19 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[26:27] -; GISEL-NEXT: v_accvgpr_write_b32 a30, s18 -; GISEL-NEXT: v_accvgpr_write_b32 a29, s17 -; GISEL-NEXT: v_accvgpr_write_b32 a28, s16 -; GISEL-NEXT: v_accvgpr_write_b32 a27, s15 -; GISEL-NEXT: v_accvgpr_write_b32 a26, s14 -; GISEL-NEXT: v_accvgpr_write_b32 a25, s13 -; GISEL-NEXT: v_accvgpr_write_b32 a24, s12 -; GISEL-NEXT: v_accvgpr_write_b32 a23, s11 -; GISEL-NEXT: v_accvgpr_write_b32 a22, s10 -; GISEL-NEXT: v_accvgpr_write_b32 a21, s9 -; GISEL-NEXT: v_accvgpr_write_b32 a20, s8 -; GISEL-NEXT: v_accvgpr_write_b32 a19, s7 -; GISEL-NEXT: v_accvgpr_write_b32 a18, s6 -; GISEL-NEXT: v_accvgpr_write_b32 a17, s5 -; GISEL-NEXT: v_accvgpr_write_b32 a16, s4 -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 16 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 32 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[0:3], off sc0 sc1 +; GISEL-NEXT: 
v_mov_b64_e32 v[24:25], 32 +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[24:25], a[24:27], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[26:27], a[28:31], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 0, i32 0, i32 0) @@ -706,137 +1196,131 @@ define amdgpu_kernel 
void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32> define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__flags(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2) #1 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[8:9], 16 -; SDAG-NEXT: v_mov_b64_e32 v[10:11], 0 +; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b64_e32 v[8:9], 48 +; SDAG-NEXT: v_mov_b64_e32 v[10:11], 32 +; SDAG-NEXT: v_mov_b64_e32 v[12:13], 16 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v0, s20 -; SDAG-NEXT: v_accvgpr_write_b32 a31, s19 -; SDAG-NEXT: v_mov_b32_e32 v1, s21 -; SDAG-NEXT: v_mov_b32_e32 v2, s22 -; SDAG-NEXT: v_mov_b32_e32 v3, s23 -; SDAG-NEXT: v_mov_b32_e32 v4, s24 -; SDAG-NEXT: v_mov_b32_e32 v5, s25 -; SDAG-NEXT: v_mov_b32_e32 v6, s26 -; SDAG-NEXT: v_mov_b32_e32 v7, s27 -; SDAG-NEXT: v_accvgpr_write_b32 a30, s18 -; SDAG-NEXT: v_accvgpr_write_b32 a29, s17 -; SDAG-NEXT: v_accvgpr_write_b32 a28, s16 -; SDAG-NEXT: v_accvgpr_write_b32 a27, s15 -; SDAG-NEXT: v_accvgpr_write_b32 a26, s14 -; SDAG-NEXT: v_accvgpr_write_b32 a25, s13 -; SDAG-NEXT: v_accvgpr_write_b32 a24, s12 -; SDAG-NEXT: v_accvgpr_write_b32 a23, s11 -; SDAG-NEXT: v_accvgpr_write_b32 a22, s10 -; SDAG-NEXT: v_accvgpr_write_b32 a21, s9 -; SDAG-NEXT: v_accvgpr_write_b32 a20, s8 -; SDAG-NEXT: v_accvgpr_write_b32 a19, s7 -; SDAG-NEXT: v_accvgpr_write_b32 a18, s6 -; SDAG-NEXT: v_accvgpr_write_b32 a17, s5 -; SDAG-NEXT: v_accvgpr_write_b32 a16, s4 -; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:2 abid:3 blgp:1 -; SDAG-NEXT: v_mov_b64_e32 v[4:5], 48 -; SDAG-NEXT: v_mov_b64_e32 v[6:7], 32 -; SDAG-NEXT: v_mov_b32_e32 v0, s12 -; SDAG-NEXT: v_mov_b32_e32 v1, s13 -; SDAG-NEXT: v_mov_b32_e32 v2, s14 -; SDAG-NEXT: v_mov_b32_e32 v3, s15 -; 
SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 4 -; SDAG-NEXT: global_store_dwordx4 v[4:5], a[12:15], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[6:7], a[8:11], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[8:9], a[4:7], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[10:11], a[0:3], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 -; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v0, s24 +; SDAG-NEXT: v_mov_b32_e32 v1, s25 +; SDAG-NEXT: v_mov_b32_e32 v2, s26 +; SDAG-NEXT: v_mov_b32_e32 v3, s27 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_mov_b32_e32 v4, s28 +; SDAG-NEXT: v_mov_b32_e32 v5, s29 +; SDAG-NEXT: v_mov_b32_e32 v6, s30 +; SDAG-NEXT: v_mov_b32_e32 v7, s31 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 +; SDAG-NEXT: v_mov_b64_e32 v[14:15], 0 ; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 ; SDAG-NEXT: v_mov_b32_e32 v0, s16 ; SDAG-NEXT: v_mov_b32_e32 v1, s17 ; SDAG-NEXT: v_mov_b32_e32 v2, s18 ; SDAG-NEXT: v_mov_b32_e32 v3, s19 -; SDAG-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_nop 6 +; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: 
global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v0, s4 -; SDAG-NEXT: v_mov_b32_e32 v1, s5 -; SDAG-NEXT: v_mov_b32_e32 v2, s6 -; SDAG-NEXT: v_mov_b32_e32 v3, s7 ; SDAG-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 ; SDAG-NEXT: v_mov_b32_e32 v3, s11 -; SDAG-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 0 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 0 +; GISEL-NEXT: v_mov_b64_e32 v[26:27], 48 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 16 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: 
v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1 ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[24:25] -; GISEL-NEXT: v_accvgpr_write_b32 a31, s19 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[26:27] -; GISEL-NEXT: v_accvgpr_write_b32 a30, s18 -; GISEL-NEXT: v_accvgpr_write_b32 a29, s17 -; GISEL-NEXT: v_accvgpr_write_b32 a28, s16 -; GISEL-NEXT: v_accvgpr_write_b32 a27, s15 -; GISEL-NEXT: v_accvgpr_write_b32 a26, s14 -; GISEL-NEXT: v_accvgpr_write_b32 a25, s13 -; GISEL-NEXT: v_accvgpr_write_b32 a24, s12 -; GISEL-NEXT: v_accvgpr_write_b32 a23, s11 -; GISEL-NEXT: v_accvgpr_write_b32 a22, s10 -; GISEL-NEXT: v_accvgpr_write_b32 a21, s9 -; GISEL-NEXT: v_accvgpr_write_b32 a20, s8 -; GISEL-NEXT: v_accvgpr_write_b32 a19, s7 -; GISEL-NEXT: v_accvgpr_write_b32 a18, s6 -; GISEL-NEXT: v_accvgpr_write_b32 a17, s5 -; GISEL-NEXT: v_accvgpr_write_b32 a16, s4 -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], 
v[0:3], v[4:7], a[16:31] cbsz:2 abid:3 blgp:1 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 16 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 32 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[0:3], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[24:25], 32 +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[24:25], a[24:27], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[26:27], a[28:31], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: 
global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 2, i32 3, i32 1) @@ -867,7 +1351,8 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac(<4 x i32> %arg0, <4 x i32> %ar ; GCN-NEXT: v_accvgpr_write_b32 a15, v23 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -911,7 +1396,8 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac__flags(<4 x i32> %arg0, <4 x i ; GCN-NEXT: v_accvgpr_write_b32 a15, v23 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -936,116 +1422,126 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac__flags(<4 x i32> %arg0, <4 x i define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__vgprcd: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[16:23], s[0:1], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v40, 0 +; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v32, s16 -; SDAG-NEXT: v_mov_b32_e32 v33, s17 -; SDAG-NEXT: v_mov_b32_e32 v34, s18 -; SDAG-NEXT: v_mov_b32_e32 v35, s19 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b32_e32 v36, s20 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 
-; SDAG-NEXT: v_mov_b32_e32 v37, s21 -; SDAG-NEXT: v_mov_b32_e32 v38, s22 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b32_e32 v4, s24 +; SDAG-NEXT: v_mov_b32_e32 v5, s25 +; SDAG-NEXT: v_mov_b32_e32 v6, s26 +; SDAG-NEXT: v_mov_b32_e32 v7, s27 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[18:19] -; SDAG-NEXT: v_mov_b32_e32 v39, s23 -; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[4:5] +; SDAG-NEXT: v_accvgpr_write_b32 a31, s23 +; SDAG-NEXT: v_accvgpr_write_b32 a30, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a29, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a28, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a27, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a26, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a25, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a24, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a23, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a22, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a21, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a20, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a19, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a18, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a17, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a16, s8 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 6 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] +; SDAG-NEXT: 
v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s12 -; SDAG-NEXT: v_mov_b32_e32 v17, s13 -; SDAG-NEXT: v_mov_b32_e32 v18, s14 -; SDAG-NEXT: v_mov_b32_e32 v19, s15 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s8 -; SDAG-NEXT: v_mov_b32_e32 v17, s9 -; SDAG-NEXT: v_mov_b32_e32 v18, s10 -; SDAG-NEXT: v_mov_b32_e32 v19, s11 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s4 -; SDAG-NEXT: v_mov_b32_e32 v17, s5 -; SDAG-NEXT: v_mov_b32_e32 v18, s6 -; SDAG-NEXT: v_mov_b32_e32 v19, s7 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] 
offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xa4 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; GISEL-NEXT: v_mov_b32_e32 v24, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[18:19] -; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[4:5] -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: v_mov_b32_e32 v20, 0 -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; 
GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:16 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:32 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:48 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1 ; 
GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[0:3], s[2:3] sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[4:7], s[2:3] offset:16 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[8:11], s[2:3] offset:32 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[12:15], s[2:3] offset:48 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 0, i32 0, i32 0) @@ -1057,116 +1553,126 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4 define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__vgprcd__flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[16:23], s[0:1], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v40, 0 +; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v32, s16 -; SDAG-NEXT: v_mov_b32_e32 v33, s17 -; SDAG-NEXT: v_mov_b32_e32 v34, s18 -; SDAG-NEXT: v_mov_b32_e32 v35, s19 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b32_e32 v36, s20 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v37, s21 -; SDAG-NEXT: v_mov_b32_e32 v38, s22 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; 
SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b32_e32 v4, s24 +; SDAG-NEXT: v_mov_b32_e32 v5, s25 +; SDAG-NEXT: v_mov_b32_e32 v6, s26 +; SDAG-NEXT: v_mov_b32_e32 v7, s27 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[18:19] -; SDAG-NEXT: v_mov_b32_e32 v39, s23 -; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[4:5] +; SDAG-NEXT: v_accvgpr_write_b32 a31, s23 +; SDAG-NEXT: v_accvgpr_write_b32 a30, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a29, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a28, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a27, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a26, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a25, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a24, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a23, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a22, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a21, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a20, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a19, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a18, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a17, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a16, s8 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 6 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:1 abid:2 blgp:3 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] 
offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s12 -; SDAG-NEXT: v_mov_b32_e32 v17, s13 -; SDAG-NEXT: v_mov_b32_e32 v18, s14 -; SDAG-NEXT: v_mov_b32_e32 v19, s15 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s8 -; SDAG-NEXT: v_mov_b32_e32 v17, s9 -; SDAG-NEXT: v_mov_b32_e32 v18, s10 -; SDAG-NEXT: v_mov_b32_e32 v19, s11 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_mov_b32_e32 v16, s4 -; SDAG-NEXT: v_mov_b32_e32 v17, s5 -; SDAG-NEXT: v_mov_b32_e32 v18, s6 -; SDAG-NEXT: v_mov_b32_e32 v19, s7 -; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1 +; SDAG-NEXT: v_mov_b32_e32 v0, s8 +; SDAG-NEXT: v_mov_b32_e32 v1, s9 +; SDAG-NEXT: v_mov_b32_e32 v2, s10 +; SDAG-NEXT: v_mov_b32_e32 v3, s11 +; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1 ; SDAG-NEXT: 
s_waitcnt vmcnt(0) -; SDAG-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1 +; SDAG-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd__flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xa4 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 +; GISEL-NEXT: v_mov_b32_e32 v24, 0 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[18:19] -; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[4:5] -; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3 -; GISEL-NEXT: s_nop 7 -; GISEL-NEXT: s_nop 6 -; GISEL-NEXT: v_mov_b32_e32 v20, 0 -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, 
s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:1 abid:2 blgp:3 +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:16 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:32 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v20, v[16:19], s[2:3] offset:48 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[0:3], s[2:3] sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] 
sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[4:7], s[2:3] offset:16 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[8:11], s[2:3] offset:32 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v20, v[12:15], s[2:3] offset:48 sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 1, i32 2, i32 3) @@ -1178,67 +1684,81 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %a define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[16:23], s[0:1], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v24, 0 +; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b32_e32 v20, s20 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v21, s21 -; SDAG-NEXT: v_mov_b32_e32 v22, s22 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b32_e32 v4, s24 +; SDAG-NEXT: v_mov_b32_e32 v5, s25 +; SDAG-NEXT: v_mov_b32_e32 v6, s26 +; SDAG-NEXT: v_mov_b32_e32 v7, s27 ; 
SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b32_e32 v23, s23 -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[18:19] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] -; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 -; SDAG-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v24, v[0:3], s[0:1] +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac: ; GISEL: ; %bb.0: -; 
GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xa4 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] -; GISEL-NEXT: v_mov_b32_e32 v16, 0 -; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], 
v[4:7], a[0:15] +; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[2:3] -; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[2:3] offset:16 -; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[2:3] offset:32 -; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[2:3] offset:48 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 0, i32 0, i32 0) store <16 x i32> %result, ptr addrspace(1) %out @@ -1248,67 +1768,81 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0 define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, ptr addrspace(1) %out) #0 { ; SDAG-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac_flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[16:23], s[0:1], 0x24 -; SDAG-NEXT: v_mov_b32_e32 v24, 0 +; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 -; SDAG-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; SDAG-NEXT: v_mov_b32_e32 v20, s20 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa4 -; SDAG-NEXT: v_mov_b32_e32 v21, s21 -; SDAG-NEXT: v_mov_b32_e32 v22, s22 +; SDAG-NEXT: v_mov_b32_e32 v0, s20 +; SDAG-NEXT: v_mov_b32_e32 v1, s21 +; SDAG-NEXT: v_mov_b32_e32 v2, s22 +; SDAG-NEXT: v_mov_b32_e32 v3, s23 +; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; SDAG-NEXT: v_mov_b32_e32 v4, s24 +; SDAG-NEXT: v_mov_b32_e32 v5, s25 +; 
SDAG-NEXT: v_mov_b32_e32 v6, s26 +; SDAG-NEXT: v_mov_b32_e32 v7, s27 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b32_e32 v23, s23 -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[18:19] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s8 +; SDAG-NEXT: v_accvgpr_write_b32 a1, s9 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s10 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s11 +; SDAG-NEXT: v_accvgpr_write_b32 a4, s12 +; SDAG-NEXT: v_accvgpr_write_b32 a5, s13 +; SDAG-NEXT: v_accvgpr_write_b32 a6, s14 +; SDAG-NEXT: v_accvgpr_write_b32 a7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a8, s16 +; SDAG-NEXT: v_accvgpr_write_b32 a9, s17 +; SDAG-NEXT: v_accvgpr_write_b32 a10, s18 +; SDAG-NEXT: v_accvgpr_write_b32 a11, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a12, s20 +; SDAG-NEXT: v_accvgpr_write_b32 a13, s21 +; SDAG-NEXT: v_accvgpr_write_b32 a14, s22 +; SDAG-NEXT: v_accvgpr_write_b32 a15, s23 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mov_b32_e32 v0, 0 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: s_nop 2 -; SDAG-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v24, v[0:3], s[0:1] +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], 
s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac_flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[20:27], s[0:1], 0x24 -; GISEL-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x64 -; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xa4 +; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24 +; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[24:25] -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[22:23] -; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[26:27] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s9 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s10 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s11 +; GISEL-NEXT: v_accvgpr_write_b32 a4, s12 +; GISEL-NEXT: v_accvgpr_write_b32 a5, s13 +; GISEL-NEXT: v_accvgpr_write_b32 a6, s14 +; GISEL-NEXT: v_accvgpr_write_b32 a7, s15 +; GISEL-NEXT: v_accvgpr_write_b32 a8, s16 +; GISEL-NEXT: v_accvgpr_write_b32 a9, s17 +; GISEL-NEXT: v_accvgpr_write_b32 a10, s18 +; GISEL-NEXT: v_accvgpr_write_b32 a11, s19 +; GISEL-NEXT: v_accvgpr_write_b32 a12, s20 +; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 +; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 +; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 
abid:2 blgp:1 -; GISEL-NEXT: v_mov_b32_e32 v16, 0 -; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-NEXT: s_nop 7 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[2:3] -; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[2:3] offset:16 -; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[2:3] offset:32 -; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[2:3] offset:48 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 ; GISEL-NEXT: s_endpgm %result = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x32.i8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x i32> %arg2, i32 3, i32 2, i32 1) store <16 x i32> %result, ptr addrspace(1) %out @@ -1428,41 +1962,44 @@ define <4 x float> @test_mfma_f32_16x16x32_bf16__flags(<8 x bfloat> %arg0, <8 x define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrspace(1) %out, <8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2) #0 { ; SDAG-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; SDAG-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] +; SDAG-NEXT: v_mov_b64_e32 
v[0:1], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] +; SDAG-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] ; SDAG-NEXT: s_nop 6 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GISEL-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] -; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: s_nop 4 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GISEL-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] +; 
GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0) store <4 x float> %result, ptr addrspace(1) %out @@ -1472,41 +2009,44 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(ptr addrspace(1) %out, <8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2) #0 { ; SDAG-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; SDAG-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; SDAG-NEXT: v_mov_b32_e32 v8, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] +; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 +; SDAG-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 ; SDAG-NEXT: s_nop 6 -; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; SDAG-NEXT: 
global_store_dwordx4 v8, a[0:3], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 -; GISEL-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x54 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 +; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[12:13] -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GISEL-NEXT: v_accvgpr_write_b32 a3, s3 ; GISEL-NEXT: s_nop 1 -; GISEL-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1 -; GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: s_nop 4 -; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GISEL-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1 +; GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GISEL-NEXT: s_nop 5 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2, i32 3, i32 2, i32 1) store <4 x float> %result, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll index 1e0a0bf2ca9d93..9a8282231ac15a 
100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll @@ -23,7 +23,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp0(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -46,7 +47,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -69,7 +71,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -92,7 +95,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -115,7 +119,8 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -138,7 +143,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -161,7 +167,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -184,7 +191,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -208,7 +216,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: 
s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -232,7 +241,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -256,7 +266,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -280,7 +291,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp2(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -304,7 +316,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -328,7 +341,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp3(<8 x ; GCN-NEXT: 
v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -352,7 +366,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -376,7 +391,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp4(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -400,7 +416,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz0__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -424,7 +441,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp0(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; 
GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -448,7 +466,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -472,7 +491,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp1(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -497,7 +517,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -521,7 +542,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp2(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:1 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -544,7 +566,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; 
GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -568,7 +591,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp3(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:1 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -592,7 +616,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3] cbsz:1 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -616,7 +641,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp4(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:1 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -640,7 +666,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz1__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:11], a[0:3] cbsz:1 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -664,7 +691,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp0(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -688,7 +716,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -712,7 +741,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp1(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:2 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -736,7 +766,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:2 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -760,7 +791,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], 
v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -784,7 +815,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -808,7 +839,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -832,7 +863,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -857,7 +888,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp0(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -881,7 +913,8 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -905,7 +938,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp1(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:3 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -929,7 +963,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3] cbsz:3 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -953,7 +988,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -977,7 +1012,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: 
v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1001,7 +1036,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1025,7 +1060,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1049,7 +1084,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1073,7 +1108,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz3__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:11], a[0:3] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1097,7 +1132,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 
a[0:3], v[0:5], v[6:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1121,7 +1156,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz2__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:9], a[0:3] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1145,7 +1180,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp0(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1169,7 +1205,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1193,7 +1230,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp1(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3], v16, v17 op_sel_hi:[0,0,0] cbsz:4 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1217,7 +1255,8 @@ define <4 x 
float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v15 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:11], a[0:3] cbsz:4 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1241,7 +1280,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp2(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1265,7 +1304,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1289,7 +1328,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp3(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3], v14, v15 op_sel_hi:[0,0,0] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1313,7 +1352,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v13 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:9], a[0:3] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; 
GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1337,7 +1376,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp4(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3], v12, v13 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1361,7 +1400,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__cbsz4__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a3, v11 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1389,7 +1428,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__sgpr_scaleA__sgpr_ ; GCN-NEXT: v_mov_b32_e32 v16, s1 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1409,7 +1449,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__sgpr_scaleA__vgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v20 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1429,7 +1470,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__vgpr_scaleA__sgpr_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: 
v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, s0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1443,6 +1485,14 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v12, s0 +; SDAG-NEXT: v_mov_b32_e32 v13, s1 +; SDAG-NEXT: v_mov_b32_e32 v14, s2 +; SDAG-NEXT: v_mov_b32_e32 v15, s3 +; SDAG-NEXT: v_mov_b32_e32 v16, s16 +; SDAG-NEXT: v_mov_b32_e32 v17, s17 +; SDAG-NEXT: v_mov_b32_e32 v18, s18 +; SDAG-NEXT: v_mov_b32_e32 v19, s19 ; SDAG-NEXT: v_mov_b32_e32 v20, s28 ; SDAG-NEXT: v_mov_b32_e32 v23, v1 ; SDAG-NEXT: v_mov_b32_e32 v22, v0 @@ -1456,20 +1506,13 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-NEXT: v_mov_b32_e32 v9, s25 ; SDAG-NEXT: v_mov_b32_e32 v10, s26 ; SDAG-NEXT: v_mov_b32_e32 v11, s27 -; SDAG-NEXT: v_mov_b32_e32 v12, s0 -; SDAG-NEXT: v_mov_b32_e32 v13, s1 -; SDAG-NEXT: v_mov_b32_e32 v14, s2 -; SDAG-NEXT: v_mov_b32_e32 v15, s3 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 -; SDAG-NEXT: v_mov_b32_e32 v17, s17 -; SDAG-NEXT: v_mov_b32_e32 v18, s18 -; SDAG-NEXT: v_mov_b32_e32 v19, s19 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v21 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v22 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[12:19], v[4:11], a[0:3], v2, v3 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1481,18 +1524,18 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s12, s0 ; GISEL-NEXT: s_mov_b32 s13, s1 -; GISEL-NEXT: v_mov_b32_e32 v20, s28 ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b32_e32 v20, s28 ; GISEL-NEXT: v_mov_b32_e32 v22, v0 ; GISEL-NEXT: v_mov_b32_e32 v23, v1 ; GISEL-NEXT: v_mov_b32_e32 v21, s29 -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_accvgpr_write_b32 a0, v20 -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[24:25] ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[26:27] @@ -1501,7 +1544,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_accvgpr_write_b32 a3, v23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[4:11], v[12:19], a[0:3], v2, v3 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1515,7 +1559,6 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgpr_vgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_mov_b32_e32 v14, s0 ; SDAG-NEXT: v_mov_b32_e32 v15, s1 ; SDAG-NEXT: v_mov_b32_e32 v16, s2 @@ -1524,12 +1567,14 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_mov_b32_e32 v19, s17 ; SDAG-NEXT: v_mov_b32_e32 v20, s18 ; SDAG-NEXT: v_mov_b32_e32 v21, 
s19 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], s20, v12 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1544,16 +1589,17 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[18:19] -; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], s20, v12 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1567,7 +1613,6 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgpr_sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_mov_b32_e32 v14, s0 ; SDAG-NEXT: v_mov_b32_e32 v15, s1 ; SDAG-NEXT: v_mov_b32_e32 v16, s2 @@ -1576,12 +1621,14 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_mov_b32_e32 v19, s17 ; SDAG-NEXT: v_mov_b32_e32 v20, s18 ; SDAG-NEXT: v_mov_b32_e32 v21, s19 +; SDAG-NEXT: 
v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, s20 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1596,16 +1643,17 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[18:19] -; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[14:21], v[0:7], a[0:3], v12, s20 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1633,7 +1681,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, s20 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1657,7 +1706,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: 
v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[14:21], a[0:3], v12, s20 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1677,7 +1727,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; GCN-NEXT: v_accvgpr_write_b32 a3, s3 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, s16 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1705,7 +1756,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a3, s23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, s24 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1720,16 +1772,17 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[12:13] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s20 ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[18:19] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s20 ; GISEL-NEXT: v_accvgpr_write_b32 a1, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a2, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a3, s23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[10:17], v[0:7], a[0:3], v8, s24 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; 
GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1749,7 +1802,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1770,7 +1824,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1787,7 +1842,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: v_mov_b32_e32 v16, 0x41 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1809,7 +1865,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1827,7 +1884,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale ; 
GISEL-NEXT: v_mov_b32_e32 v17, 0x4d ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1868,7 +1926,8 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; SDAG-NEXT: v_mov_b32_e32 v17, s13 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[14:15] ; SDAG-NEXT: s_endpgm ; @@ -1878,11 +1937,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x40 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s24 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s24 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -1893,7 +1952,8 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[30:31] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> 
%arg1, <4 x float> %arg2, i32 0, i32 2, i32 3, i32 %scale0, i32 1, i32 %scale1) @@ -1904,13 +1964,12 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32 define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, ptr addrspace(1) %ptr) #0 { ; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 +; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 ; SDAG-NEXT: s_movk_i32 s6, 0x41 ; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 @@ -1919,6 +1978,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: v_mov_b32_e32 v5, s13 ; SDAG-NEXT: v_mov_b32_e32 v6, s14 ; SDAG-NEXT: v_mov_b32_e32 v7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s0 ; SDAG-NEXT: v_mov_b32_e32 v8, s16 ; SDAG-NEXT: v_mov_b32_e32 v9, s17 ; SDAG-NEXT: v_mov_b32_e32 v10, s18 @@ -1932,7 +1992,8 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; SDAG-NEXT: v_accvgpr_write_b32 a3, s3 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[4:5] ; SDAG-NEXT: s_endpgm ; @@ -1940,15 +2001,15 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40 -; GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x50 ; GISEL-NEXT: v_mov_b32_e32 
v16, 0x41 +; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s0 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -1958,8 +2019,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0] ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 2 -; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5] ; GISEL-NEXT: s_endpgm %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 65, i32 1, i32 -2) store <4 x float> %result, ptr addrspace(1) %ptr, align 16 @@ -1977,7 +2039,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_a( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1998,7 +2061,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_0_b( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; 
GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2018,7 +2082,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_0_1(<8 ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 0, 1 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2038,7 +2103,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___constant_scale_1_0_a( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 1, 0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2062,7 +2128,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v8i32_fp6( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2085,7 +2152,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp8( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2108,7 +2176,7 @@ define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp6( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2131,7 +2199,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp6__v8i32_fp6_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2154,7 +2222,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v8i32_fp4( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2177,7 +2246,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp8( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2200,7 +2270,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v6i32_fp4( ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: 
s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2223,7 +2294,8 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v6i32_fp4__v8i32_fp8( ; GCN-NEXT: v_accvgpr_write_b32 a3, v17 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:5], v[6:13], a[0:3], v18, v19 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2246,7 +2318,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp4( ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2269,7 +2341,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v8i32_fp4_ ; GCN-NEXT: v_accvgpr_write_b32 a3, v19 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 6 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll index 426764d91b8a10..05f8739e7cb890 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll @@ -38,7 +38,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: 
v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -81,7 +83,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -131,7 +135,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -174,7 +180,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_1_1__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -224,7 +232,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_2__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 
+; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -267,7 +277,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_2__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -317,7 +329,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -360,7 +374,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -410,7 +426,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; 
SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -453,7 +471,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -503,7 +523,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_0__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -546,7 +568,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_0__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -596,7 +620,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_2_3__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -639,7 +665,9 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_2_3__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -689,7 +717,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_2__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -732,7 +762,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_3_2__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -781,7 +813,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp0__cons ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -832,7 +866,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 
a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] blgp:1 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -875,7 +911,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:1 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -923,7 +961,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp1__cons ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -973,7 +1013,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp2(<8 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1020,7 +1062,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: 
v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1070,7 +1114,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp3(<8 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1117,7 +1163,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1165,7 +1213,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp4(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1212,7 +1262,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz0__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1263,7 +1315,9 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:1 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1306,7 +1360,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:1 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1354,7 +1410,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp0__cons ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1405,7 +1463,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1(<8 x ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:1 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1448,7 +1508,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1(<8 x ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; 
GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:1 blgp:1 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1497,7 +1559,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp1__cons ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:1 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1547,7 +1611,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp2(<8 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1594,7 +1660,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] cbsz:1 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1644,7 +1712,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp3(<8 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:1 blgp:3 -; GCN-NEXT: s_nop 3 +; 
GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1691,7 +1761,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15] cbsz:1 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1739,7 +1811,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp4(<8 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:1 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1786,7 +1860,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz1__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:11], a[0:15] cbsz:1 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1836,7 +1912,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp0(<6 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: 
v_accvgpr_read_b32 v2, a2 @@ -1883,7 +1961,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1933,7 +2013,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp1(<6 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:2 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -1980,7 +2062,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:2 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2028,7 +2112,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2075,7 +2160,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; 
GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2123,7 +2209,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2170,7 +2257,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:2 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2221,7 +2309,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp0(<6 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2268,7 +2358,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, 
a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2318,7 +2410,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp1(<6 x ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:3 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2365,7 +2459,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v29 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15] cbsz:3 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2413,7 +2509,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp2(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2460,7 +2557,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:3 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2508,7 +2606,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp4(<6 x ; 
GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2555,7 +2654,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15] cbsz:3 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2603,7 +2703,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp3(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2650,7 +2751,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz3__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:11], a[0:15] cbsz:3 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2698,7 +2800,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp4(<6 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; 
GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2745,7 +2848,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz2__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:9], a[0:15] cbsz:2 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2793,7 +2897,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp0(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2840,7 +2946,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp0__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2888,7 +2996,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp1(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15], v28, v29 op_sel_hi:[0,0,0] cbsz:4 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2935,7 +3045,9 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp1__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v27 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:11], a[0:15] cbsz:4 blgp:1 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -2983,7 +3095,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp2(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3030,7 +3143,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp2__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15] cbsz:4 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3078,7 +3192,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp3(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15], v26, v27 op_sel_hi:[0,0,0] cbsz:4 blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3125,7 +3240,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp3__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v25 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:9], a[0:15] cbsz:4 
blgp:3 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3173,7 +3289,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp4(<4 x ; GCN-NEXT: v_accvgpr_write_b32 a15, v23 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:7], a[0:15], v24, v25 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3220,7 +3337,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__cbsz4__blgp4__cons ; GCN-NEXT: v_accvgpr_write_b32 a15, v23 ; GCN-NEXT: s_nop 1 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3273,7 +3391,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__sgpr_scaleA__sgpr_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3319,7 +3439,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__sgpr_scaleA__vgpr_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3365,7 
+3487,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgpr_scaleA__sgpr_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, s0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3391,6 +3515,14 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v32, s0 +; SDAG-NEXT: v_mov_b32_e32 v33, s1 +; SDAG-NEXT: v_mov_b32_e32 v34, s2 +; SDAG-NEXT: v_mov_b32_e32 v35, s3 +; SDAG-NEXT: v_mov_b32_e32 v36, s16 +; SDAG-NEXT: v_mov_b32_e32 v37, s17 +; SDAG-NEXT: v_mov_b32_e32 v38, s18 +; SDAG-NEXT: v_mov_b32_e32 v39, s19 ; SDAG-NEXT: v_mov_b32_e32 v16, s28 ; SDAG-NEXT: v_mov_b32_e32 v31, v13 ; SDAG-NEXT: v_mov_b32_e32 v30, v12 @@ -3416,14 +3548,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-NEXT: v_mov_b32_e32 v5, s25 ; SDAG-NEXT: v_mov_b32_e32 v6, s26 ; SDAG-NEXT: v_mov_b32_e32 v7, s27 -; SDAG-NEXT: v_mov_b32_e32 v32, s0 -; SDAG-NEXT: v_mov_b32_e32 v33, s1 -; SDAG-NEXT: v_mov_b32_e32 v34, s2 -; SDAG-NEXT: v_mov_b32_e32 v35, s3 -; SDAG-NEXT: v_mov_b32_e32 v36, s16 -; SDAG-NEXT: v_mov_b32_e32 v37, s17 -; SDAG-NEXT: v_mov_b32_e32 v38, s18 -; SDAG-NEXT: v_mov_b32_e32 v39, s19 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v18 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 @@ -3441,7 +3565,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; SDAG-NEXT: v_accvgpr_write_b32 a15, v31 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 op_sel_hi:[0,0,0] -; SDAG-NEXT: 
s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3465,7 +3591,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s12, s0 ; GISEL-NEXT: s_mov_b32 s13, s1 -; GISEL-NEXT: v_mov_b32_e32 v16, s28 ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b32_e32 v18, v0 @@ -3476,6 +3601,11 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_mov_b32_e32 v23, v5 ; GISEL-NEXT: v_mov_b32_e32 v24, v6 ; GISEL-NEXT: v_mov_b32_e32 v25, v7 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] +; GISEL-NEXT: v_mov_b32_e32 v16, s28 ; GISEL-NEXT: v_mov_b32_e32 v26, v8 ; GISEL-NEXT: v_mov_b32_e32 v27, v9 ; GISEL-NEXT: v_mov_b32_e32 v28, v10 @@ -3483,12 +3613,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_mov_b32_e32 v30, v12 ; GISEL-NEXT: v_mov_b32_e32 v31, v13 ; GISEL-NEXT: v_mov_b32_e32 v17, s29 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[26:27] ; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[24:25] ; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[22:23] ; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[20:21] @@ -3509,7 +3635,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgprs(<8 x i32> inr ; GISEL-NEXT: v_accvgpr_write_b32 a15, v31 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[32:39], a[0:15], v14, v15 op_sel_hi:[0,0,0] 
-; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3535,7 +3663,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgpr_vgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_mov_b32_e32 v26, s0 ; SDAG-NEXT: v_mov_b32_e32 v27, s1 ; SDAG-NEXT: v_mov_b32_e32 v28, s2 @@ -3544,6 +3671,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_mov_b32_e32 v31, s17 ; SDAG-NEXT: v_mov_b32_e32 v32, s18 ; SDAG-NEXT: v_mov_b32_e32 v33, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 @@ -3561,7 +3689,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], s20, v24 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3588,10 +3718,10 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[18:19] -; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; 
GISEL-NEXT: v_accvgpr_write_b32 a3, v11 @@ -3609,7 +3739,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__sgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], s20, v24 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3635,7 +3767,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgpr_sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_mov_b32_e32 v26, s0 ; SDAG-NEXT: v_mov_b32_e32 v27, s1 ; SDAG-NEXT: v_mov_b32_e32 v28, s2 @@ -3644,6 +3775,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_mov_b32_e32 v31, s17 ; SDAG-NEXT: v_mov_b32_e32 v32, s18 ; SDAG-NEXT: v_mov_b32_e32 v33, s19 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v8 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v9 ; SDAG-NEXT: v_accvgpr_write_b32 a2, v10 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v11 @@ -3661,7 +3793,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, s20 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3688,10 +3822,10 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 ; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[18:19] 
-; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[16:17] ; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[14:15] ; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[12:13] +; GISEL-NEXT: v_accvgpr_write_b32 a0, v8 ; GISEL-NEXT: v_accvgpr_write_b32 a1, v9 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v10 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v11 @@ -3709,7 +3843,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[26:33], v[0:7], a[0:15], v24, s20 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3761,7 +3897,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, v23 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, s20 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3809,7 +3947,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_sgpr_vgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, v23 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[26:33], a[0:15], v24, s20 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3853,7 +3993,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, s27 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: 
v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, s28 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3897,7 +4039,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_vgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, s27 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, s28 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3923,6 +4067,14 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgpr_sgpr: ; SDAG: ; %bb.0: ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v32, s0 +; SDAG-NEXT: v_mov_b32_e32 v33, s1 +; SDAG-NEXT: v_mov_b32_e32 v34, s2 +; SDAG-NEXT: v_mov_b32_e32 v35, s3 +; SDAG-NEXT: v_mov_b32_e32 v36, s16 +; SDAG-NEXT: v_mov_b32_e32 v37, s17 +; SDAG-NEXT: v_mov_b32_e32 v38, s18 +; SDAG-NEXT: v_mov_b32_e32 v39, s19 ; SDAG-NEXT: v_mov_b32_e32 v16, s20 ; SDAG-NEXT: v_mov_b32_e32 v31, v13 ; SDAG-NEXT: v_mov_b32_e32 v30, v12 @@ -3940,14 +4092,6 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_mov_b32_e32 v24, s28 ; SDAG-NEXT: v_mov_b32_e32 v25, s29 ; SDAG-NEXT: v_accvgpr_write_b32 a0, v16 -; SDAG-NEXT: v_mov_b32_e32 v32, s0 -; SDAG-NEXT: v_mov_b32_e32 v33, s1 -; SDAG-NEXT: v_mov_b32_e32 v34, s2 -; SDAG-NEXT: v_mov_b32_e32 v35, s3 -; SDAG-NEXT: v_mov_b32_e32 v36, s16 -; SDAG-NEXT: v_mov_b32_e32 v37, s17 -; SDAG-NEXT: v_mov_b32_e32 v38, s18 -; SDAG-NEXT: v_mov_b32_e32 v39, s19 ; SDAG-NEXT: v_accvgpr_write_b32 a1, v17 ; 
SDAG-NEXT: v_accvgpr_write_b32 a2, v18 ; SDAG-NEXT: v_accvgpr_write_b32 a3, v19 @@ -3965,7 +4109,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; SDAG-NEXT: v_accvgpr_write_b32 a15, v31 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -3987,11 +4133,15 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgpr_sgpr: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v16, s20 ; GISEL-NEXT: s_mov_b32 s12, s0 ; GISEL-NEXT: s_mov_b32 s13, s1 ; GISEL-NEXT: s_mov_b32 s14, s2 ; GISEL-NEXT: s_mov_b32 s15, s3 +; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[12:13] +; GISEL-NEXT: v_mov_b32_e32 v16, s20 ; GISEL-NEXT: v_mov_b32_e32 v26, v8 ; GISEL-NEXT: v_mov_b32_e32 v27, v9 ; GISEL-NEXT: v_mov_b32_e32 v28, v10 @@ -4007,11 +4157,7 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_mov_b32_e32 v23, s27 ; GISEL-NEXT: v_mov_b32_e32 v24, s28 ; GISEL-NEXT: v_mov_b32_e32 v25, s29 -; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[18:19] ; GISEL-NEXT: v_accvgpr_write_b32 a0, v16 -; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[12:13] ; GISEL-NEXT: v_accvgpr_write_b32 a1, v17 ; GISEL-NEXT: v_accvgpr_write_b32 a2, v18 ; GISEL-NEXT: v_accvgpr_write_b32 a3, v19 @@ -4029,7 +4175,9 @@ define <16 x float> 
@test_mfma_scale_f32_32x32x64_f8f6f4_0_0_sgpr_vgpr_sgpr__vgp ; GISEL-NEXT: v_accvgpr_write_b32 a15, v31 ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[32:39], v[0:7], a[0:15], v14, v15 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4074,7 +4222,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_inlineimm__ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 33, -2 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4120,7 +4270,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, -2 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4162,7 +4314,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, -2 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4209,7 +4363,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: 
s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v31 op_sel_hi:[0,0,0] -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4252,7 +4408,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__scaleA_kimm__scale ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -4280,7 +4438,6 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x80 -; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_accvgpr_write_b32 a0, s36 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 @@ -4314,14 +4471,17 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; SDAG-NEXT: v_accvgpr_write_b32 a13, s49 ; SDAG-NEXT: v_accvgpr_write_b32 a14, s50 ; SDAG-NEXT: v_accvgpr_write_b32 a15, s51 -; SDAG-NEXT: v_mov_b32_e32 v17, s1 +; SDAG-NEXT: v_mov_b32_e32 v16, s1 ; SDAG-NEXT: s_nop 1 -; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v17 op_sel_hi:[0,0,0] blgp:2 -; SDAG-NEXT: s_nop 3 -; SDAG-NEXT: global_store_dwordx4 v16, a[12:15], s[2:3] offset:48 -; SDAG-NEXT: global_store_dwordx4 v16, a[8:11], s[2:3] offset:32 -; SDAG-NEXT: global_store_dwordx4 v16, a[4:7], s[2:3] offset:16 -; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[2:3] +; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, 
v16 op_sel_hi:[0,0,0] blgp:2 +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[2:3] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[2:3] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[2:3] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[2:3] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd: @@ -4331,11 +4491,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x80 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s36 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s36 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -4358,7 +4518,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[2:3] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[2:3] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[2:3] offset:32 @@ -4372,13 +4534,11 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32> define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, ptr addrspace(1) %ptr) #0 { ; 
SDAG-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm: ; SDAG: ; %bb.0: -; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 ; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 -; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 +; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 ; SDAG-NEXT: s_movk_i32 s2, 0x41 -; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_accvgpr_write_b32 a0, s36 ; SDAG-NEXT: v_mov_b32_e32 v0, s8 ; SDAG-NEXT: v_mov_b32_e32 v1, s9 ; SDAG-NEXT: v_mov_b32_e32 v2, s10 @@ -4387,6 +4547,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; SDAG-NEXT: v_mov_b32_e32 v5, s13 ; SDAG-NEXT: v_mov_b32_e32 v6, s14 ; SDAG-NEXT: v_mov_b32_e32 v7, s15 +; SDAG-NEXT: v_accvgpr_write_b32 a0, s36 ; SDAG-NEXT: v_mov_b32_e32 v8, s16 ; SDAG-NEXT: v_mov_b32_e32 v9, s17 ; SDAG-NEXT: v_mov_b32_e32 v10, s18 @@ -4412,26 +4573,29 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; SDAG-NEXT: v_accvgpr_write_b32 a15, s51 ; SDAG-NEXT: s_nop 1 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s2, -2 op_sel_hi:[0,0,0] blgp:2 -; SDAG-NEXT: s_nop 3 -; SDAG-NEXT: global_store_dwordx4 v16, a[12:15], s[0:1] offset:48 -; SDAG-NEXT: global_store_dwordx4 v16, a[8:11], s[0:1] offset:32 -; SDAG-NEXT: global_store_dwordx4 v16, a[4:7], s[0:1] offset:16 -; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[0:1] +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 +; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm: ; GISEL: ; 
%bb.0: ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40 -; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 ; GISEL-NEXT: v_mov_b32_e32 v16, 0x41 +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s36 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s36 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] @@ -4453,7 +4617,9 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_ ; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, -2 op_sel_hi:[0,0,0] blgp:2 ; GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 ; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] ; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 ; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 @@ -4551,13 +4717,15 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__nonmac(<8 x ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; GISEL-NEXT: 
v_mov_b64_e32 v[8:9], s[44:45] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] @@ -4577,36 +4745,34 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__nonmac(<8 x ; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 ; GISEL-NEXT: v_mov_b32_e32 v16, s1 -; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 +; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel_hi:[0,0,0] ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 0 -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 16 -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 32 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; 
GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[0:3], off sc0 sc1 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0, i32 %scale0, i32 0, i32 %scale1) @@ -4699,13 +4865,16 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] @@ -4724,35 +4893,33 @@ define amdgpu_kernel void 
@test_mfma_scale_f32_32x32x64_f8f6f4_25_42__nonmac(<8 ; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 +; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 0 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 16 -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[16:19], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 32 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[0:3], 
off sc0 sc1 +; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 25, i32 0, i32 42) @@ -4845,13 +5012,16 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgprcd_nonma ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] -; GISEL-NEXT: v_accvgpr_write_b32 a31, s23 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] +; GISEL-NEXT: v_accvgpr_write_b32 a31, s23 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] @@ -4870,35 +5040,33 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgprcd_nonma ; GISEL-NEXT: v_accvgpr_write_b32 a18, s10 ; GISEL-NEXT: v_accvgpr_write_b32 a17, s9 ; GISEL-NEXT: v_accvgpr_write_b32 a16, s8 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], 
s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 +; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[16:31] blgp:2 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 0 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 16 -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[16:19], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 32 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[0:3], off sc0 sc1 +; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], 
off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0) @@ -4991,13 +5159,16 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; GISEL: ; %bb.0: ; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 ; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40 +; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0 +; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32 ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37] -; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] -; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39] ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41] ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45] +; GISEL-NEXT: v_accvgpr_write_b32 a0, s8 ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47] ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49] ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51] @@ -5016,35 +5187,33 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non ; GISEL-NEXT: v_accvgpr_write_b32 a13, s21 ; GISEL-NEXT: v_accvgpr_write_b32 a14, s22 ; GISEL-NEXT: v_accvgpr_write_b32 a15, s23 -; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11] -; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48 +; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2 -; GISEL-NEXT: 
v_mov_b64_e32 v[0:1], s[12:13] -; GISEL-NEXT: v_mov_b64_e32 v[4:5], 0 -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15] -; GISEL-NEXT: v_mov_b64_e32 v[6:7], 16 -; GISEL-NEXT: global_store_dwordx4 v[4:5], v[16:19], off sc0 sc1 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23] +; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_mov_b64_e32 v[8:9], 32 -; GISEL-NEXT: v_mov_b64_e32 v[10:11], 48 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[16:17] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] -; GISEL-NEXT: global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_nop 0 -; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[20:21] -; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[22:23] -; GISEL-NEXT: global_store_dwordx4 v[10:11], v[0:3], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[4:5], a[0:3], off sc0 sc1 +; GISEL-NEXT: s_nop 2 +; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[6:7], a[4:7], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: global_store_dwordx4 v[8:9], a[8:11], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) -; 
GISEL-NEXT: global_store_dwordx4 v[10:11], a[12:15], off sc0 sc1 +; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1 ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_endpgm %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 25, i32 0, i32 42) @@ -5076,7 +5245,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_0_a( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5121,7 +5292,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_0_b( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5166,7 +5339,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_0_1(<8 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 0, 1 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5211,7 +5386,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___constant_scale_1_0_a( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 1, 0 op_sel_hi:[0,0,0] -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; 
GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5262,7 +5439,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp6( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] blgp:2 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5305,7 +5484,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp6( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:2 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5355,7 +5536,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp8( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:2 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5398,7 +5581,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp8( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:2 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: 
v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5448,7 +5633,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5491,7 +5677,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:2 blgp:2 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5539,7 +5726,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp6__v8i32_fp6_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:2 blgp:2 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5589,7 +5777,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp4( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] blgp:4 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5632,7 +5822,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v8i32_fp4( ; 
GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] blgp:4 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5682,7 +5874,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp8( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:4 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5725,7 +5919,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp8( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:4 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5774,7 +5970,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8__v6i32_fp4( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5823,7 +6021,9 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v6i32_fp4__v8i32_fp8( ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: 
v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:5], v[6:13], a[0:15], v30, v31 op_sel_hi:[0,0,0] cbsz:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5873,7 +6073,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4( ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 ; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; SDAG-NEXT: s_nop 3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5916,7 +6117,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4( ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: s_nop 0 ; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v31, v32 op_sel_hi:[0,0,0] cbsz:4 blgp:4 -; GISEL-NEXT: s_nop 3 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 2 ; GISEL-NEXT: v_accvgpr_read_b32 v0, a0 ; GISEL-NEXT: v_accvgpr_read_b32 v1, a1 ; GISEL-NEXT: v_accvgpr_read_b32 v2, a2 @@ -5964,7 +6166,8 @@ define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp4__v8i32_fp4_ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 ; GCN-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15] cbsz:4 blgp:4 -; GCN-NEXT: s_nop 3 +; GCN-NEXT: s_nop 7 +; GCN-NEXT: s_nop 2 ; GCN-NEXT: v_accvgpr_read_b32 v0, a0 ; GCN-NEXT: v_accvgpr_read_b32 v1, a1 ; GCN-NEXT: v_accvgpr_read_b32 v2, a2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll new file mode 100644 index 00000000000000..0d5dfa46c2c260 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s + +; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.permlane16.swap +; ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32), %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.permlane16.swap) + + +declare { i32, i32 } @llvm.amdgcn.permlane16.swap(i32, i32, i1 immarg, i1 immarg) + +define { i32, i32 } @v_permlane16_swap_b32_vv(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vi(i32 %vdst_old) { +; GCN-LABEL: v_permlane16_swap_b32_vi: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 1, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vl(i32 %vdst_old) { +; GCN-LABEL: v_permlane16_swap_b32_vl: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 0xc1d1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } 
@llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 49617, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_iv(i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_iv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 1, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_ss(i32 inreg %vdst_old, i32 inreg %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_sv(i32 inreg %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_sv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vs(i32 %vdst_old, i32 inreg %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vs: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vv_fi(i32 
%vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vv_fi: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 fi:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vv_bc(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vv_bc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 true) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane16_swap_b32_vv_fi_bc(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane16_swap_b32_vv_fi_bc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 true) + ret { i32, i32 } %v +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll new file mode 100644 index 00000000000000..e3b0879af4307d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 
-filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s + +; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.permlane32.swap +; ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32), %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.permlane32.swap) + + +declare { i32, i32 } @llvm.amdgcn.permlane32.swap(i32, i32, i1 immarg, i1 immarg) + +define { i32, i32 } @v_permlane32_swap_b32_vv(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vi(i32 %vdst_old) { +; GCN-LABEL: v_permlane32_swap_b32_vi: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 1, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vl(i32 %vdst_old) { +; GCN-LABEL: v_permlane32_swap_b32_vl: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 0xc1d1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 49617, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_iv(i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_iv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 1, 
i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_ss(i32 inreg %vdst_old, i32 inreg %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_ss: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_sv(i32 inreg %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_sv: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vs(i32 %vdst_old, i32 inreg %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vs: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: v_permlane32_swap_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vv_fi(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vv_fi: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane32_swap_b32_e64 v0, v1 fi:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 false) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vv_bc(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vv_bc: +; GCN: 
; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane32_swap_b32_e64 v0, v1 bound_ctrl:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 true) + ret { i32, i32 } %v +} + +define { i32, i32 } @v_permlane32_swap_b32_vv_fi_bc(i32 %vdst_old, i32 %src0_old) { +; GCN-LABEL: v_permlane32_swap_b32_vv_fi_bc: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_permlane32_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %v = call { i32, i32 } @llvm.amdgcn.permlane32.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 true) + ret { i32, i32 } %v +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll index a2b7a649f9122f..be6ef315e4c743 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll @@ -2970,6 +2970,1662 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8__sgpr(<4 x i32> inreg %arg ret <4 x float> %result } +; -------------------------------------------------------------------- +; llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8 +; -------------------------------------------------------------------- + +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32>, <8 x i32>, <16 x float>, i32, i32 immarg, i32 immarg) + +define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_bf8__vgpr(ptr addrspace(1) %arg, <4 x i32> %a, <8 x i32> %b, i32 %idx) #0 { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__vgpr: +; SDAG: ; %bb.0: ; %bb +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; SDAG-NEXT: v_lshlrev_b32_e32 v16, 6, v0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 +; SDAG-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 +; SDAG-NEXT: global_load_dwordx4 
v[4:7], v16, s[0:1] offset:16 +; SDAG-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dword s2, s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 +; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s16 +; SDAG-NEXT: v_mov_b32_e32 v21, s17 +; SDAG-NEXT: v_mov_b32_e32 v22, s18 +; SDAG-NEXT: v_mov_b32_e32 v23, s19 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__vgpr: +; GISEL: ; %bb.0: ; %bb +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GISEL-NEXT: v_lshlrev_b32_e32 v16, 6, v0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 +; GISEL-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 +; GISEL-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 +; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11] +; 
GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19] +; GISEL-NEXT: v_mov_b32_e32 v28, s2 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13] +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GISEL-NEXT: s_endpgm +bb: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr <16 x float>, ptr addrspace(1) %arg, i32 %id + %in.1 = load <16 x float>, ptr addrspace(1) %gep + %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32> %a, <8 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) + store <16 x float> %mai.1, ptr addrspace(1) %arg + ret void +} + +define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_bf8: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, 
v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 a[0:15], v[0:3], v[4:11], v28 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_bf8: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[48:51], v[30:37], v28 +; GISEL-NEXT: s_setpc_b64 
s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8__flags0(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__flags0: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__flags0: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[48:51], v[30:37], v28 cbsz:1 abid:3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 1, i32 immarg 3) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8__flags1(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__flags1: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: 
v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__flags1: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; 
GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[48:51], v[30:37], v28 cbsz:3 abid:1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 3, i32 immarg 1) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8__sgpr(<4 x i32> inreg %arg0, <8 x i32> inreg %arg1, <16 x float> inreg %arg2, i32 inreg %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__sgpr: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v28, s0 +; SDAG-NEXT: v_mov_b32_e32 v29, s1 +; SDAG-NEXT: v_mov_b32_e32 v30, s2 +; SDAG-NEXT: v_mov_b32_e32 v31, s3 +; SDAG-NEXT: v_mov_b32_e32 v12, s24 +; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, v8 +; SDAG-NEXT: v_mov_b32_e32 v25, v7 +; SDAG-NEXT: v_mov_b32_e32 v24, v6 +; SDAG-NEXT: v_mov_b32_e32 v23, v5 +; SDAG-NEXT: v_mov_b32_e32 v22, v4 +; SDAG-NEXT: v_mov_b32_e32 v21, v3 +; SDAG-NEXT: v_mov_b32_e32 v20, v2 +; SDAG-NEXT: v_mov_b32_e32 v19, v1 +; SDAG-NEXT: v_mov_b32_e32 v18, v0 +; SDAG-NEXT: v_mov_b32_e32 v13, s25 +; SDAG-NEXT: v_mov_b32_e32 v14, s26 +; SDAG-NEXT: v_mov_b32_e32 v15, s27 +; SDAG-NEXT: v_mov_b32_e32 v16, s28 +; SDAG-NEXT: v_mov_b32_e32 v17, s29 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: v_mov_b32_e32 v4, s20 +; SDAG-NEXT: v_mov_b32_e32 v5, s21 +; SDAG-NEXT: v_mov_b32_e32 v6, s22 +; SDAG-NEXT: v_mov_b32_e32 v7, s23 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: 
v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 a[0:15], v[28:31], v[0:7], v10 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__sgpr: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[2:3] +; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[0:1] +; GISEL-NEXT: v_mov_b32_e32 v18, s24 +; GISEL-NEXT: v_mov_b32_e32 v19, s25 +; GISEL-NEXT: v_mov_b32_e32 v24, v0 +; GISEL-NEXT: v_mov_b32_e32 v25, v1 +; GISEL-NEXT: v_mov_b32_e32 v26, v2 +; GISEL-NEXT: v_mov_b32_e32 v27, v3 +; GISEL-NEXT: v_mov_b32_e32 v28, v4 +; GISEL-NEXT: v_mov_b32_e32 v29, v5 +; GISEL-NEXT: v_mov_b32_e32 v30, v6 +; GISEL-NEXT: v_mov_b32_e32 v31, v7 +; GISEL-NEXT: v_mov_b32_e32 v32, v8 +; GISEL-NEXT: v_mov_b32_e32 v33, v9 +; GISEL-NEXT: v_mov_b32_e32 v16, v10 +; GISEL-NEXT: v_mov_b32_e32 v20, s26 +; GISEL-NEXT: v_mov_b32_e32 v21, s27 +; GISEL-NEXT: 
v_mov_b32_e32 v22, s28 +; GISEL-NEXT: v_mov_b32_e32 v23, s29 +; GISEL-NEXT: v_mov_b64_e32 v[54:55], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[52:53], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[50:51], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[48:49], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[28:29] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[30:31] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[32:33] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[34:37], v[48:55], v16 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) + ret <16 x float> %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8 +; -------------------------------------------------------------------- + +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32>, <8 x i32>, <16 x float>, i32, i32 immarg, i32 immarg) + +define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_fp8__vgpr(ptr addrspace(1) %arg, <4 x i32> %a, <8 x i32> %b, i32 %idx) #0 { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__vgpr: +; SDAG: ; %bb.0: ; %bb +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; SDAG-NEXT: v_lshlrev_b32_e32 v16, 6, v0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 +; SDAG-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 +; SDAG-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 +; SDAG-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dword s2, s[4:5], 
0x64 +; SDAG-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 +; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s16 +; SDAG-NEXT: v_mov_b32_e32 v21, s17 +; SDAG-NEXT: v_mov_b32_e32 v22, s18 +; SDAG-NEXT: v_mov_b32_e32 v23, s19 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__vgpr: +; GISEL: ; %bb.0: ; %bb +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GISEL-NEXT: v_lshlrev_b32_e32 v16, 6, v0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 +; GISEL-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 +; GISEL-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 +; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19] +; GISEL-NEXT: v_mov_b32_e32 v28, s2 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17] +; 
GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13] +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GISEL-NEXT: s_endpgm +bb: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr <16 x float>, ptr addrspace(1) %arg, i32 %id + %in.1 = load <16 x float>, ptr addrspace(1) %gep + %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32> %a, <8 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) + store <16 x float> %mai.1, ptr addrspace(1) %arg + ret void +} + +define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_fp8: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 a[0:15], v[0:3], 
v[4:11], v28 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_fp8: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[48:51], v[30:37], v28 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) + ret 
<16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8__flags0(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__flags0: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__flags0: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: 
v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[48:51], v[30:37], v28 cbsz:1 abid:3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 1, i32 immarg 3) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8__flags1(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__flags1: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 
a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__flags1: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[48:51], v[30:37], v28 cbsz:3 abid:1 +; GISEL-NEXT: s_setpc_b64 s[30:31] 
+ %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 3, i32 immarg 1) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8__sgpr(<4 x i32> inreg %arg0, <8 x i32> inreg %arg1, <16 x float> inreg %arg2, i32 inreg %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__sgpr: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v28, s0 +; SDAG-NEXT: v_mov_b32_e32 v29, s1 +; SDAG-NEXT: v_mov_b32_e32 v30, s2 +; SDAG-NEXT: v_mov_b32_e32 v31, s3 +; SDAG-NEXT: v_mov_b32_e32 v12, s24 +; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, v8 +; SDAG-NEXT: v_mov_b32_e32 v25, v7 +; SDAG-NEXT: v_mov_b32_e32 v24, v6 +; SDAG-NEXT: v_mov_b32_e32 v23, v5 +; SDAG-NEXT: v_mov_b32_e32 v22, v4 +; SDAG-NEXT: v_mov_b32_e32 v21, v3 +; SDAG-NEXT: v_mov_b32_e32 v20, v2 +; SDAG-NEXT: v_mov_b32_e32 v19, v1 +; SDAG-NEXT: v_mov_b32_e32 v18, v0 +; SDAG-NEXT: v_mov_b32_e32 v13, s25 +; SDAG-NEXT: v_mov_b32_e32 v14, s26 +; SDAG-NEXT: v_mov_b32_e32 v15, s27 +; SDAG-NEXT: v_mov_b32_e32 v16, s28 +; SDAG-NEXT: v_mov_b32_e32 v17, s29 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: v_mov_b32_e32 v4, s20 +; SDAG-NEXT: v_mov_b32_e32 v5, s21 +; SDAG-NEXT: v_mov_b32_e32 v6, s22 +; SDAG-NEXT: v_mov_b32_e32 v7, s23 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; 
SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 a[0:15], v[28:31], v[0:7], v10 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__sgpr: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[2:3] +; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[0:1] +; GISEL-NEXT: v_mov_b32_e32 v18, s24 +; GISEL-NEXT: v_mov_b32_e32 v19, s25 +; GISEL-NEXT: v_mov_b32_e32 v24, v0 +; GISEL-NEXT: v_mov_b32_e32 v25, v1 +; GISEL-NEXT: v_mov_b32_e32 v26, v2 +; GISEL-NEXT: v_mov_b32_e32 v27, v3 +; GISEL-NEXT: v_mov_b32_e32 v28, v4 +; GISEL-NEXT: v_mov_b32_e32 v29, v5 +; GISEL-NEXT: v_mov_b32_e32 v30, v6 +; GISEL-NEXT: v_mov_b32_e32 v31, v7 +; GISEL-NEXT: v_mov_b32_e32 v32, v8 +; GISEL-NEXT: v_mov_b32_e32 v33, v9 +; GISEL-NEXT: v_mov_b32_e32 v16, v10 +; GISEL-NEXT: v_mov_b32_e32 v20, s26 +; GISEL-NEXT: v_mov_b32_e32 v21, s27 +; GISEL-NEXT: v_mov_b32_e32 v22, s28 +; GISEL-NEXT: v_mov_b32_e32 v23, s29 +; GISEL-NEXT: v_mov_b64_e32 v[54:55], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[52:53], s[20:21] 
+; GISEL-NEXT: v_mov_b64_e32 v[50:51], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[48:49], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[28:29] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[30:31] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[32:33] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[34:37], v[48:55], v16 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) + ret <16 x float> %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8 +; -------------------------------------------------------------------- + +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32>, <8 x i32>, <16 x float>, i32, i32 immarg, i32 immarg) + +define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_bf8__vgpr(ptr addrspace(1) %arg, <4 x i32> %a, <8 x i32> %b, i32 %idx) #0 { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__vgpr: +; SDAG: ; %bb.0: ; %bb +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; SDAG-NEXT: v_lshlrev_b32_e32 v16, 6, v0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 +; SDAG-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 +; SDAG-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 +; SDAG-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dword s2, s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 
+; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s16 +; SDAG-NEXT: v_mov_b32_e32 v21, s17 +; SDAG-NEXT: v_mov_b32_e32 v22, s18 +; SDAG-NEXT: v_mov_b32_e32 v23, s19 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__vgpr: +; GISEL: ; %bb.0: ; %bb +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GISEL-NEXT: v_lshlrev_b32_e32 v16, 6, v0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 +; GISEL-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 +; GISEL-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 +; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19] +; GISEL-NEXT: v_mov_b32_e32 v28, s2 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13] +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 
v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GISEL-NEXT: s_endpgm +bb: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr <16 x float>, ptr addrspace(1) %arg, i32 %id + %in.1 = load <16 x float>, ptr addrspace(1) %gep + %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32> %a, <8 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) + store <16 x float> %mai.1, ptr addrspace(1) %arg + ret void +} + +define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_bf8: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 a[0:15], v[0:3], v[4:11], v28 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: 
v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_bf8: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[48:51], v[30:37], v28 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8__flags0(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: 
test_smfmac_f32_32x32x64_fp8_bf8__flags0: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__flags0: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; 
GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[48:51], v[30:37], v28 cbsz:1 abid:3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 1, i32 immarg 3) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8__flags1(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__flags1: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 +; 
SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__flags1: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[48:51], v[30:37], v28 cbsz:3 abid:1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 3, i32 immarg 1) + 
ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8__sgpr(<4 x i32> inreg %arg0, <8 x i32> inreg %arg1, <16 x float> inreg %arg2, i32 inreg %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__sgpr: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v28, s0 +; SDAG-NEXT: v_mov_b32_e32 v29, s1 +; SDAG-NEXT: v_mov_b32_e32 v30, s2 +; SDAG-NEXT: v_mov_b32_e32 v31, s3 +; SDAG-NEXT: v_mov_b32_e32 v12, s24 +; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, v8 +; SDAG-NEXT: v_mov_b32_e32 v25, v7 +; SDAG-NEXT: v_mov_b32_e32 v24, v6 +; SDAG-NEXT: v_mov_b32_e32 v23, v5 +; SDAG-NEXT: v_mov_b32_e32 v22, v4 +; SDAG-NEXT: v_mov_b32_e32 v21, v3 +; SDAG-NEXT: v_mov_b32_e32 v20, v2 +; SDAG-NEXT: v_mov_b32_e32 v19, v1 +; SDAG-NEXT: v_mov_b32_e32 v18, v0 +; SDAG-NEXT: v_mov_b32_e32 v13, s25 +; SDAG-NEXT: v_mov_b32_e32 v14, s26 +; SDAG-NEXT: v_mov_b32_e32 v15, s27 +; SDAG-NEXT: v_mov_b32_e32 v16, s28 +; SDAG-NEXT: v_mov_b32_e32 v17, s29 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: v_mov_b32_e32 v4, s20 +; SDAG-NEXT: v_mov_b32_e32 v5, s21 +; SDAG-NEXT: v_mov_b32_e32 v6, s22 +; SDAG-NEXT: v_mov_b32_e32 v7, s23 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, 
v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 a[0:15], v[28:31], v[0:7], v10 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__sgpr: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[2:3] +; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[0:1] +; GISEL-NEXT: v_mov_b32_e32 v18, s24 +; GISEL-NEXT: v_mov_b32_e32 v19, s25 +; GISEL-NEXT: v_mov_b32_e32 v24, v0 +; GISEL-NEXT: v_mov_b32_e32 v25, v1 +; GISEL-NEXT: v_mov_b32_e32 v26, v2 +; GISEL-NEXT: v_mov_b32_e32 v27, v3 +; GISEL-NEXT: v_mov_b32_e32 v28, v4 +; GISEL-NEXT: v_mov_b32_e32 v29, v5 +; GISEL-NEXT: v_mov_b32_e32 v30, v6 +; GISEL-NEXT: v_mov_b32_e32 v31, v7 +; GISEL-NEXT: v_mov_b32_e32 v32, v8 +; GISEL-NEXT: v_mov_b32_e32 v33, v9 +; GISEL-NEXT: v_mov_b32_e32 v16, v10 +; GISEL-NEXT: v_mov_b32_e32 v20, s26 +; GISEL-NEXT: v_mov_b32_e32 v21, s27 +; GISEL-NEXT: v_mov_b32_e32 v22, s28 +; GISEL-NEXT: v_mov_b32_e32 v23, s29 +; GISEL-NEXT: v_mov_b64_e32 v[54:55], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[52:53], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[50:51], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[48:49], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[20:21] +; GISEL-NEXT: 
v_mov_b64_e32 v[4:5], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[28:29] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[30:31] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[32:33] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[34:37], v[48:55], v16 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) + ret <16 x float> %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8 +; -------------------------------------------------------------------- + +declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32>, <8 x i32>, <16 x float>, i32, i32 immarg, i32 immarg) + +define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_fp8__vgpr(ptr addrspace(1) %arg, <4 x i32> %a, <8 x i32> %b, i32 %idx) #0 { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__vgpr: +; SDAG: ; %bb.0: ; %bb +; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; SDAG-NEXT: v_lshlrev_b32_e32 v16, 6, v0 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 +; SDAG-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 +; SDAG-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 +; SDAG-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] +; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: s_load_dword s2, s[4:5], 0x64 +; SDAG-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 +; SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 +; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; 
SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s16 +; SDAG-NEXT: v_mov_b32_e32 v21, s17 +; SDAG-NEXT: v_mov_b32_e32 v22, s18 +; SDAG-NEXT: v_mov_b32_e32 v23, s19 +; SDAG-NEXT: v_mov_b32_e32 v28, s2 +; SDAG-NEXT: s_waitcnt vmcnt(0) +; SDAG-NEXT: s_nop 0 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__vgpr: +; GISEL: ; %bb.0: ; %bb +; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GISEL-NEXT: v_lshlrev_b32_e32 v16, 6, v0 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] +; GISEL-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 +; GISEL-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 +; GISEL-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 +; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54 +; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64 +; GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11] +; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9] +; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19] +; GISEL-NEXT: v_mov_b32_e32 v28, s2 +; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13] +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 +; GISEL-NEXT: v_mov_b32_e32 v16, 0 +; GISEL-NEXT: s_nop 7 +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: 
global_store_dwordx4 v16, v[0:3], s[0:1] +; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 +; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 +; GISEL-NEXT: s_endpgm +bb: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr <16 x float>, ptr addrspace(1) %arg, i32 %id + %in.1 = load <16 x float>, ptr addrspace(1) %gep + %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %a, <8 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) + store <16 x float> %mai.1, ptr addrspace(1) %arg + ret void +} + +define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_fp8: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 a[0:15], v[0:3], v[4:11], v28 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; 
SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_fp8: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[48:51], v[30:37], v28 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__flags0(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__flags0: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: 
v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 a[0:15], v[0:3], v[4:11], v28 cbsz:1 abid:3 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__flags0: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 
v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[48:51], v[30:37], v28 cbsz:1 abid:3 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 1, i32 immarg 3) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__flags1(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__flags1: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 a[0:15], v[0:3], v[4:11], v28 cbsz:3 abid:1 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: 
v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__flags1: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v48, v0 +; GISEL-NEXT: v_mov_b32_e32 v49, v1 +; GISEL-NEXT: v_mov_b32_e32 v50, v2 +; GISEL-NEXT: v_mov_b32_e32 v51, v3 +; GISEL-NEXT: v_mov_b32_e32 v30, v4 +; GISEL-NEXT: v_mov_b32_e32 v31, v5 +; GISEL-NEXT: v_mov_b32_e32 v32, v6 +; GISEL-NEXT: v_mov_b32_e32 v33, v7 +; GISEL-NEXT: v_mov_b32_e32 v34, v8 +; GISEL-NEXT: v_mov_b32_e32 v35, v9 +; GISEL-NEXT: v_mov_b32_e32 v36, v10 +; GISEL-NEXT: v_mov_b32_e32 v37, v11 +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[12:13] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[26:27] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[48:51], v[30:37], v28 cbsz:3 abid:1 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 3, i32 immarg 1) + ret <16 x float> %result +} + +define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__sgpr(<4 x i32> inreg %arg0, <8 x i32> inreg %arg1, <16 x float> inreg %arg2, i32 inreg 
%arg3) { +; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__sgpr: +; SDAG: ; %bb.0: +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v28, s0 +; SDAG-NEXT: v_mov_b32_e32 v29, s1 +; SDAG-NEXT: v_mov_b32_e32 v30, s2 +; SDAG-NEXT: v_mov_b32_e32 v31, s3 +; SDAG-NEXT: v_mov_b32_e32 v12, s24 +; SDAG-NEXT: v_mov_b32_e32 v27, v9 +; SDAG-NEXT: v_mov_b32_e32 v26, v8 +; SDAG-NEXT: v_mov_b32_e32 v25, v7 +; SDAG-NEXT: v_mov_b32_e32 v24, v6 +; SDAG-NEXT: v_mov_b32_e32 v23, v5 +; SDAG-NEXT: v_mov_b32_e32 v22, v4 +; SDAG-NEXT: v_mov_b32_e32 v21, v3 +; SDAG-NEXT: v_mov_b32_e32 v20, v2 +; SDAG-NEXT: v_mov_b32_e32 v19, v1 +; SDAG-NEXT: v_mov_b32_e32 v18, v0 +; SDAG-NEXT: v_mov_b32_e32 v13, s25 +; SDAG-NEXT: v_mov_b32_e32 v14, s26 +; SDAG-NEXT: v_mov_b32_e32 v15, s27 +; SDAG-NEXT: v_mov_b32_e32 v16, s28 +; SDAG-NEXT: v_mov_b32_e32 v17, s29 +; SDAG-NEXT: v_accvgpr_write_b32 a0, v12 +; SDAG-NEXT: v_mov_b32_e32 v0, s16 +; SDAG-NEXT: v_mov_b32_e32 v1, s17 +; SDAG-NEXT: v_mov_b32_e32 v2, s18 +; SDAG-NEXT: v_mov_b32_e32 v3, s19 +; SDAG-NEXT: v_mov_b32_e32 v4, s20 +; SDAG-NEXT: v_mov_b32_e32 v5, s21 +; SDAG-NEXT: v_mov_b32_e32 v6, s22 +; SDAG-NEXT: v_mov_b32_e32 v7, s23 +; SDAG-NEXT: v_accvgpr_write_b32 a1, v13 +; SDAG-NEXT: v_accvgpr_write_b32 a2, v14 +; SDAG-NEXT: v_accvgpr_write_b32 a3, v15 +; SDAG-NEXT: v_accvgpr_write_b32 a4, v16 +; SDAG-NEXT: v_accvgpr_write_b32 a5, v17 +; SDAG-NEXT: v_accvgpr_write_b32 a6, v18 +; SDAG-NEXT: v_accvgpr_write_b32 a7, v19 +; SDAG-NEXT: v_accvgpr_write_b32 a8, v20 +; SDAG-NEXT: v_accvgpr_write_b32 a9, v21 +; SDAG-NEXT: v_accvgpr_write_b32 a10, v22 +; SDAG-NEXT: v_accvgpr_write_b32 a11, v23 +; SDAG-NEXT: v_accvgpr_write_b32 a12, v24 +; SDAG-NEXT: v_accvgpr_write_b32 a13, v25 +; SDAG-NEXT: v_accvgpr_write_b32 a14, v26 +; SDAG-NEXT: v_accvgpr_write_b32 a15, v27 +; SDAG-NEXT: s_nop 1 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 a[0:15], v[28:31], v[0:7], v10 +; SDAG-NEXT: s_nop 7 +; SDAG-NEXT: s_nop 2 +; SDAG-NEXT: 
v_accvgpr_read_b32 v0, a0 +; SDAG-NEXT: v_accvgpr_read_b32 v1, a1 +; SDAG-NEXT: v_accvgpr_read_b32 v2, a2 +; SDAG-NEXT: v_accvgpr_read_b32 v3, a3 +; SDAG-NEXT: v_accvgpr_read_b32 v4, a4 +; SDAG-NEXT: v_accvgpr_read_b32 v5, a5 +; SDAG-NEXT: v_accvgpr_read_b32 v6, a6 +; SDAG-NEXT: v_accvgpr_read_b32 v7, a7 +; SDAG-NEXT: v_accvgpr_read_b32 v8, a8 +; SDAG-NEXT: v_accvgpr_read_b32 v9, a9 +; SDAG-NEXT: v_accvgpr_read_b32 v10, a10 +; SDAG-NEXT: v_accvgpr_read_b32 v11, a11 +; SDAG-NEXT: v_accvgpr_read_b32 v12, a12 +; SDAG-NEXT: v_accvgpr_read_b32 v13, a13 +; SDAG-NEXT: v_accvgpr_read_b32 v14, a14 +; SDAG-NEXT: v_accvgpr_read_b32 v15, a15 +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__sgpr: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[2:3] +; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[0:1] +; GISEL-NEXT: v_mov_b32_e32 v18, s24 +; GISEL-NEXT: v_mov_b32_e32 v19, s25 +; GISEL-NEXT: v_mov_b32_e32 v24, v0 +; GISEL-NEXT: v_mov_b32_e32 v25, v1 +; GISEL-NEXT: v_mov_b32_e32 v26, v2 +; GISEL-NEXT: v_mov_b32_e32 v27, v3 +; GISEL-NEXT: v_mov_b32_e32 v28, v4 +; GISEL-NEXT: v_mov_b32_e32 v29, v5 +; GISEL-NEXT: v_mov_b32_e32 v30, v6 +; GISEL-NEXT: v_mov_b32_e32 v31, v7 +; GISEL-NEXT: v_mov_b32_e32 v32, v8 +; GISEL-NEXT: v_mov_b32_e32 v33, v9 +; GISEL-NEXT: v_mov_b32_e32 v16, v10 +; GISEL-NEXT: v_mov_b32_e32 v20, s26 +; GISEL-NEXT: v_mov_b32_e32 v21, s27 +; GISEL-NEXT: v_mov_b32_e32 v22, s28 +; GISEL-NEXT: v_mov_b32_e32 v23, s29 +; GISEL-NEXT: v_mov_b64_e32 v[54:55], s[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[0:1], v[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[52:53], s[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[50:51], s[18:19] +; GISEL-NEXT: v_mov_b64_e32 v[48:49], s[16:17] +; GISEL-NEXT: v_mov_b64_e32 v[2:3], v[20:21] +; GISEL-NEXT: v_mov_b64_e32 v[4:5], v[22:23] +; GISEL-NEXT: v_mov_b64_e32 v[6:7], v[24:25] +; GISEL-NEXT: v_mov_b64_e32 v[8:9], v[26:27] +; GISEL-NEXT: v_mov_b64_e32 v[10:11], 
v[28:29] +; GISEL-NEXT: v_mov_b64_e32 v[12:13], v[30:31] +; GISEL-NEXT: v_mov_b64_e32 v[14:15], v[32:33] +; GISEL-NEXT: s_nop 1 +; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[34:37], v[48:55], v16 +; GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0) + ret <16 x float> %result +} + attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll index 824d3708c027db..33dd2bd540ad06 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -4,29 +4,15 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s -; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -passes='default' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 
-O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s - ; GCN-LABEL: {{^}}fold_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize( ; W32: v_mov_b32_e32 [[V:v[0-9]+]], 32 ; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64 ; GCN: store_{{dword|b32}} v{{.+}}, [[V]] -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: store i32 %tmp, ptr addrspace(1) %arg, align 4 -; OPT-NEXT: ret void define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) { + bb: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 store i32 %tmp, ptr addrspace(1) %arg, align 4 @@ -34,18 +20,12 @@ bb: } ; GCN-LABEL: {{^}}fold_and_optimize_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( ; W32: v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}} ; W64: v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}} ; GCN-NOT: cndmask ; GCN: store_{{dword|b32}} v{{.+}}, [[V]] -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: %tmp1 = icmp ugt i32 %tmp, 32 -; OPT: %tmp2 = select i1 %tmp1, i32 2, i32 1 -; OPT: store i32 %tmp2, ptr addrspace(1) %arg -; OPT-NEXT: ret void define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) { bb: @@ -57,13 +37,6 @@ bb: } ; GCN-LABEL: {{^}}fold_and_optimize_if_wavefrontsize: -; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( - -; OPT: bb: -; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() -; OPT: %tmp1 = icmp ugt i32 %tmp, 32 -; OPT: bb3: -; OPT-NEXT: ret void define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) { bb: diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index 9681b01f334f9a..2ba873f55a1eb0 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ 
-1,4 +1,5 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX940 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s # GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read # GCN: V_MOV_B32 @@ -144,7 +145,8 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_agpr_mfma_read_overlap body: | @@ -154,7 +156,8 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap body: | @@ -164,7 +167,8 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_SMFMAC name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap body: | @@ -174,8 +178,11 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap body: | @@ -185,8 +192,11 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap body: | @@ -215,8 +225,11 @@ body: | ... 
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap body: | @@ -228,7 +241,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap body: | @@ -240,7 +254,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap body: | @@ -272,7 +287,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC name: xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap body: | @@ -282,8 +298,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap body: | @@ -303,8 +323,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap body: | @@ -324,7 +348,8 @@ body: | ... 
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap body: | @@ -335,7 +360,8 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap body: | @@ -347,7 +373,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap body: | @@ -358,7 +385,8 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial body: | @@ -369,7 +397,8 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial body: | @@ -507,8 +536,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap body: | @@ -528,8 +561,12 @@ body: | ... 
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap body: | @@ -599,8 +636,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap body: | @@ -610,8 +651,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_SMFMAC name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap body: | @@ -621,8 +666,13 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 + # GCN-NEXT: V_SMFMAC name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap body: | @@ -803,8 +853,12 @@ body: | ... # GCN-LABEL: name: dmfma16x16_write_vgpr_valu_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MOV_B32 name: dmfma16x16_write_vgpr_valu_read body: | @@ -867,8 +921,13 @@ body: | ... 
# GCN-LABEL: name: dmfma16x16_write_vgpr_dot_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 + # GCN-NEXT: V_DOT name: dmfma16x16_write_vgpr_dot_read body: | @@ -1303,8 +1362,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_read_overlap body: | @@ -1324,8 +1387,13 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 0 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 0 + # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap body: | @@ -1335,7 +1403,8 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap body: | @@ -1346,7 +1415,8 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap body: | @@ -1358,7 +1428,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap body: | @@ -1398,8 +1469,12 @@ body: | ... 
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap body: | @@ -1419,8 +1494,13 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 + # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap body: | @@ -1450,8 +1530,12 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_MFMA name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap body: | @@ -1505,8 +1589,12 @@ body: | ... # GCN-LABEL: name: dmfma16x16_write_agpr_valu_read # GCN: V_MFMA -# GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 7 +# GFX940-NEXT: S_NOP 2 + +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 7 +# GFX950-NEXT: S_NOP 2 # GCN-NEXT: V_ACCVGPR_READ_B32_e64 name: dmfma16x16_write_agpr_valu_read body: | @@ -1575,7 +1663,8 @@ body: | ... # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap body: | @@ -1585,7 +1674,8 @@ body: | ... # GCN-LABEL: name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap body: | @@ -1595,7 +1685,8 @@ body: | ... 
# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap body: | @@ -1605,7 +1696,8 @@ body: | ... # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap body: | @@ -1615,7 +1707,8 @@ body: | ... # GCN-LABEL: name: smfmac16x16_write_agpr_smfmac_read_overlap # GCN: V_SMFMAC -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: smfmac16x16_write_agpr_smfmac_read_overlap body: | @@ -1713,7 +1806,8 @@ body: | ... # GCN-LABEL: name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap body: | @@ -1724,7 +1818,8 @@ body: | # GCN-LABEL: name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap body: | @@ -1790,7 +1885,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap body: | @@ -1909,7 +2005,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap body: | @@ -1919,7 +2016,8 @@ body: | ... 
# GCN-LABEL: name: smfmac16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: smfmac16x16_mfma_write_agpr_mfma_read_overlap body: | @@ -2033,7 +2131,8 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc body: | @@ -2078,7 +2177,8 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc body: | @@ -2120,7 +2220,8 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc body: | @@ -2165,7 +2266,8 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc body: | @@ -2208,7 +2310,8 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc body: | @@ -2254,7 +2357,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc body: | @@ -2342,7 +2446,8 @@ body: | # GCN-LABEL: name: 
xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc body: | @@ -2385,7 +2490,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc body: | @@ -2432,7 +2538,8 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 2 +# GFX940-NEXT: S_NOP 2 +# GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_SMFMAC_ name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc body: | @@ -2446,7 +2553,8 @@ body: | ... # GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA -# GCN-NEXT: S_NOP 4 +# GFX940-NEXT: S_NOP 4 +# GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC_ name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc body: | @@ -2460,7 +2568,8 @@ body: | # GCN-LABEL: name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC_ name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc body: | @@ -2474,7 +2583,8 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GCN-NEXT: S_NOP 0 +# GFX940-NEXT: S_NOP 0 +# GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC_ name: xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc body: | diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir index d91ef7b42a4a3b..f68b84c7140ba4 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-mfma-scale.gfx950.mir @@ -15,6 +15,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, 
$vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -36,6 +37,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 1, 1, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -57,6 +59,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, 
$vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -78,6 +81,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 0, 2, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -98,7 +102,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, 
$vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 2, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 1 + ; GCN-NEXT: S_NOP 5 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 2, 2, implicit $mode, implicit $exec @@ -118,7 +122,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 3, 3, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 1 + ; GCN-NEXT: S_NOP 5 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, 
implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 3, 3, implicit $mode, implicit $exec @@ -138,7 +142,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 4, 4, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 1 + ; GCN-NEXT: S_NOP 5 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed $vgpr0_vgpr1_vgpr2_vgpr3, 4, 4, implicit $mode, implicit $exec @@ -159,6 +163,8 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, 
$vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -180,6 +186,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit 
$mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, killed $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -201,6 +208,8 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, 
implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -222,6 +231,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 2, 2, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 7 ; GCN-NEXT: S_NOP 1 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = nofpexcept V_MFMA_SCALE_F32_32X32X64_F8F6F4_f8_f8_vgprcd_e64 $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, $sgpr4, $vgpr32, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 @@ -243,7 +253,8 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable 
$vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 3 + ; GCN-NEXT: S_NOP 7 + ; GCN-NEXT: S_NOP 2 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 0, 0, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec @@ -264,7 +275,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $sgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 3 + ; GCN-NEXT: S_NOP 6 ; GCN-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed 
$vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, killed $sgpr4, killed $vgpr21, 12, 4, implicit $mode, implicit $exec ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 renamable $vgpr0_vgpr1_vgpr2_vgpr3 = nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_vgprcd_e64 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19, 2, 2, $sgpr4, $vgpr21, 12, 4, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/ARM/fcmp-xo.ll b/llvm/test/CodeGen/ARM/fcmp-xo.ll index 3d5972f065859f..908dbd7a11a6b6 100644 --- a/llvm/test/CodeGen/ARM/fcmp-xo.ll +++ b/llvm/test/CodeGen/ARM/fcmp-xo.ll @@ -54,12 +54,12 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr { ; NEON-LABEL: float128: ; NEON: @ %bb.0: ; NEON-NEXT: mov.w r0, #1124073472 -; NEON-NEXT: vmov.f32 s2, #5.000000e-01 -; NEON-NEXT: vmov d3, r0, r0 -; NEON-NEXT: vmov.f32 s4, #-5.000000e-01 -; NEON-NEXT: vcmp.f32 s6, s0 +; NEON-NEXT: vmov.f32 s4, #5.000000e-01 +; NEON-NEXT: vmov d1, r0, r0 +; NEON-NEXT: vmov.f32 s6, #-5.000000e-01 +; NEON-NEXT: vcmp.f32 s2, s0 ; NEON-NEXT: vmrs APSR_nzcv, fpscr -; NEON-NEXT: vselgt.f32 s0, s4, s2 +; NEON-NEXT: vselgt.f32 s0, s6, s4 ; NEON-NEXT: bx lr %1 = fcmp nsz olt float %a0, 128.000000e+00 %2 = select i1 %1, float -5.000000e-01, float 5.000000e-01 diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll index 1988cb1d2f9039..7a1d5ddfa301b6 100644 --- a/llvm/test/CodeGen/ARM/fp16-instructions.ll +++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll @@ -700,9 +700,9 @@ define half @select_cc1(ptr %a0) { ; CHECK-LABEL: select_cc1: -; CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 s0, +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs 
APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -728,9 +728,9 @@ define half @select_cc_ge1(ptr %a0) { ; CHECK-LABEL: select_cc_ge1: -; CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vselge.f16 s0, ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -751,9 +751,9 @@ define half @select_cc_ge2(ptr %a0) { ; CHECK-LABEL: select_cc_ge2: -; CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vselge.f16 s0, ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -774,9 +774,9 @@ define half @select_cc_ge3(ptr %a0) { ; CHECK-LABEL: select_cc_ge3: -; CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vselge.f16 s0, ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -797,9 +797,9 @@ define half @select_cc_ge4(ptr %a0) { ; CHECK-LABEL: select_cc_ge4: -; CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}} ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -821,9 +821,9 @@ define half @select_cc_gt1(ptr %a0) { ; CHECK-LABEL: select_cc_gt1: -; 
CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}} ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -844,9 +844,9 @@ define half @select_cc_gt2(ptr %a0) { ; CHECK-LABEL: select_cc_gt2: -; CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}} ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -867,9 +867,9 @@ define half @select_cc_gt3(ptr %a0) { ; CHECK-LABEL: select_cc_gt3: -; CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}} ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -890,9 +890,9 @@ define half @select_cc_gt4(ptr %a0) { ; CHECK-LABEL: select_cc_gt4: -; CHECK-HARDFP-FULLFP16: vcmp.f16 -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}} +; CHECK-HARDFP-FULLFP16: vcmp.f16 +; CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr +; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}} ; CHECK-SOFTFP-FP16-A32: vcmp.f32 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr @@ -923,10 +923,10 @@ entry: ; CHECK-LABEL: select_cc4: ; CHECK-HARDFP-FULLFP16: vldr.16 [[S2:s[0-9]]], .LCPI{{.*}} +; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]] ; CHECK-HARDFP-FULLFP16: vldr.16 [[S4:s[0-9]]], .LCPI{{.*}} +; 
CHECK-HARDFP-FULLFP16: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16: vmov.f16 [[S6:s[0-9]]], #-2.000000e+00 -; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, [[S2]] -; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-HARDFP-FULLFP16-NEXT: vseleq.f16 [[S0:s[0-9]]], [[S6]], [[S4]] ; CHECK-HARDFP-FULLFP16-NEXT: vselvs.f16 s0, [[S6]], [[S0]] diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll index 56e734c4404336..996b46c51ab361 100644 --- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll +++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll @@ -5,11 +5,11 @@ define half @fp16_vminnm_o(half %a, half %b) { ; CHECK-LABEL: fp16_vminnm_o: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f16 s0, r1 -; CHECK-NEXT: vmov.f16 s2, r0 -; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmov.f16 s0, r0 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vselgt.f16 s0, s0, s2 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr entry: @@ -37,11 +37,11 @@ entry: define half @fp16_vminnm_u(half %a, half %b) { ; CHECK-LABEL: fp16_vminnm_u: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f16 s0, r0 -; CHECK-NEXT: vmov.f16 s2, r1 -; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, r0 +; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vselge.f16 s0, s0, s2 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr entry: @@ -53,11 +53,11 @@ entry: define half @fp16_vminnm_ule(half %a, half %b) { ; CHECK-LABEL: fp16_vminnm_ule: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f16 s0, r0 -; CHECK-NEXT: vmov.f16 s2, r1 -; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmov.f16 s0, r1 +; CHECK-NEXT: vmov.f16 s2, r0 +; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselgt.f16 s0, s2, s0 +; CHECK-NEXT: vselgt.f16 s0, s0, s2 ; CHECK-NEXT: vmov r0, s0 ; 
CHECK-NEXT: bx lr entry: @@ -69,11 +69,11 @@ entry: define half @fp16_vminnm_u_rev(half %a, half %b) { ; CHECK-LABEL: fp16_vminnm_u_rev: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f16 s0, r1 -; CHECK-NEXT: vmov.f16 s2, r0 -; CHECK-NEXT: vcmp.f16 s0, s2 +; CHECK-NEXT: vmov.f16 s0, r0 +; CHECK-NEXT: vmov.f16 s2, r1 +; CHECK-NEXT: vcmp.f16 s2, s0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vselge.f16 s0, s2, s0 +; CHECK-NEXT: vselge.f16 s0, s0, s2 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/ARM/fpscr-multi-use.ll b/llvm/test/CodeGen/ARM/fpscr-multi-use.ll new file mode 100644 index 00000000000000..3e77ad65df9927 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fpscr-multi-use.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s + +declare double @fn() + +define void @test(ptr %p, ptr %res) nounwind { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: vldr d8, [r0] +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vcmp.f64 d8, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vneg.f64 d16, d8 +; CHECK-NEXT: vmov.f64 d17, d8 +; CHECK-NEXT: vmovne.f64 d17, d16 +; CHECK-NEXT: vstr d17, [r1] +; CHECK-NEXT: bl fn +; CHECK-NEXT: vcmp.f64 d8, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: eor r1, r1, #-2147483648 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vmovne.f64 d16, d17 +; CHECK-NEXT: vstr d16, [r4] +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r4, pc} +entry: + %x = load double, ptr %p + %cmp = fcmp une double %x, 0.000000e+00 + %nx = fneg double %x + %sx = select i1 %cmp, double %nx, double %x + store double %sx, ptr %res + %y = call double @fn() + %ny = fneg double %y + %sy = select i1 %cmp, double %ny, double %y + store double %sy, ptr %res + ret void +} diff --git 
a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll index 4b27e804e6df9a..84f6ee276ba5f1 100644 --- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll @@ -258,11 +258,11 @@ define i13 @test_signed_i13_f32(float %f) nounwind { ; VFP2: @ %bb.0: ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: vldr s2, .LCPI2_0 +; VFP2-NEXT: vldr s6, .LCPI2_1 ; VFP2-NEXT: vcvt.s32.f32 s4, s0 ; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vldr s2, .LCPI2_1 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s2 +; VFP2-NEXT: vcmp.f32 s0, s6 ; VFP2-NEXT: vmov r0, s4 ; VFP2-NEXT: itt lt ; VFP2-NEXT: movwlt r0, #61440 @@ -358,11 +358,11 @@ define i16 @test_signed_i16_f32(float %f) nounwind { ; VFP2: @ %bb.0: ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: vldr s2, .LCPI3_0 +; VFP2-NEXT: vldr s6, .LCPI3_1 ; VFP2-NEXT: vcvt.s32.f32 s4, s0 ; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vldr s2, .LCPI3_1 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s2 +; VFP2-NEXT: vcmp.f32 s0, s6 ; VFP2-NEXT: vmov r0, s4 ; VFP2-NEXT: itt lt ; VFP2-NEXT: movwlt r0, #32768 @@ -458,11 +458,11 @@ define i19 @test_signed_i19_f32(float %f) nounwind { ; VFP2: @ %bb.0: ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: vldr s2, .LCPI4_0 +; VFP2-NEXT: vldr s6, .LCPI4_1 ; VFP2-NEXT: vcvt.s32.f32 s4, s0 ; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vldr s2, .LCPI4_1 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s2 +; VFP2-NEXT: vcmp.f32 s0, s6 ; VFP2-NEXT: vmov r0, s4 ; VFP2-NEXT: itt lt ; VFP2-NEXT: movlt r0, #0 @@ -639,39 +639,31 @@ define i50 @test_signed_i50_f32(float %f) nounwind { ; ; VFP-LABEL: test_signed_i50_f32: ; VFP: @ %bb.0: -; VFP-NEXT: .save {r7, lr} -; VFP-NEXT: push {r7, lr} -; VFP-NEXT: .vsave {d8} -; VFP-NEXT: vpush {d8} -; VFP-NEXT: vmov s16, r0 +; VFP-NEXT: .save {r4, lr} +; VFP-NEXT: push {r4, lr} +; VFP-NEXT: mov r4, r0 ; VFP-NEXT: bl __aeabi_f2lz ; VFP-NEXT: vldr s0, .LCPI6_0 -; VFP-NEXT: vldr s2, .LCPI6_1 -; VFP-NEXT: 
vcmp.f32 s16, s0 +; VFP-NEXT: vmov s2, r4 +; VFP-NEXT: vldr s4, .LCPI6_1 +; VFP-NEXT: vcmp.f32 s2, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, s2 -; VFP-NEXT: itt lt +; VFP-NEXT: ittt lt ; VFP-NEXT: movlt r1, #0 ; VFP-NEXT: movtlt r1, #65534 +; VFP-NEXT: movlt r0, #0 +; VFP-NEXT: vcmp.f32 s2, s4 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: itt gt +; VFP-NEXT: ittt gt ; VFP-NEXT: movwgt r1, #65535 ; VFP-NEXT: movtgt r1, #1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt -; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vcmp.f32 s16, s2 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it gt ; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vcmp.f32 s16, s16 +; VFP-NEXT: vcmp.f32 s2, s2 ; VFP-NEXT: vmrs APSR_nzcv, fpscr ; VFP-NEXT: itt vs ; VFP-NEXT: movvs r0, #0 ; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: vpop {d8} -; VFP-NEXT: pop {r7, pc} +; VFP-NEXT: pop {r4, pc} ; VFP-NEXT: .p2align 2 ; VFP-NEXT: @ %bb.1: ; VFP-NEXT: .LCPI6_0: @@ -765,27 +757,18 @@ define i64 @test_signed_i64_f32(float %f) nounwind { ; VFP-NEXT: vldr s4, .LCPI7_1 ; VFP-NEXT: vcmp.f32 s2, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: it lt +; VFP-NEXT: itt lt ; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: it gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: it vs -; VFP-NEXT: movvs r0, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt ; VFP-NEXT: movlt.w r1, #-2147483648 ; VFP-NEXT: vcmp.f32 s2, s4 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it gt +; VFP-NEXT: itt gt ; VFP-NEXT: mvngt r1, #-2147483648 +; VFP-NEXT: movgt.w r0, #-1 ; VFP-NEXT: vcmp.f32 s2, s2 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it vs +; VFP-NEXT: itt vs +; VFP-NEXT: movvs r0, #0 ; VFP-NEXT: movvs r1, #0 ; VFP-NEXT: pop {r4, pc} ; VFP-NEXT: .p2align 2 @@ -923,51 +906,24 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; VFP-NEXT: 
vldr s4, .LCPI8_1 ; VFP-NEXT: vcmp.f32 s2, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: it lt +; VFP-NEXT: itttt lt ; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: it gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: it vs -; VFP-NEXT: movvs r0, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: it lt ; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: it gt -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: it vs -; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: it lt ; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: it gt -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: it vs -; VFP-NEXT: movvs r2, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt ; VFP-NEXT: mvnlt r3, #7 ; VFP-NEXT: vcmp.f32 s2, s4 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it gt +; VFP-NEXT: itttt gt ; VFP-NEXT: movgt r3, #7 +; VFP-NEXT: movgt.w r2, #-1 +; VFP-NEXT: movgt.w r1, #-1 +; VFP-NEXT: movgt.w r0, #-1 ; VFP-NEXT: vcmp.f32 s2, s2 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it vs +; VFP-NEXT: itttt vs +; VFP-NEXT: movvs r0, #0 +; VFP-NEXT: movvs r1, #0 +; VFP-NEXT: movvs r2, #0 ; VFP-NEXT: movvs r3, #0 ; VFP-NEXT: pop {r4, pc} ; VFP-NEXT: .p2align 2 @@ -1108,51 +1064,24 @@ define i128 @test_signed_i128_f32(float %f) nounwind { ; VFP-NEXT: vldr s4, .LCPI9_1 ; VFP-NEXT: vcmp.f32 s2, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: it lt +; VFP-NEXT: itttt lt ; VFP-NEXT: movlt r0, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: it gt -; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vmrs 
APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: it vs -; VFP-NEXT: movvs r0, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: it lt ; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: it gt -; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: it vs -; VFP-NEXT: movvs r1, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s4 -; VFP-NEXT: it lt ; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s2 -; VFP-NEXT: it gt -; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s2, s0 -; VFP-NEXT: it vs -; VFP-NEXT: movvs r2, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt ; VFP-NEXT: movlt.w r3, #-2147483648 ; VFP-NEXT: vcmp.f32 s2, s4 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it gt +; VFP-NEXT: itttt gt ; VFP-NEXT: mvngt r3, #-2147483648 +; VFP-NEXT: movgt.w r2, #-1 +; VFP-NEXT: movgt.w r1, #-1 +; VFP-NEXT: movgt.w r0, #-1 ; VFP-NEXT: vcmp.f32 s2, s2 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it vs +; VFP-NEXT: itttt vs +; VFP-NEXT: movvs r0, #0 +; VFP-NEXT: movvs r1, #0 +; VFP-NEXT: movvs r2, #0 ; VFP-NEXT: movvs r3, #0 ; VFP-NEXT: pop {r4, pc} ; VFP-NEXT: .p2align 2 @@ -1451,15 +1380,15 @@ define i13 @test_signed_i13_f64(double %f) nounwind { ; VFP2: @ %bb.0: ; VFP2-NEXT: vmov d16, r0, r1 ; VFP2-NEXT: vldr d17, .LCPI12_0 +; VFP2-NEXT: vldr d18, .LCPI12_1 ; VFP2-NEXT: vcvt.s32.f64 s0, d16 ; VFP2-NEXT: vcmp.f64 d16, d17 -; VFP2-NEXT: vldr d17, .LCPI12_1 -; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: vcmp.f64 d16, d18 ; VFP2-NEXT: itt lt ; VFP2-NEXT: movwlt r0, #61440 ; VFP2-NEXT: movtlt r0, #65535 -; VFP2-NEXT: vcmp.f64 d16, d17 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: it gt ; VFP2-NEXT: movwgt r0, #4095 @@ -1568,15 +1497,15 @@ define i16 @test_signed_i16_f64(double %f) 
nounwind { ; VFP2: @ %bb.0: ; VFP2-NEXT: vmov d16, r0, r1 ; VFP2-NEXT: vldr d17, .LCPI13_0 +; VFP2-NEXT: vldr d18, .LCPI13_1 ; VFP2-NEXT: vcvt.s32.f64 s0, d16 ; VFP2-NEXT: vcmp.f64 d16, d17 -; VFP2-NEXT: vldr d17, .LCPI13_1 -; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: vcmp.f64 d16, d18 ; VFP2-NEXT: itt lt ; VFP2-NEXT: movwlt r0, #32768 ; VFP2-NEXT: movtlt r0, #65535 -; VFP2-NEXT: vcmp.f64 d16, d17 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: it gt ; VFP2-NEXT: movwgt r0, #32767 @@ -1685,15 +1614,15 @@ define i19 @test_signed_i19_f64(double %f) nounwind { ; VFP2: @ %bb.0: ; VFP2-NEXT: vmov d16, r0, r1 ; VFP2-NEXT: vldr d17, .LCPI14_0 +; VFP2-NEXT: vldr d18, .LCPI14_1 ; VFP2-NEXT: vcvt.s32.f64 s0, d16 ; VFP2-NEXT: vcmp.f64 d16, d17 -; VFP2-NEXT: vldr d17, .LCPI14_1 -; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: vcmp.f64 d16, d18 ; VFP2-NEXT: itt lt ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: movtlt r0, #65532 -; VFP2-NEXT: vcmp.f64 d16, d17 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: itt gt ; VFP2-NEXT: movwgt r0, #65535 @@ -1905,42 +1834,32 @@ define i50 @test_signed_i50_f64(double %f) nounwind { ; ; VFP2-LABEL: test_signed_i50_f64: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: .vsave {d8} -; VFP2-NEXT: vpush {d8} -; VFP2-NEXT: vmov d8, r0, r1 +; VFP2-NEXT: .save {r4, r5, r7, lr} +; VFP2-NEXT: push {r4, r5, r7, lr} +; VFP2-NEXT: mov r4, r1 +; VFP2-NEXT: mov r5, r0 ; VFP2-NEXT: bl __aeabi_d2lz ; VFP2-NEXT: vldr d16, .LCPI16_0 -; VFP2-NEXT: vldr d17, .LCPI16_1 -; VFP2-NEXT: vcmp.f64 d8, d16 +; VFP2-NEXT: vmov d17, r5, r4 +; VFP2-NEXT: vldr d18, .LCPI16_1 +; VFP2-NEXT: vcmp.f64 d17, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itt lt +; VFP2-NEXT: ittt lt ; VFP2-NEXT: movlt r1, #0 ; VFP2-NEXT: movtlt r1, #65534 -; VFP2-NEXT: vcmp.f64 d8, d17 +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vcmp.f64 d17, d18 ; 
VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itt gt +; VFP2-NEXT: ittt gt ; VFP2-NEXT: movwgt r1, #65535 ; VFP2-NEXT: movtgt r1, #1 -; VFP2-NEXT: vcmp.f64 d8, d8 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f64 d8, d17 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d8, d8 +; VFP2-NEXT: vcmp.f64 d17, d17 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs +; VFP2-NEXT: itt vs ; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: vpop {d8} -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: movvs r1, #0 +; VFP2-NEXT: pop {r4, r5, r7, pc} ; VFP2-NEXT: .p2align 3 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI16_0: @@ -2074,27 +1993,18 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; VFP2-NEXT: vldr d18, .LCPI17_1 ; VFP2-NEXT: vcmp.f64 d17, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itt lt ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d17, d17 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt.w r1, #-2147483648 ; VFP2-NEXT: vcmp.f64 d17, d18 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itt gt ; VFP2-NEXT: mvngt r1, #-2147483648 +; VFP2-NEXT: movgt.w r0, #-1 ; VFP2-NEXT: vcmp.f64 d17, d17 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs +; VFP2-NEXT: itt vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: movvs r1, #0 ; VFP2-NEXT: pop {r4, r5, r7, pc} ; VFP2-NEXT: .p2align 3 @@ -2118,27 +2028,18 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; FP16-NEXT: vldr d2, .LCPI17_1 ; FP16-NEXT: vcmp.f64 d1, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; 
FP16-NEXT: it lt +; FP16-NEXT: itt lt ; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vcmp.f64 d1, d2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f64 d1, d1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: vcmp.f64 d1, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt.w r1, #-2147483648 ; FP16-NEXT: vcmp.f64 d1, d2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itt gt ; FP16-NEXT: mvngt r1, #-2147483648 +; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f64 d1, d1 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs +; FP16-NEXT: itt vs +; FP16-NEXT: movvs r0, #0 ; FP16-NEXT: movvs r1, #0 ; FP16-NEXT: pop {r4, r5, r7, pc} ; FP16-NEXT: .p2align 3 @@ -2287,51 +2188,24 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; VFP2-NEXT: vldr d18, .LCPI18_1 ; VFP2-NEXT: vcmp.f64 d17, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itttt lt ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d17, d17 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: vcmp.f64 d17, d17 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: vcmp.f64 d17, d17 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r2, #0 -; VFP2-NEXT: 
vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt ; VFP2-NEXT: mvnlt r3, #7 ; VFP2-NEXT: vcmp.f64 d17, d18 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itttt gt ; VFP2-NEXT: movgt r3, #7 +; VFP2-NEXT: movgt.w r2, #-1 +; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: movgt.w r0, #-1 ; VFP2-NEXT: vcmp.f64 d17, d17 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs +; VFP2-NEXT: itttt vs +; VFP2-NEXT: movvs r0, #0 +; VFP2-NEXT: movvs r1, #0 +; VFP2-NEXT: movvs r2, #0 ; VFP2-NEXT: movvs r3, #0 ; VFP2-NEXT: pop {r4, r5, r7, pc} ; VFP2-NEXT: .p2align 3 @@ -2350,56 +2224,29 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; FP16-NEXT: mov r4, r1 ; FP16-NEXT: mov r5, r0 ; FP16-NEXT: bl __fixdfti -; FP16-NEXT: vldr d2, .LCPI18_0 -; FP16-NEXT: vmov d0, r5, r4 -; FP16-NEXT: vldr d1, .LCPI18_1 -; FP16-NEXT: vcmp.f64 d0, d2 +; FP16-NEXT: vldr d0, .LCPI18_0 +; FP16-NEXT: vmov d1, r5, r4 +; FP16-NEXT: vldr d2, .LCPI18_1 +; FP16-NEXT: vcmp.f64 d1, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt +; FP16-NEXT: itttt lt ; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vcmp.f64 d0, d1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f64 d0, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: vcmp.f64 d0, d2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vcmp.f64 d0, d1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: vcmp.f64 d0, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs -; FP16-NEXT: movvs r1, #0 -; FP16-NEXT: vcmp.f64 d0, d2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: vcmp.f64 d0, d1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: vcmp.f64 d0, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs -; 
FP16-NEXT: movvs r2, #0 -; FP16-NEXT: vcmp.f64 d0, d2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: mvnlt r3, #7 -; FP16-NEXT: vcmp.f64 d0, d1 +; FP16-NEXT: vcmp.f64 d1, d2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itttt gt ; FP16-NEXT: movgt r3, #7 -; FP16-NEXT: vcmp.f64 d0, d0 +; FP16-NEXT: movgt.w r2, #-1 +; FP16-NEXT: movgt.w r1, #-1 +; FP16-NEXT: movgt.w r0, #-1 +; FP16-NEXT: vcmp.f64 d1, d1 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs +; FP16-NEXT: itttt vs +; FP16-NEXT: movvs r0, #0 +; FP16-NEXT: movvs r1, #0 +; FP16-NEXT: movvs r2, #0 ; FP16-NEXT: movvs r3, #0 ; FP16-NEXT: pop {r4, r5, r7, pc} ; FP16-NEXT: .p2align 3 @@ -2550,51 +2397,24 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; VFP2-NEXT: vldr d18, .LCPI19_1 ; VFP2-NEXT: vcmp.f64 d17, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itttt lt ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d17, d17 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: movlt.w r3, #-2147483648 ; VFP2-NEXT: vcmp.f64 d17, d18 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itttt gt +; VFP2-NEXT: mvngt r3, #-2147483648 +; VFP2-NEXT: movgt.w r2, #-1 ; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: movgt.w r0, #-1 ; VFP2-NEXT: vcmp.f64 d17, d17 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs +; VFP2-NEXT: itttt vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: 
vcmp.f64 d17, d17 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r2, #0 -; VFP2-NEXT: vcmp.f64 d17, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r3, #-2147483648 -; VFP2-NEXT: vcmp.f64 d17, d18 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: mvngt r3, #-2147483648 -; VFP2-NEXT: vcmp.f64 d17, d17 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r3, #0 ; VFP2-NEXT: pop {r4, r5, r7, pc} ; VFP2-NEXT: .p2align 3 @@ -2613,56 +2433,29 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; FP16-NEXT: mov r4, r1 ; FP16-NEXT: mov r5, r0 ; FP16-NEXT: bl __fixdfti -; FP16-NEXT: vldr d2, .LCPI19_0 -; FP16-NEXT: vmov d0, r5, r4 -; FP16-NEXT: vldr d1, .LCPI19_1 -; FP16-NEXT: vcmp.f64 d0, d2 +; FP16-NEXT: vldr d0, .LCPI19_0 +; FP16-NEXT: vmov d1, r5, r4 +; FP16-NEXT: vldr d2, .LCPI19_1 +; FP16-NEXT: vcmp.f64 d1, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt +; FP16-NEXT: itttt lt ; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vcmp.f64 d0, d1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f64 d0, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: vcmp.f64 d0, d2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vcmp.f64 d0, d1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: vcmp.f64 d0, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs -; FP16-NEXT: movvs r1, #0 -; FP16-NEXT: vcmp.f64 d0, d2 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: vcmp.f64 d0, d1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: vcmp.f64 d0, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs -; FP16-NEXT: movvs r2, #0 -; FP16-NEXT: vcmp.f64 d0, d2 -; FP16-NEXT: vmrs 
APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt.w r3, #-2147483648 -; FP16-NEXT: vcmp.f64 d0, d1 +; FP16-NEXT: vcmp.f64 d1, d2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itttt gt ; FP16-NEXT: mvngt r3, #-2147483648 -; FP16-NEXT: vcmp.f64 d0, d0 +; FP16-NEXT: movgt.w r2, #-1 +; FP16-NEXT: movgt.w r1, #-1 +; FP16-NEXT: movgt.w r0, #-1 +; FP16-NEXT: vcmp.f64 d1, d1 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs +; FP16-NEXT: itttt vs +; FP16-NEXT: movvs r0, #0 +; FP16-NEXT: movvs r1, #0 +; FP16-NEXT: movvs r2, #0 ; FP16-NEXT: movvs r3, #0 ; FP16-NEXT: pop {r4, r5, r7, pc} ; FP16-NEXT: .p2align 3 @@ -2949,11 +2742,11 @@ define i13 @test_signed_i13_f16(half %f) nounwind { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: vldr s2, .LCPI22_0 +; VFP2-NEXT: vldr s6, .LCPI22_1 ; VFP2-NEXT: vcvt.s32.f32 s4, s0 ; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vldr s2, .LCPI22_1 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s2 +; VFP2-NEXT: vcmp.f32 s0, s6 ; VFP2-NEXT: vmov r0, s4 ; VFP2-NEXT: itt lt ; VFP2-NEXT: movwlt r0, #61440 @@ -3055,11 +2848,11 @@ define i16 @test_signed_i16_f16(half %f) nounwind { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: vldr s2, .LCPI23_0 +; VFP2-NEXT: vldr s6, .LCPI23_1 ; VFP2-NEXT: vcvt.s32.f32 s4, s0 ; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vldr s2, .LCPI23_1 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s2 +; VFP2-NEXT: vcmp.f32 s0, s6 ; VFP2-NEXT: vmov r0, s4 ; VFP2-NEXT: itt lt ; VFP2-NEXT: movwlt r0, #32768 @@ -3161,11 +2954,11 @@ define i19 @test_signed_i19_f16(half %f) nounwind { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: vldr s2, .LCPI24_0 +; VFP2-NEXT: vldr s6, .LCPI24_1 ; VFP2-NEXT: vcvt.s32.f32 s4, s0 ; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vldr s2, .LCPI24_1 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s2 +; VFP2-NEXT: vcmp.f32 s0, s6 ; VFP2-NEXT: vmov r0, s4 ; VFP2-NEXT: itt lt ; 
VFP2-NEXT: movlt r0, #0 @@ -3357,40 +3150,32 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; ; VFP2-LABEL: test_signed_i50_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: .vsave {d8} -; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: mov r4, r0 ; VFP2-NEXT: bl __aeabi_f2lz ; VFP2-NEXT: vldr s0, .LCPI26_0 -; VFP2-NEXT: vldr s2, .LCPI26_1 -; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: vmov s2, r4 +; VFP2-NEXT: vldr s4, .LCPI26_1 +; VFP2-NEXT: vcmp.f32 s2, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, s2 -; VFP2-NEXT: itt lt +; VFP2-NEXT: ittt lt ; VFP2-NEXT: movlt r1, #0 ; VFP2-NEXT: movtlt r1, #65534 +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vcmp.f32 s2, s4 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, s0 -; VFP2-NEXT: itt gt +; VFP2-NEXT: ittt gt ; VFP2-NEXT: movwgt r1, #65535 ; VFP2-NEXT: movtgt r1, #1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f32 s16, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f32 s16, s16 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: itt vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: vpop {d8} -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: pop {r4, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI26_0: @@ -3412,21 +3197,15 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; FP16-NEXT: vldr s2, .LCPI26_1 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: itt lt +; FP16-NEXT: ittt lt ; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movtlt r1, #65534 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: itt gt -; FP16-NEXT: movwgt r1, #65535 -; FP16-NEXT: movtgt r1, #1 -; FP16-NEXT: vmrs 
APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: ittt gt +; FP16-NEXT: movwgt r1, #65535 +; FP16-NEXT: movtgt r1, #1 ; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f32 s16, s16 ; FP16-NEXT: vmrs APSR_nzcv, fpscr @@ -3531,27 +3310,18 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; VFP2-NEXT: vldr s4, .LCPI27_1 ; VFP2-NEXT: vcmp.f32 s2, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s4 -; VFP2-NEXT: it lt +; VFP2-NEXT: itt lt ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt.w r1, #-2147483648 ; VFP2-NEXT: vcmp.f32 s2, s4 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itt gt ; VFP2-NEXT: mvngt r1, #-2147483648 +; VFP2-NEXT: movgt.w r0, #-1 ; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs +; VFP2-NEXT: itt vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: movvs r1, #0 ; VFP2-NEXT: pop {r4, pc} ; VFP2-NEXT: .p2align 2 @@ -3575,27 +3345,18 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; FP16-NEXT: vldr s2, .LCPI27_1 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: it lt +; FP16-NEXT: itt lt ; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt.w r1, #-2147483648 ; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itt gt 
; FP16-NEXT: mvngt r1, #-2147483648 +; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f32 s16, s16 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs +; FP16-NEXT: itt vs +; FP16-NEXT: movvs r0, #0 ; FP16-NEXT: movvs r1, #0 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} @@ -3737,51 +3498,24 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; VFP2-NEXT: vldr s4, .LCPI28_1 ; VFP2-NEXT: vcmp.f32 s2, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s4 -; VFP2-NEXT: it lt +; VFP2-NEXT: itttt lt ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s4 -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s4 -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r2, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt ; VFP2-NEXT: mvnlt r3, #7 ; VFP2-NEXT: vcmp.f32 s2, s4 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itttt gt ; VFP2-NEXT: movgt r3, #7 +; VFP2-NEXT: movgt.w r2, #-1 +; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: movgt.w r0, #-1 ; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs +; VFP2-NEXT: itttt vs +; VFP2-NEXT: movvs r0, #0 +; VFP2-NEXT: movvs r1, #0 +; VFP2-NEXT: movvs r2, #0 ; VFP2-NEXT: movvs r3, #0 ; VFP2-NEXT: pop {r4, 
pc} ; VFP2-NEXT: .p2align 2 @@ -3805,51 +3539,24 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; FP16-NEXT: vldr s2, .LCPI28_1 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: it lt +; FP16-NEXT: itttt lt ; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: it lt ; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it vs -; FP16-NEXT: movvs r1, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: it lt ; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it vs -; FP16-NEXT: movvs r2, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: mvnlt r3, #7 ; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itttt gt ; FP16-NEXT: movgt r3, #7 +; FP16-NEXT: movgt.w r2, #-1 +; FP16-NEXT: movgt.w r1, #-1 +; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f32 s16, s16 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs +; FP16-NEXT: itttt vs +; FP16-NEXT: movvs r0, #0 +; FP16-NEXT: movvs r1, #0 +; FP16-NEXT: movvs r2, #0 ; FP16-NEXT: movvs r3, #0 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} @@ -3994,51 +3701,24 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; VFP2-NEXT: vldr s4, .LCPI29_1 ; VFP2-NEXT: vcmp.f32 s2, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s4 
-; VFP2-NEXT: it lt +; VFP2-NEXT: itttt lt ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s4 -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r1, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s4 -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s2 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: it vs -; VFP2-NEXT: movvs r2, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt ; VFP2-NEXT: movlt.w r3, #-2147483648 ; VFP2-NEXT: vcmp.f32 s2, s4 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itttt gt ; VFP2-NEXT: mvngt r3, #-2147483648 +; VFP2-NEXT: movgt.w r2, #-1 +; VFP2-NEXT: movgt.w r1, #-1 +; VFP2-NEXT: movgt.w r0, #-1 ; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it vs +; VFP2-NEXT: itttt vs +; VFP2-NEXT: movvs r0, #0 +; VFP2-NEXT: movvs r1, #0 +; VFP2-NEXT: movvs r2, #0 ; VFP2-NEXT: movvs r3, #0 ; VFP2-NEXT: pop {r4, pc} ; VFP2-NEXT: .p2align 2 @@ -4062,51 +3742,24 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; FP16-NEXT: vldr s2, .LCPI29_1 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: it lt +; FP16-NEXT: itttt lt ; FP16-NEXT: movlt r0, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr 
-; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it vs -; FP16-NEXT: movvs r0, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: it lt ; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it vs -; FP16-NEXT: movvs r1, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s2 -; FP16-NEXT: it lt ; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s16 -; FP16-NEXT: it gt -; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it vs -; FP16-NEXT: movvs r2, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt ; FP16-NEXT: movlt.w r3, #-2147483648 ; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itttt gt ; FP16-NEXT: mvngt r3, #-2147483648 +; FP16-NEXT: movgt.w r2, #-1 +; FP16-NEXT: movgt.w r1, #-1 +; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f32 s16, s16 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it vs +; FP16-NEXT: itttt vs +; FP16-NEXT: movvs r0, #0 +; FP16-NEXT: movvs r1, #0 +; FP16-NEXT: movvs r2, #0 ; FP16-NEXT: movvs r3, #0 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll index 3438fb113015cb..14eb67104eddac 100644 --- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll @@ -503,8 +503,8 @@ define i50 @test_signed_i50_f32(float %f) nounwind { ; VFP-NEXT: vcmp.f32 s16, #0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr ; VFP-NEXT: itt lt -; VFP-NEXT: movlt r0, #0 ; VFP-NEXT: movlt r1, #0 +; VFP-NEXT: movlt r0, #0 ; VFP-NEXT: vcmp.f32 s16, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr ; VFP-NEXT: ittt gt @@ -586,19 +586,13 @@ define i64 @test_signed_i64_f32(float %f) nounwind { ; 
VFP-NEXT: vldr s0, .LCPI7_0 ; VFP-NEXT: vcmp.f32 s16, #0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt +; VFP-NEXT: itt lt +; VFP-NEXT: movlt r1, #0 ; VFP-NEXT: movlt r0, #0 ; VFP-NEXT: vcmp.f32 s16, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: it gt +; VFP-NEXT: itt gt ; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it gt ; VFP-NEXT: movgt.w r1, #-1 ; VFP-NEXT: vpop {d8} ; VFP-NEXT: pop {r7, pc} @@ -701,35 +695,17 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; VFP-NEXT: vldr s0, .LCPI8_0 ; VFP-NEXT: vcmp.f32 s16, #0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt +; VFP-NEXT: itttt lt +; VFP-NEXT: movlt r3, #0 +; VFP-NEXT: movlt r2, #0 +; VFP-NEXT: movlt r1, #0 ; VFP-NEXT: movlt r0, #0 ; VFP-NEXT: vcmp.f32 s16, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: it gt +; VFP-NEXT: itttt gt ; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: it lt -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: it gt ; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: it lt -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: it gt ; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt -; VFP-NEXT: movlt r3, #0 -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it gt ; VFP-NEXT: movgt r3, #15 ; VFP-NEXT: vpop {d8} ; VFP-NEXT: pop {r7, pc} @@ -831,35 +807,17 @@ define i128 @test_signed_i128_f32(float %f) nounwind { ; VFP-NEXT: vldr s0, .LCPI9_0 ; VFP-NEXT: vcmp.f32 s16, #0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt +; VFP-NEXT: itttt lt +; VFP-NEXT: movlt r3, #0 +; VFP-NEXT: movlt r2, #0 
+; VFP-NEXT: movlt r1, #0 ; VFP-NEXT: movlt r0, #0 ; VFP-NEXT: vcmp.f32 s16, s0 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: it gt +; VFP-NEXT: itttt gt ; VFP-NEXT: movgt.w r0, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: it lt -; VFP-NEXT: movlt r1, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: it gt ; VFP-NEXT: movgt.w r1, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: it lt -; VFP-NEXT: movlt r2, #0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: vcmp.f32 s16, #0 -; VFP-NEXT: it gt ; VFP-NEXT: movgt.w r2, #-1 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it lt -; VFP-NEXT: movlt r3, #0 -; VFP-NEXT: vcmp.f32 s16, s0 -; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: it gt ; VFP-NEXT: movgt.w r3, #-1 ; VFP-NEXT: vpop {d8} ; VFP-NEXT: pop {r7, pc} @@ -1453,8 +1411,8 @@ define i50 @test_signed_i50_f64(double %f) nounwind { ; VFP2-NEXT: vldr d16, .LCPI16_0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: itt lt -; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: movlt r1, #0 +; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: vcmp.f64 d8, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: ittt gt @@ -1564,19 +1522,13 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; VFP2-NEXT: vcmp.f64 d8, #0 ; VFP2-NEXT: vldr d16, .LCPI17_0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itt lt +; VFP2-NEXT: movlt r1, #0 ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: vcmp.f64 d8, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itt gt ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r1, #-1 ; VFP2-NEXT: vpop {d8} ; VFP2-NEXT: pop {r7, pc} @@ -1597,19 +1549,13 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; FP16-NEXT: 
vcmp.f64 d8, #0 ; FP16-NEXT: vldr d0, .LCPI17_0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt +; FP16-NEXT: itt lt +; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f64 d8, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itt gt ; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vcmp.f64 d8, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r1, #-1 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} @@ -1719,35 +1665,17 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; VFP2-NEXT: vcmp.f64 d8, #0 ; VFP2-NEXT: vldr d16, .LCPI18_0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itttt lt +; VFP2-NEXT: movlt r3, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: movlt r1, #0 ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: vcmp.f64 d8, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itttt gt ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r3, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt r3, #15 ; VFP2-NEXT: vpop {d8} ; VFP2-NEXT: pop {r7, pc} @@ -1768,35 +1696,17 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; FP16-NEXT: vcmp.f64 d8, #0 ; FP16-NEXT: vldr d0, .LCPI18_0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt +; FP16-NEXT: 
itttt lt +; FP16-NEXT: movlt r3, #0 +; FP16-NEXT: movlt r2, #0 +; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f64 d8, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itttt gt ; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vcmp.f64 d8, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: vcmp.f64 d8, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r3, #0 -; FP16-NEXT: vcmp.f64 d8, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt r3, #15 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} @@ -1902,35 +1812,17 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; VFP2-NEXT: vcmp.f64 d8, #0 ; VFP2-NEXT: vldr d16, .LCPI19_0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itttt lt +; VFP2-NEXT: movlt r3, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: movlt r1, #0 ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: vcmp.f64 d8, d16 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt +; VFP2-NEXT: itttt gt ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: vcmp.f64 d8, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; 
VFP2-NEXT: movlt r3, #0 -; VFP2-NEXT: vcmp.f64 d8, d16 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r3, #-1 ; VFP2-NEXT: vpop {d8} ; VFP2-NEXT: pop {r7, pc} @@ -1951,35 +1843,17 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; FP16-NEXT: vcmp.f64 d8, #0 ; FP16-NEXT: vldr d0, .LCPI19_0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt +; FP16-NEXT: itttt lt +; FP16-NEXT: movlt r3, #0 +; FP16-NEXT: movlt r2, #0 +; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f64 d8, d0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt +; FP16-NEXT: itttt gt ; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vcmp.f64 d8, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: vcmp.f64 d8, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: vcmp.f64 d8, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r3, #0 -; FP16-NEXT: vcmp.f64 d8, d0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r3, #-1 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} @@ -2526,27 +2400,25 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; ; VFP2-LABEL: test_signed_i50_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: .vsave {d8} -; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: mov r4, r0 ; VFP2-NEXT: bl __aeabi_f2ulz -; VFP2-NEXT: vldr s0, .LCPI26_0 -; VFP2-NEXT: vcmp.f32 s16, #0 +; VFP2-NEXT: vmov s0, r4 +; VFP2-NEXT: vldr s2, .LCPI26_0 +; VFP2-NEXT: vcmp.f32 s0, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: itt lt -; 
VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vcmp.f32 s0, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: ittt gt ; VFP2-NEXT: movwgt r1, #65535 ; VFP2-NEXT: movtgt r1, #3 ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vpop {d8} -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: pop {r4, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI26_0: @@ -2566,8 +2438,8 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; FP16-NEXT: vcmp.f32 s16, #0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itt lt -; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: movlt r1, #0 +; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: ittt gt @@ -2642,32 +2514,24 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; ; VFP2-LABEL: test_signed_i64_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: .vsave {d8} -; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: mov r4, r0 ; VFP2-NEXT: bl __aeabi_f2ulz -; VFP2-NEXT: vldr s0, .LCPI27_0 -; VFP2-NEXT: vcmp.f32 s16, #0 +; VFP2-NEXT: vmov s0, r4 +; VFP2-NEXT: vldr s2, .LCPI27_0 +; VFP2-NEXT: vcmp.f32 s0, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itt lt +; VFP2-NEXT: movlt r1, #0 ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: vcmp.f32 s0, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, #0 -; VFP2-NEXT: it gt +; VFP2-NEXT: itt gt ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vcmp.f32 s16, s0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: vpop {d8} -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: pop {r4, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI27_0: @@ -2686,19 
+2550,13 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; FP16-NEXT: vldr s0, .LCPI27_0 ; FP16-NEXT: vcmp.f32 s16, #0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt +; FP16-NEXT: itt lt +; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: it gt +; FP16-NEXT: itt gt ; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r1, #-1 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} @@ -2794,48 +2652,28 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; ; VFP2-LABEL: test_signed_i100_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: .vsave {d8} -; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: mov r4, r0 ; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: vldr s0, .LCPI28_0 -; VFP2-NEXT: vcmp.f32 s16, #0 +; VFP2-NEXT: vmov s0, r4 +; VFP2-NEXT: vldr s2, .LCPI28_0 +; VFP2-NEXT: vcmp.f32 s0, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itttt lt +; VFP2-NEXT: movlt r3, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: movlt r1, #0 ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: vcmp.f32 s0, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, #0 -; VFP2-NEXT: it gt +; VFP2-NEXT: itttt gt ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, s0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, #0 -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, s0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; 
VFP2-NEXT: vcmp.f32 s16, #0 -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r3, #0 -; VFP2-NEXT: vcmp.f32 s16, s0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt r3, #15 -; VFP2-NEXT: vpop {d8} -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: pop {r4, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI28_0: @@ -2854,35 +2692,17 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; FP16-NEXT: vldr s0, .LCPI28_0 ; FP16-NEXT: vcmp.f32 s16, #0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt +; FP16-NEXT: itttt lt +; FP16-NEXT: movlt r3, #0 +; FP16-NEXT: movlt r2, #0 +; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: it gt +; FP16-NEXT: itttt gt ; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it lt -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it lt -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r3, #0 -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt r3, #15 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} @@ -2977,48 +2797,28 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; ; VFP2-LABEL: test_signed_i128_f16: ; VFP2: @ %bb.0: -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: .vsave {d8} -; VFP2-NEXT: vpush {d8} +; VFP2-NEXT: .save {r4, lr} +; VFP2-NEXT: push {r4, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s16, r0 +; VFP2-NEXT: mov r4, r0 
; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: vldr s0, .LCPI29_0 -; VFP2-NEXT: vcmp.f32 s16, #0 +; VFP2-NEXT: vmov s0, r4 +; VFP2-NEXT: vldr s2, .LCPI29_0 +; VFP2-NEXT: vcmp.f32 s0, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt +; VFP2-NEXT: itttt lt +; VFP2-NEXT: movlt r3, #0 +; VFP2-NEXT: movlt r2, #0 +; VFP2-NEXT: movlt r1, #0 ; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vcmp.f32 s16, s0 +; VFP2-NEXT: vcmp.f32 s0, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, #0 -; VFP2-NEXT: it gt +; VFP2-NEXT: itttt gt ; VFP2-NEXT: movgt.w r0, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, s0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, #0 -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r1, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, s0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s16, #0 -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r2, #-1 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r3, #0 -; VFP2-NEXT: vcmp.f32 s16, s0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r3, #-1 -; VFP2-NEXT: vpop {d8} -; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: pop {r4, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI29_0: @@ -3037,35 +2837,17 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; FP16-NEXT: vldr s0, .LCPI29_0 ; FP16-NEXT: vcmp.f32 s16, #0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt +; FP16-NEXT: itttt lt +; FP16-NEXT: movlt r3, #0 +; FP16-NEXT: movlt r2, #0 +; FP16-NEXT: movlt r1, #0 ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s0 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: it gt +; FP16-NEXT: itttt gt ; FP16-NEXT: movgt.w r0, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it lt -; FP16-NEXT: movlt r1, #0 -; FP16-NEXT: vmrs 
APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r1, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: it lt -; FP16-NEXT: movlt r2, #0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: vcmp.f32 s16, #0 -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r2, #-1 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it lt -; FP16-NEXT: movlt r3, #0 -; FP16-NEXT: vcmp.f32 s16, s0 -; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r3, #-1 ; FP16-NEXT: vpop {d8} ; FP16-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll index feb790821e8754..4003af5d44be81 100644 --- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll @@ -16,12 +16,12 @@ declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) define float @fminnum32_intrinsic(float %x, float %y) { ; ARMV7-LABEL: fminnum32_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vmov s0, r0 -; ARMV7-NEXT: vmov s2, r1 -; ARMV7-NEXT: vcmp.f32 s0, s2 +; ARMV7-NEXT: vmov s0, r1 +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vcmp.f32 s2, s0 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovlt.f32 s2, s0 -; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: vmovlt.f32 s0, s2 +; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; ; ARMV8-LABEL: fminnum32_intrinsic: @@ -102,12 +102,12 @@ define float @fminnum32_non_zero_intrinsic(float %x) { define float @fmaxnum32_intrinsic(float %x, float %y) { ; ARMV7-LABEL: fmaxnum32_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vmov s0, r0 -; ARMV7-NEXT: vmov s2, r1 -; ARMV7-NEXT: vcmp.f32 s0, s2 +; ARMV7-NEXT: vmov s0, r1 +; ARMV7-NEXT: vmov s2, r0 +; ARMV7-NEXT: vcmp.f32 s2, s0 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovgt.f32 s2, s0 -; ARMV7-NEXT: vmov r0, s2 +; ARMV7-NEXT: vmovgt.f32 s0, s2 +; ARMV7-NEXT: vmov r0, s0 ; ARMV7-NEXT: bx lr ; ; ARMV8-LABEL: fmaxnum32_intrinsic: 
@@ -160,12 +160,12 @@ define float @fmaxnum32_nsz_intrinsic(float %x, float %y) { define float @fmaxnum32_zero_intrinsic(float %x) { ; ARMV7-LABEL: fmaxnum32_zero_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vmov s2, r0 -; ARMV7-NEXT: vldr s0, .LCPI5_0 -; ARMV7-NEXT: vcmp.f32 s2, #0 +; ARMV7-NEXT: vmov s0, r0 +; ARMV7-NEXT: vldr s2, .LCPI5_0 +; ARMV7-NEXT: vcmp.f32 s0, #0 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmovgt.f32 s0, s2 -; ARMV7-NEXT: vmov r0, s0 +; ARMV7-NEXT: vmovgt.f32 s2, s0 +; ARMV7-NEXT: vmov r0, s2 ; ARMV7-NEXT: bx lr ; ARMV7-NEXT: .p2align 2 ; ARMV7-NEXT: @ %bb.1: @@ -425,12 +425,12 @@ define double@fmaxnum64_nsz_intrinsic(double %x, double %y) { define double @fmaxnum64_zero_intrinsic(double %x) { ; ARMV7-LABEL: fmaxnum64_zero_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vmov d17, r0, r1 -; ARMV7-NEXT: vcmp.f64 d17, #0 +; ARMV7-NEXT: vmov d16, r0, r1 +; ARMV7-NEXT: vcmp.f64 d16, #0 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmov.i32 d16, #0x0 -; ARMV7-NEXT: vmovgt.f64 d16, d17 -; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmov.i32 d17, #0x0 +; ARMV7-NEXT: vmovgt.f64 d17, d16 +; ARMV7-NEXT: vmov r0, r1, d17 ; ARMV7-NEXT: bx lr ; ; ARMV8-LABEL: fmaxnum64_zero_intrinsic: @@ -1065,18 +1065,18 @@ define <2 x double> @fminnumv264_one_zero_intrinsic(<2 x double> %x) { ; ; ARMV8M-LABEL: fminnumv264_one_zero_intrinsic: ; ARMV8M: @ %bb.0: -; ARMV8M-NEXT: vmov d3, r2, r3 -; ARMV8M-NEXT: vldr d1, .LCPI27_0 -; ARMV8M-NEXT: vcmp.f64 d3, #0 +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vldr d0, .LCPI27_0 +; ARMV8M-NEXT: vcmp.f64 d1, #0 ; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr ; ARMV8M-NEXT: vmov d2, r0, r1 -; ARMV8M-NEXT: vmov.f64 d0, #-1.000000e+00 -; ARMV8M-NEXT: vcmp.f64 d0, d2 -; ARMV8M-NEXT: vmovlt.f64 d1, d3 +; ARMV8M-NEXT: vmov.f64 d3, #-1.000000e+00 +; ARMV8M-NEXT: vcmp.f64 d3, d2 +; ARMV8M-NEXT: vmovlt.f64 d0, d1 ; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr -; ARMV8M-NEXT: vmov r2, r3, d1 -; ARMV8M-NEXT: vselgt.f64 d0, d2, d0 -; ARMV8M-NEXT: 
vmov r0, r1, d0 +; ARMV8M-NEXT: vmov r2, r3, d0 +; ARMV8M-NEXT: vselgt.f64 d1, d2, d3 +; ARMV8M-NEXT: vmov r0, r1, d1 ; ARMV8M-NEXT: bx lr ; ARMV8M-NEXT: .p2align 3 ; ARMV8M-NEXT: @ %bb.1: @@ -1186,18 +1186,18 @@ define <2 x double> @fmaxnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y) define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) { ; ARMV7-LABEL: fmaxnumv264_zero_intrinsic: ; ARMV7: @ %bb.0: -; ARMV7-NEXT: vldr d17, .LCPI30_0 -; ARMV7-NEXT: vmov d18, r2, r3 -; ARMV7-NEXT: vmov d19, r0, r1 -; ARMV7-NEXT: vcmp.f64 d18, d17 +; ARMV7-NEXT: vmov d18, r0, r1 +; ARMV7-NEXT: vldr d16, .LCPI30_0 +; ARMV7-NEXT: vcmp.f64 d18, #0 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmov.i32 d16, #0x0 -; ARMV7-NEXT: vcmp.f64 d19, #0 +; ARMV7-NEXT: vmov d19, r2, r3 +; ARMV7-NEXT: vcmp.f64 d19, d16 +; ARMV7-NEXT: vmov.i32 d17, #0x0 ; ARMV7-NEXT: vmovgt.f64 d17, d18 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmov r2, r3, d17 +; ARMV7-NEXT: vmov r0, r1, d17 ; ARMV7-NEXT: vmovgt.f64 d16, d19 -; ARMV7-NEXT: vmov r0, r1, d16 +; ARMV7-NEXT: vmov r2, r3, d16 ; ARMV7-NEXT: bx lr ; ARMV7-NEXT: .p2align 3 ; ARMV7-NEXT: @ %bb.1: @@ -1225,26 +1225,26 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) { ; ARMV8M-LABEL: fmaxnumv264_zero_intrinsic: ; ARMV8M: @ %bb.0: ; ARMV8M-NEXT: vmov d2, r0, r1 -; ARMV8M-NEXT: vldr d0, .LCPI30_0 +; ARMV8M-NEXT: vldr d1, .LCPI30_1 ; ARMV8M-NEXT: vcmp.f64 d2, #0 ; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr ; ARMV8M-NEXT: vmov d3, r2, r3 -; ARMV8M-NEXT: vcmp.f64 d3, d0 -; ARMV8M-NEXT: vldr d1, .LCPI30_1 -; ARMV8M-NEXT: vselgt.f64 d1, d2, d1 +; ARMV8M-NEXT: vcmp.f64 d3, d1 +; ARMV8M-NEXT: vldr d0, .LCPI30_0 +; ARMV8M-NEXT: vselgt.f64 d0, d2, d0 ; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr -; ARMV8M-NEXT: vmov r0, r1, d1 -; ARMV8M-NEXT: vselgt.f64 d0, d3, d0 -; ARMV8M-NEXT: vmov r2, r3, d0 +; ARMV8M-NEXT: vmov r0, r1, d0 +; ARMV8M-NEXT: vselgt.f64 d1, d3, d1 +; ARMV8M-NEXT: vmov r2, r3, d1 ; ARMV8M-NEXT: bx lr ; ARMV8M-NEXT: 
.p2align 3 ; ARMV8M-NEXT: @ %bb.1: ; ARMV8M-NEXT: .LCPI30_0: -; ARMV8M-NEXT: .long 0 @ double -0 -; ARMV8M-NEXT: .long 2147483648 -; ARMV8M-NEXT: .LCPI30_1: ; ARMV8M-NEXT: .long 0 @ double 0 ; ARMV8M-NEXT: .long 0 +; ARMV8M-NEXT: .LCPI30_1: +; ARMV8M-NEXT: .long 0 @ double -0 +; ARMV8M-NEXT: .long 2147483648 %a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double>) ret <2 x double> %a } diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll index 24ca9aeac7f2db..496a6c0f5acbbe 100644 --- a/llvm/test/CodeGen/ARM/select.ll +++ b/llvm/test/CodeGen/ARM/select.ll @@ -164,13 +164,13 @@ define double @f7(double %a, double %b) { ; CHECK-VFP-LABEL: f7: ; CHECK-VFP: @ %bb.0: ; CHECK-VFP-NEXT: vldr d17, .LCPI6_0 -; CHECK-VFP-NEXT: vmov d19, r0, r1 +; CHECK-VFP-NEXT: vmov d18, r0, r1 ; CHECK-VFP-NEXT: vmov.f64 d16, #-1.000000e+00 -; CHECK-VFP-NEXT: vcmp.f64 d19, d17 +; CHECK-VFP-NEXT: vcmp.f64 d18, d17 ; CHECK-VFP-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-VFP-NEXT: vmov d18, r2, r3 -; CHECK-VFP-NEXT: vmovmi.f64 d18, d16 -; CHECK-VFP-NEXT: vmov r0, r1, d18 +; CHECK-VFP-NEXT: vmov d19, r2, r3 +; CHECK-VFP-NEXT: vmovmi.f64 d19, d16 +; CHECK-VFP-NEXT: vmov r0, r1, d19 ; CHECK-VFP-NEXT: bx lr ; CHECK-VFP-NEXT: .p2align 3 ; CHECK-VFP-NEXT: @ %bb.1: @@ -181,14 +181,14 @@ define double @f7(double %a, double %b) { ; CHECK-NEON-LABEL: f7: ; CHECK-NEON: @ %bb.0: ; CHECK-NEON-NEXT: vldr d17, LCPI6_0 -; CHECK-NEON-NEXT: vmov d19, r0, r1 -; CHECK-NEON-NEXT: vmov d18, r2, r3 -; CHECK-NEON-NEXT: vcmp.f64 d19, d17 +; CHECK-NEON-NEXT: vmov d18, r0, r1 +; CHECK-NEON-NEXT: vmov d19, r2, r3 +; CHECK-NEON-NEXT: vcmp.f64 d18, d17 ; CHECK-NEON-NEXT: vmov.f64 d16, #-1.000000e+00 ; CHECK-NEON-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEON-NEXT: it mi -; CHECK-NEON-NEXT: vmovmi.f64 d18, d16 -; CHECK-NEON-NEXT: vmov r0, r1, d18 +; CHECK-NEON-NEXT: vmovmi.f64 d19, d16 +; CHECK-NEON-NEXT: vmov r0, r1, d19 ; CHECK-NEON-NEXT: bx lr ; CHECK-NEON-NEXT: .p2align 3 ; 
CHECK-NEON-NEXT: @ %bb.1: diff --git a/llvm/test/CodeGen/DirectX/asdouble.ll b/llvm/test/CodeGen/DirectX/asdouble.ll new file mode 100644 index 00000000000000..6a581d69eb7e9d --- /dev/null +++ b/llvm/test/CodeGen/DirectX/asdouble.ll @@ -0,0 +1,22 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Test that for scalar and vector inputs, asdouble maps down to the makeDouble +; DirectX op + +define noundef double @asdouble_scalar(i32 noundef %low, i32 noundef %high) { +; CHECK: call double @dx.op.makeDouble(i32 101, i32 %low, i32 %high) + %ret = call double @llvm.dx.asdouble.i32(i32 %low, i32 %high) + ret double %ret +} + +declare double @llvm.dx.asdouble.i32(i32, i32) + +define noundef <3 x double> @asdouble_vec(<3 x i32> noundef %low, <3 x i32> noundef %high) { +; CHECK: call double @dx.op.makeDouble(i32 101, i32 %low.i0, i32 %high.i0) +; CHECK: call double @dx.op.makeDouble(i32 101, i32 %low.i1, i32 %high.i1) +; CHECK: call double @dx.op.makeDouble(i32 101, i32 %low.i2, i32 %high.i2) + %ret = call <3 x double> @llvm.dx.asdouble.v3i32(<3 x i32> %low, <3 x i32> %high) + ret <3 x double> %ret +} + +declare <3 x double> @llvm.dx.asdouble.v3i32(<3 x i32>, <3 x i32>) diff --git a/llvm/test/CodeGen/Generic/machine-function-splitter.ll b/llvm/test/CodeGen/Generic/machine-function-splitter.ll index 2097523a61c5f9..1a8c9ede8f8b73 100644 --- a/llvm/test/CodeGen/Generic/machine-function-splitter.ll +++ b/llvm/test/CodeGen/Generic/machine-function-splitter.ll @@ -2,12 +2,21 @@ ; REQUIRES: x86-registered-target ; COM: Machine function splitting with FDO profiles -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86,MFS-NOBBSECTIONS ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions 
-mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86 ; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86 +; COM: Machine function splitting along with -basic-block-sections profile +; RUN: echo 'v1' > %t +; RUN: echo 'ffoo21' >> %t +; RUN: echo 'c0' >> %t +; RUN: echo 'ffoo22' >> %t +; RUN: echo 'c0 1' >> %t +; RUN: echo 'c2' >> %t +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -basic-block-sections=%t -split-machine-functions | FileCheck %s --check-prefixes=MFS-BBSECTIONS + ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64 @@ -610,6 +619,61 @@ cold_asm_target: ret void } +define void @foo21(i1 zeroext %0) { +;; Check that a function with basic-block-sections profile (but no pgo profile) +;; is properly split when the profile is used along with mfs. 
+; MFS-BBSECTIONS: .section .text.hot.foo21 +; MFS-NOBBSECTIONS-NOT: .section .text.hot.foo21 +; MFS-BBSECTIONS-LABEL: foo21: +; MFS-NOBBSECTIONS-NOT: foo21.cold: +; MFS-BBSECTIONS: .section .text.split.foo21 +; MFS-BBSECTIONS: foo21.cold + %2 = alloca i8, align 1 + %3 = zext i1 %0 to i8 + store i8 %3, ptr %2, align 1 + %4 = load i8, ptr %2, align 1 + %5 = trunc i8 %4 to i1 + br i1 %5, label %6, label %8 + +6: ; preds = %1 + %7 = call i32 @bar() + br label %10 + +8: ; preds = %1 + %9 = call i32 @baz() + br label %10 + +10: ; preds = %8, %6 + ret void +} + +define void @foo22(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { +;; Check that when a function has both basic-block-section and pgo profiles +;; only the basic-block-section profile is used for splitting. + +;; Check that we create two hot sections with -basic-block-sections. +; MFS-BBSECTIONS: .section .text.hot.foo22 +; MFS-BBSECTIONS-LABEL: foo22: +; MFS-BBSECTIONS: callq bar +; MFS-BBSECTIONS: .section .text.hot.foo22 +; MFS-BBSECTIONS-NEXT: foo22.__part.1: +; MFS-BBSECTIONS: callq baz +; MFS-BBSECTIONS-NOT: .section .text.split.foo22 + br i1 %0, label %2, label %4, !prof !17 + +2: ; preds = %1 + %3 = call i32 @bar() + br label %6 + +4: ; preds = %1 + %5 = call i32 @baz() + br label %6 + +6: ; preds = %4, %2 + %7 = tail call i32 @qux() + ret void +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() diff --git a/llvm/test/CodeGen/Hexagon/load-widen.ll b/llvm/test/CodeGen/Hexagon/load-widen.ll new file mode 100644 index 00000000000000..6fe47a57b89f09 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/load-widen.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -disable-load-widen < %s | FileCheck %s --check-prefix=CHECK-DISABLE + +%struct.node32 = type { ptr, ptr } + +%struct.node16_4 = type { i16, i16, i16, i16 } + +define void @test1(ptr nocapture %node) nounwind { +entry: +; There should be a memd and not two memw +; CHECK-LABEL: test1 +; CHECK: memd 
+ %0 = load ptr, ptr %node, align 8 + %cgep = getelementptr inbounds %struct.node32, ptr %node, i32 0, i32 1 + %1 = load ptr, ptr %cgep, align 4 + store ptr %0, ptr %1, align 8 + ret void +} + +define void @test2(ptr nocapture %node) nounwind { +entry: +; Same as test1 but with load widening disabled. +; CHECK-DISABLE-LABEL: test2 +; CHECK-DISABLE: memw +; CHECK-DISABLE: memw + %0 = load ptr, ptr %node, align 8 + %cgep = getelementptr inbounds %struct.node32, ptr %node, i32 0, i32 1 + %1 = load ptr, ptr %cgep, align 4 + store ptr %0, ptr %1, align 8 + ret void +} + +define void @test3(ptr nocapture %node) nounwind { +entry: +; No memd because first load is not 8 byte aligned +; CHECK-LABEL: test3 +; CHECK-NOT: memd + %0 = load ptr, ptr %node, align 4 + %cgep = getelementptr inbounds %struct.node32, ptr %node, i32 0, i32 1 + %1 = load ptr, ptr %cgep, align 4 + store ptr %0, ptr %1, align 8 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll b/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll index 6c04e7a1e6ea44..d5d2da4d1056b1 100644 --- a/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll +++ b/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll @@ -1,20 +1,16 @@ -; RUN: llc -march=hexagon --combiner-store-merging=false < %s | FileCheck %s -; CHECK-NOT: memh -; Check that store widening does not merge the two stores. +; RUN: llc -march=hexagon --combiner-store-merging=false -verify-machineinstrs < %s | FileCheck %s +; CHECK: memh +; Check that store widening merges the two adjacent stores. 
-target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" target triple = "hexagon" %struct.type_t = type { i8, i8, [2 x i8] } define zeroext i8 @foo(ptr nocapture %p) nounwind { entry: - store i8 0, ptr %p, align 2, !tbaa !0 + store i8 0, ptr %p, align 2 %b = getelementptr inbounds %struct.type_t, ptr %p, i32 0, i32 1 - %0 = load i8, ptr %b, align 1, !tbaa !0 - store i8 0, ptr %b, align 1, !tbaa !0 + %0 = load i8, ptr %b, align 1 + store i8 0, ptr %b, align 1 ret i8 %0 } - -!0 = !{!"omnipotent char", !1} -!1 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/Hexagon/widen-alias.ll b/llvm/test/CodeGen/Hexagon/widen-alias.ll new file mode 100644 index 00000000000000..4f849286546235 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/widen-alias.ll @@ -0,0 +1,97 @@ +; Check the memd loads are generated by HexagonLoadStoreWidening pass +; Check that memw loads from adjacent memory location are replaced with memd, +; though the load/stores alias with instructions that occur later in the block. +; The order of memory operations remains unchanged. 
+ +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s + +target triple = "hexagon" + +; CHECK-LABEL: load_store_interleaved: +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @load_store_interleaved(ptr %p, float %a, float %b) local_unnamed_addr { +entry: + %0 = load float, ptr %p, align 8 + %add0 = fadd float %0, %a + store float %add0, ptr %p, align 8 + %q = getelementptr i8, ptr %p, i32 4 + %1 = load float, ptr %q, align 4 + %add1 = fadd float %1, %b + store float %add1, ptr %q, align 4 + ret void +} + +; Store can be widened here, but this order of instructions is not currently handled +; CHECK-LABEL: loads_between_stores: +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK-NOT: memd(r{{[0-9]+}}+#4) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @loads_between_stores(ptr %p, float %a, float %b) local_unnamed_addr { +entry: + %add0 = fadd float %b, %a + %q = getelementptr i8, ptr %p, i32 4 + %r = getelementptr i8, ptr %p, i32 8 + store float %add0, ptr %r, align 4 + %0 = load float, ptr %p, align 8 + %1 = load float, ptr %q, align 4 + %add1 = fadd float %1, %0 + store float %add1, ptr %q, align 8 + ret void +} + +; CHECK-LABEL: loads_before_stores: +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @loads_before_stores(ptr %p, float %a, float %b) local_unnamed_addr { +entry: + %0 = load float, ptr %p, align 8 + %q = getelementptr i8, ptr %p, i32 4 + %1 = load float, ptr %q, align 4 + %add0 = fadd float %0, %a + store float %add0, ptr %p, align 8 + %add1 = fadd float %1, %b + store float %add1, ptr %q, align 4 + ret void +} + +; Store can be widened here, but this order of instructions is not currently 
handled +; CHECK-LABEL: store_load_interleaved: +; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK-NOT: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @store_load_interleaved(ptr %p, float %a, float %b, float %f) local_unnamed_addr { +entry: + %q = getelementptr i8, ptr %p, i32 4 + %r = getelementptr i8, ptr %p, i32 8 + store float %f, ptr %r, align 4 + %0 = load float, ptr %p, align 8 + %add0 = fadd float %0, %a + store float %add0, ptr %p, align 8 + %1 = load float, ptr %q, align 4 + %add1 = fadd float %1, %b + %add2 = fadd float %add1, %add0 + store float %add2, ptr %q, align 8 + ret void +} + +; CHECK-LABEL: stores_between_loads: +; CHECK-NOT: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +; CHECK: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; Function Attrs: mustprogress nounwind +define linkonce_odr dso_local void @stores_between_loads(ptr %p, float %a, float %b, float %f) local_unnamed_addr { +entry: + %0 = load float, ptr %p, align 8 + %add0 = fadd float %f, %0 + store float %add0, ptr %p, align 8 + %q = getelementptr i8, ptr %p, i32 4 + %add1 = fadd float %f, %b + store float %add1, ptr %q, align 8 + %r = getelementptr i8, ptr %p, i32 8 + %1 = load float, ptr %r, align 4 + %add2 = fadd float %add1, %1 + store float %add2, ptr %r, align 4 + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/widen-not-load.ll b/llvm/test/CodeGen/Hexagon/widen-not-load.ll new file mode 100644 index 00000000000000..5bf8b57054a915 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/widen-not-load.ll @@ -0,0 +1,64 @@ +; Test that double word post increment load is not generated. 
+; REQUIRES: asserts + +; REQUIRES: asserts +; RUN: llc -march=hexagon -O2 -debug-only=hexagon-load-store-widening \ +; RUN: %s -o 2>&1 - | FileCheck %s + +; Loads with positive invalid postinc is not widened +define ptr @test1() { +; CHECK-LABEL: test1 +; CHECK-NOT: memd(r{{[0-9]+}}++ +entry: + %0 = load ptr, ptr null, align 4 + %b = getelementptr i8, ptr %0, i32 20 + %1 = load i32, ptr %0, align 8 + %c = getelementptr i8, ptr %0, i32 4 + %2 = load i32, ptr %c, align 4 + %call55 = call i8 @foo(ptr %b, i32 %1, i32 %2) + ret ptr null +} + +; Loads with negative invalid postinc is not widened +define ptr @test2() { +; CHECK-LABEL: test2 +; CHECK-NOT: memd(r{{[0-9]+}}++ +entry: + %0 = load ptr, ptr null, align 4 + %b = getelementptr i8, ptr %0, i32 -20 + %1 = load i32, ptr %0, align 8 + %c = getelementptr i8, ptr %0, i32 4 + %2 = load i32, ptr %c, align 4 + %call55 = call i8 @foo(ptr %b, i32 %1, i32 %2) + ret ptr null +} + +; Loads with valid positive postinc is widened +define ptr @test3() { +; CHECK-LABEL: test3 +; CHECK: memd +entry: + %0 = load ptr, ptr null, align 4 + %b = getelementptr i8, ptr %0, i32 24 + %1 = load i32, ptr %0, align 8 + %c = getelementptr i8, ptr %0, i32 4 + %2 = load i32, ptr %c, align 4 + %call55 = call i8 @foo(ptr %b, i32 %1, i32 %2) + ret ptr null +} + +; Loads with valid negative postinc is widened +define ptr @test4() { +; CHECK-LABEL: test4 +; CHECK: memd +entry: + %0 = load ptr, ptr null, align 4 + %b = getelementptr i8, ptr %0, i32 -24 + %1 = load i32, ptr %0, align 8 + %c = getelementptr i8, ptr %0, i32 4 + %2 = load i32, ptr %c, align 4 + %call55 = call i8 @foo(ptr %b, i32 %1, i32 %2) + ret ptr null +} + +declare i8 @foo(ptr, i32, i32) diff --git a/llvm/test/CodeGen/Hexagon/widen-volatile.ll b/llvm/test/CodeGen/Hexagon/widen-volatile.ll new file mode 100644 index 00000000000000..540f517a6c96f8 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/widen-volatile.ll @@ -0,0 +1,34 @@ +; Check the volatile load/stores are not widened by 
HexagonLoadStoreWidening pass + +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s + +target triple = "hexagon" + +; CHECK-LABEL: volatile_loads: +; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0) +; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+#4) +; CHECK-NOT: r{{[0-9]+}} = memd(r{{[0-9]+}}+#0) +define dso_local void @volatile_loads(ptr noundef %dst, ptr noundef %src0) local_unnamed_addr { +entry: + %0 = load volatile i32, ptr %src0, align 8 + %src1 = getelementptr i8, ptr %src0, i32 4 + %conv = zext i32 %0 to i64 + %1 = load volatile i32, ptr %src1, align 4 + %conv4 = zext i32 %1 to i64 + %shl = shl nuw i64 %conv4, 32 + %or = or disjoint i64 %shl, %conv + store i64 %or, ptr %dst, align 1 + ret void +} + +; CHECK-LABEL: volatile_stores: +; CHECK: memw(r{{[0-9]+}}+#0) = r{{[0-9]+}} +; CHECK: memw(r{{[0-9]+}}+#4) = r{{[0-9]+}} +; CHECK-NOT: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}} +define dso_local void @volatile_stores(ptr noundef %dst0, i32 %a, i32 %b) local_unnamed_addr { +entry: + store volatile i32 %a, ptr %dst0, align 8 + %dst1 = getelementptr i8, ptr %dst0, i32 4 + store volatile i32 %b, ptr %dst1, align 4 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll new file mode 100644 index 00000000000000..2f7c93eb1c0de0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll @@ -0,0 +1,1323 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32d | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64d | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s +; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | 
FileCheck -check-prefix=RV64I %s + +; These tests are each targeted at a particular RISC-V FPU instruction. +; Compares and conversions can be found in double-fcmp.ll and double-convert.ll +; respectively. Some other double-*.ll files in this folder exercise LLVM IR +; instructions that don't directly match a RISC-V instruction. + +define double @fadd_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fadd_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fadd.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fadd_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fadd_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd double %a, %b + ret double %1 +} + +define double @fsub_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fsub_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsub.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fsub_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __subdf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsub_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __subdf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fsub double %a, %b + ret double %1 +} + +define double @fmul_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fmul_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmul.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmul_d: +; RV32I: # 
%bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmul_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul double %a, %b + ret double %1 +} + +define double @fdiv_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fdiv_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fdiv.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fdiv_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __divdf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fdiv_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __divdf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fdiv double %a, %b + ret double %1 +} + +declare double @llvm.sqrt.f64(double) + +define double @fsqrt_d(double %a) nounwind { +; CHECKIFD-LABEL: fsqrt_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsqrt.d fa0, fa0 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fsqrt_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsqrt_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double 
@llvm.sqrt.f64(double %a) + ret double %1 +} + +declare double @llvm.copysign.f64(double, double) + +define double @fsgnj_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fsgnj_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsgnj.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fsgnj_d: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a4, a2, -1 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a2, a3, a2 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnj_d: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: slli a3, a2, 63 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret + %1 = call double @llvm.copysign.f64(double %a, double %b) + ret double %1 +} + +define double @fsgnjn_d(double %a, double %b) nounwind { +; TODO: fsgnjn.s isn't selected on RV64 because DAGCombiner::visitBITCAST will +; convert (bitconvert (fneg x)) to a xor. +; +; CHECKIFD-LABEL: fsgnjn_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fsgnjn.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fsgnjn_d: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a3, a3, a2 +; RV32I-NEXT: addi a4, a2, -1 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a2, a3, a2 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnjn_d: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: slli a3, a2, 63 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: xor a1, a1, a3 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret + %1 = fneg double %b + %2 = call double @llvm.copysign.f64(double %a, double %1) + ret double %2 +} + +declare double @llvm.fabs.f64(double) + +; This function performs extra work to ensure that +; DAGCombiner::visitBITCAST doesn't replace the fabs with an and. 
+define double @fabs_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fabs_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fadd.d fa5, fa0, fa1 +; CHECKIFD-NEXT: fabs.d fa4, fa5 +; CHECKIFD-NEXT: fadd.d fa0, fa4, fa5 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fabs_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a3, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fabs_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, -1 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd double %a, %b + %2 = call double @llvm.fabs.f64(double %1) + %3 = fadd double %2, %1 + ret double %3 +} + +declare double @llvm.minnum.f64(double, double) + +define double @fmin_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fmin_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmin.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmin_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmin +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmin_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmin +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.minnum.f64(double %a, double %b) + ret double %1 +} + +declare 
double @llvm.maxnum.f64(double, double) + +define double @fmax_d(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fmax_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmax.d fa0, fa0, fa1 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmax_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmax +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmax_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmax +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.maxnum.f64(double %a, double %b) + ret double %1 +} + +declare double @llvm.fma.f64(double, double, double) + +define double @fmadd_d(double %a, double %b, double %c) nounwind { +; CHECKIFD-LABEL: fmadd_d: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmadd_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmadd_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.fma.f64(double %a, double %b, double %c) + ret double %1 +} + +define double @fmsub_d(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fmsub_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fmsub.d fa0, fa0, fa1, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; 
RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fmsub_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fmsub.d fa0, fa0, fa1, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fmsub_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s3, a3 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: lui a1, %hi(.LCPI11_0) +; RV32I-NEXT: addi a1, a1, %lo(.LCPI11_0) +; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: lui a5, 524288 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmsub_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a0, %hi(.LCPI11_0) +; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a0) +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded 
Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd double 0.0, %c ; avoid negation using xor + %negc = fneg double %c_ + %1 = call double @llvm.fma.f64(double %a, double %b, double %negc) + ret double %1 +} + +define double @fnmadd_d(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmadd_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fnmadd.d fa0, fa4, fa1, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmadd_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fnmadd.d fa0, fa4, fa1, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI12_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI12_0) +; RV32I-NEXT: lw s3, 0(a2) +; RV32I-NEXT: lw s4, 4(a2) +; RV32I-NEXT: mv s5, a5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s7, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; 
RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: lui a5, 524288 +; RV32I-NEXT: xor a2, s7, a5 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: mv a0, s6 +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI12_0) +; RV64I-NEXT: ld s1, %lo(.LCPI12_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a2, a1, 63 +; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd double 0.0, %a + %c_ = fadd double 0.0, %c + %nega = fneg double %a_ + %negc = fneg 
double %c_ + %1 = call double @llvm.fma.f64(double %nega, double %b, double %negc) + ret double %1 +} + +define double @fnmadd_d_2(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmadd_d_2: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa4, fa1, fa5 +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fnmadd.d fa0, fa4, fa0, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmadd_d_2: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa4, fa1, fa5 +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fnmadd.d fa0, fa4, fa0, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_d_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI13_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI13_0) +; RV32I-NEXT: lw s3, 0(a2) +; RV32I-NEXT: lw s4, 4(a2) +; RV32I-NEXT: mv s5, a5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s7, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a4, a0 +; RV32I-NEXT: lui a5, 524288 +; RV32I-NEXT: xor a3, s7, a5 +; RV32I-NEXT: xor a5, a1, a5 +; RV32I-NEXT: mv a0, s0 +; 
RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s6 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_d_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: ld s1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a2, a1, 63 +; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %b_ = fadd double 0.0, %b + %c_ = fadd double 0.0, %c + %negb = fneg double %b_ + %negc = fneg double %c_ + %1 = call double @llvm.fma.f64(double %a, double %negb, double %negc) + ret double %1 +} + +define double @fnmadd_d_3(double %a, double %b, double %c) nounwind { +; 
CHECKIFD-LABEL: fnmadd_d_3: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa5, fa0, fa1, fa2 +; CHECKIFD-NEXT: fneg.d fa0, fa5 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_d_3: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_d_3: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.fma.f64(double %a, double %b, double %c) + %neg = fneg double %1 + ret double %neg +} + + +define double @fnmadd_nsz(double %a, double %b, double %c) nounwind { +; CHECKIFD-LABEL: fnmadd_nsz: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa5, fa0, fa1, fa2 +; CHECKIFD-NEXT: fneg.d fa0, fa5 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_nsz: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_nsz: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call nsz double @llvm.fma.f64(double %a, double %b, double %c) + %neg = fneg nsz double %1 + ret double %neg +} + +define double @fnmsub_d(double %a, double %b, double %c) 
nounwind { +; RV32IFD-LABEL: fnmsub_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa5, fa0, fa5 +; RV32IFD-NEXT: fnmsub.d fa0, fa5, fa1, fa2 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmsub_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa5, fa0, fa5 +; RV64IFD-NEXT: fnmsub.d fa0, fa5, fa1, fa2 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmsub_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI16_0) +; RV32I-NEXT: addi a3, a2, %lo(.LCPI16_0) +; RV32I-NEXT: lw a2, 0(a3) +; RV32I-NEXT: lw a3, 4(a3) +; RV32I-NEXT: mv s3, a5 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: mv a4, s2 +; RV32I-NEXT: mv a5, s3 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI16_0) +; RV64I-NEXT: ld a1, %lo(.LCPI16_0)(a1) +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: call 
__adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %a_ = fadd double 0.0, %a + %nega = fneg double %a_ + %1 = call double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %1 +} + +define double @fnmsub_d_2(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmsub_d_2: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa5, fa1, fa5 +; RV32IFD-NEXT: fnmsub.d fa0, fa5, fa0, fa2 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmsub_d_2: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa5, fa1, fa5 +; RV64IFD-NEXT: fnmsub.d fa0, fa5, fa0, fa2 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmsub_d_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI17_0) +; RV32I-NEXT: addi a3, a2, %lo(.LCPI17_0) +; RV32I-NEXT: lw a2, 0(a3) +; RV32I-NEXT: lw a3, 4(a3) +; RV32I-NEXT: mv s3, a5 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lui a3, 524288 +; RV32I-NEXT: xor a3, a1, a3 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a4, s2 +; RV32I-NEXT: mv a5, s3 +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 28(sp) # 
4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_d_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI17_0) +; RV64I-NEXT: ld a1, %lo(.LCPI17_0)(a1) +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a1, a0, a1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %b_ = fadd double 0.0, %b + %negb = fneg double %b_ + %1 = call double @llvm.fma.f64(double %a, double %negb, double %c) + ret double %1 +} + +define double @fmadd_d_contract(double %a, double %b, double %c) nounwind { +; CHECKIFD-LABEL: fmadd_d_contract: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 +; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmadd_d_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a4 +; RV32I-NEXT: mv s1, a5 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; 
RV32I-NEXT: ret +; +; RV64I-LABEL: fmadd_d_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul contract double %a, %b + %2 = fadd contract double %1, %c + ret double %2 +} + +define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fmsub_d_contract: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fmul.d fa4, fa0, fa1 +; RV32IFD-NEXT: fsub.d fa0, fa4, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fmsub_d_contract: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fmul.d fa4, fa0, fa1 +; RV64IFD-NEXT: fsub.d fa0, fa4, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fmsub_d_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s3, a3 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: lui a1, %hi(.LCPI19_0) +; RV32I-NEXT: addi a1, a1, %lo(.LCPI19_0) +; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s4, 
a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: call __subdf3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmsub_d_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a0, %hi(.LCPI19_0) +; RV64I-NEXT: ld a1, %lo(.LCPI19_0)(a0) +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: call __subdf3 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd double 0.0, %c ; avoid negation using xor + %1 = fmul contract double %a, %b + %2 = fsub contract double %1, %c_ + ret double %2 +} + +define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmadd_d_contract: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa4, fa0, fa5 +; 
RV32IFD-NEXT: fadd.d fa3, fa1, fa5 +; RV32IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV32IFD-NEXT: fmul.d fa4, fa4, fa3 +; RV32IFD-NEXT: fneg.d fa4, fa4 +; RV32IFD-NEXT: fsub.d fa0, fa4, fa5 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmadd_d_contract: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV64IFD-NEXT: fadd.d fa3, fa1, fa5 +; RV64IFD-NEXT: fadd.d fa5, fa2, fa5 +; RV64IFD-NEXT: fmul.d fa4, fa4, fa3 +; RV64IFD-NEXT: fneg.d fa4, fa4 +; RV64IFD-NEXT: fsub.d fa0, fa4, fa5 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmadd_d_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI20_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI20_0) +; RV32I-NEXT: lw s3, 0(a2) +; RV32I-NEXT: lw s4, 4(a2) +; RV32I-NEXT: mv s5, a5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s7, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a0, s6 +; RV32I-NEXT: mv a1, s7 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: lui a2, 
524288 +; RV32I-NEXT: xor a1, a1, a2 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: call __subdf3 +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_d_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI20_0) +; RV64I-NEXT: ld s1, %lo(.LCPI20_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __subdf3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd double 0.0, %a ; avoid negation using xor + %b_ = fadd double 
0.0, %b ; avoid negation using xor + %c_ = fadd double 0.0, %c ; avoid negation using xor + %1 = fmul contract double %a_, %b_ + %2 = fneg double %1 + %3 = fsub contract double %2, %c_ + ret double %3 +} + +define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { +; RV32IFD-LABEL: fnmsub_d_contract: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw zero, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV32IFD-NEXT: fadd.d fa5, fa1, fa5 +; RV32IFD-NEXT: fnmsub.d fa0, fa4, fa5, fa2 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fnmsub_d_contract: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x fa5, zero +; RV64IFD-NEXT: fadd.d fa4, fa0, fa5 +; RV64IFD-NEXT: fadd.d fa5, fa1, fa5 +; RV64IFD-NEXT: fnmsub.d fa0, fa4, fa5, fa2 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fnmsub_d_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: lui a2, %hi(.LCPI21_0) +; RV32I-NEXT: addi a2, a2, %lo(.LCPI21_0) +; RV32I-NEXT: lw s3, 0(a2) +; RV32I-NEXT: lw s4, 4(a2) +; RV32I-NEXT: mv s5, a5 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv s6, a0 +; RV32I-NEXT: mv s7, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv 
a0, s6 +; RV32I-NEXT: mv a1, s7 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: call __subdf3 +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_d_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI21_0) +; RV64I-NEXT: ld s1, %lo(.LCPI21_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __subdf3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd double 0.0, %a ; avoid negation using xor + %b_ = fadd double 0.0, %b ; avoid negation using xor + %1 = fmul contract double %a_, %b_ + %2 = fsub contract double %c, %1 + ret double 
%2 +} + +define double @fsgnjx_f64(double %x, double %y) nounwind { +; RV32IFD-LABEL: fsgnjx_f64: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: lui a0, 261888 +; RV32IFD-NEXT: sw zero, 8(sp) +; RV32IFD-NEXT: sw a0, 12(sp) +; RV32IFD-NEXT: fld fa5, 8(sp) +; RV32IFD-NEXT: fsgnj.d fa5, fa5, fa0 +; RV32IFD-NEXT: fmul.d fa0, fa5, fa1 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fsgnjx_f64: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: li a0, 1023 +; RV64IFD-NEXT: slli a0, a0, 52 +; RV64IFD-NEXT: fmv.d.x fa5, a0 +; RV64IFD-NEXT: fsgnj.d fa5, fa5, fa0 +; RV64IFD-NEXT: fmul.d fa0, fa5, fa1 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fsgnjx_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a0, 524288 +; RV32I-NEXT: lui a4, 261888 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: or a1, a0, a4 +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnjx_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: li a3, 1023 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: slli a3, a3, 52 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %z = call double @llvm.copysign.f64(double 1.0, double %x) + %mul = fmul double %z, %y + ret double %mul +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll index ad461f8f24b917..2b67d5c7ac5708 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll @@ -5,6 +5,10 @@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d \ 
; RUN: -verify-machineinstrs -target-abi=lp64d \ ; RUN: | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=RV32I %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=RV64I %s declare double @llvm.sqrt.f64(double) @@ -13,6 +17,24 @@ define double @sqrt_f64(double %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fsqrt.d fa0, fa0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: sqrt_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sqrt_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.sqrt.f64(double %a) ret double %1 } @@ -24,6 +46,24 @@ define double @fma_f64(double %a, double %b, double %c) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fma_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fma +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fma_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fma +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.fma.f64(double %a, double %b, double %c) ret double %1 } @@ -35,6 +75,38 @@ define double @fmuladd_f64(double %a, double %b, double %c) nounwind { ; CHECKIFD: # %bb.0: ; 
CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fmuladd_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a4 +; RV32I-NEXT: mv s1, a5 +; RV32I-NEXT: call __muldf3 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: call __adddf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmuladd_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __muldf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __adddf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) ret double %1 } @@ -46,6 +118,20 @@ define double @fabs_f64(double %a) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fabs.d fa0, fa0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: fabs_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fabs_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: srli a1, a1, 1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret %1 = call double @llvm.fabs.f64(double %a) ret double %1 } @@ -57,6 +143,24 @@ define double @minnum_f64(double %a, double %b) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fmin.d fa0, fa0, fa1 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: minnum_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; 
RV32I-NEXT: call fmin +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: minnum_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmin +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.minnum.f64(double %a, double %b) ret double %1 } @@ -68,6 +172,24 @@ define double @maxnum_f64(double %a, double %b) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fmax.d fa0, fa0, fa1 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: maxnum_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmax +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: maxnum_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmax +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.maxnum.f64(double %a, double %b) ret double %1 } @@ -79,6 +201,25 @@ define double @copysign_f64(double %a, double %b) nounwind { ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fsgnj.d fa0, fa0, fa1 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: copysign_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a4, a2, -1 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a2, a3, a2 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: copysign_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, -1 +; RV64I-NEXT: slli a3, a2, 63 +; RV64I-NEXT: srli a2, a2, 1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret %1 = call double @llvm.copysign.f64(double %a, double %b) ret double %1 } @@ -103,6 +244,24 @@ define double @floor_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 
8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: floor_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call floor +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: floor_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call floor +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.floor.f64(double %a) ret double %1 } @@ -127,6 +286,24 @@ define double @ceil_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: ceil_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ceil +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ceil_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ceil +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.ceil.f64(double %a) ret double %1 } @@ -151,6 +328,24 @@ define double @trunc_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: trunc_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call trunc +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: trunc_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call trunc +; 
RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.trunc.f64(double %a) ret double %1 } @@ -175,6 +370,24 @@ define double @rint_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: rint_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call rint +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rint_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call rint +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.rint.f64(double %a) ret double %1 } @@ -199,6 +412,24 @@ define double @nearbyint_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: nearbyint_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call nearbyint +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: nearbyint_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call nearbyint +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.nearbyint.f64(double %a) ret double %1 } @@ -223,6 +454,24 @@ define double @round_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: round_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call 
round +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: round_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call round +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.round.f64(double %a) ret double %1 } @@ -247,6 +496,24 @@ define double @roundeven_f64(double %a) nounwind { ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret +; +; RV32I-LABEL: roundeven_f64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call roundeven +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: roundeven_f64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundeven +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call double @llvm.roundeven.f64(double %a) ret double %1 } @@ -259,6 +526,30 @@ define i1 @isnan_d_fpclass(double %x) { ; CHECKIFD-NEXT: andi a0, a0, 768 ; CHECKIFD-NEXT: snez a0, a0 ; CHECKIFD-NEXT: ret +; +; RV32I-LABEL: isnan_d_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a3, a2, -1 +; RV32I-NEXT: lui a2, 524032 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: beq a1, a2, .LBB14_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a0, a2, a1 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB14_2: +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isnan_d_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: li a2, 2047 +; RV64I-NEXT: srli a1, a1, 1 +; RV64I-NEXT: slli a2, a2, 52 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: sltu a0, a2, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 
3) ; nan ret i1 %1 } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll new file mode 100644 index 00000000000000..7fe4d2ef797afd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll @@ -0,0 +1,1099 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32f | FileCheck -check-prefix=CHECKIF %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+f -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64f | FileCheck -check-prefix=CHECKIF %s +; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s + +; These tests are each targeted at a particular RISC-V FPU instruction. +; Compares and conversions can be found in float-fcmp.ll and float-convert.ll +; respectively. Some other float-*.ll files in this folder exercise LLVM IR +; instructions that don't directly match a RISC-V instruction. 
+ +define float @fadd_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fadd_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fadd.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fadd_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd float %a, %b + ret float %1 +} + +define float @fsub_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fsub_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fsub.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsub_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __subsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fsub float %a, %b + ret float %1 +} + +define float @fmul_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fmul_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmul.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmul_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmul_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; 
RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul float %a, %b + ret float %1 +} + +define float @fdiv_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fdiv_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fdiv.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fdiv_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __divsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fdiv_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __divsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fdiv float %a, %b + ret float %1 +} + +declare float @llvm.sqrt.f32(float) + +define float @fsqrt_s(float %a) nounwind { +; CHECKIF-LABEL: fsqrt_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fsqrt.s fa0, fa0 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsqrt_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrtf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsqrt_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrtf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.sqrt.f32(float %a) + ret float %1 +} + +declare float @llvm.copysign.f32(float, float) + +define float @fsgnj_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fsgnj_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fsgnj.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsgnj_s: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a3, a2, -1 +; RV32I-NEXT: and a0, 
a0, a3 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnj_s: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a3, a2, -1 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret + %1 = call float @llvm.copysign.f32(float %a, float %b) + ret float %1 +} + +define float @fsgnjn_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fsgnjn_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fadd.s fa5, fa0, fa1 +; CHECKIF-NEXT: fsgnjn.s fa0, fa0, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsgnjn_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: addi a2, a1, -1 +; RV32I-NEXT: and a2, s0, a2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnjn_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: addiw a2, a1, -1 +; RV64I-NEXT: and a2, s0, a2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd float %a, %b + %2 = fneg float %1 + %3 = call float @llvm.copysign.f32(float %a, float %2) + ret float %3 +} + +declare float @llvm.fabs.f32(float) + +define float @fabs_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fabs_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: 
fadd.s fa5, fa0, fa1 +; CHECKIF-NEXT: fabs.s fa4, fa5 +; CHECKIF-NEXT: fadd.s fa0, fa4, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fabs_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lui a0, 524288 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fabs_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lui a0, 524288 +; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd float %a, %b + %2 = call float @llvm.fabs.f32(float %1) + %3 = fadd float %2, %1 + ret float %3 +} + +declare float @llvm.minnum.f32(float, float) + +define float @fmin_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fmin_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmin.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmin_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fminf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmin_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fminf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.minnum.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.maxnum.f32(float, float) + +define float @fmax_s(float %a, float %b) nounwind { +; CHECKIF-LABEL: fmax_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: 
fmax.s fa0, fa0, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmax_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaxf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmax_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaxf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.maxnum.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fmadd_s(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fmadd_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmadd.s fa0, fa0, fa1, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmadd_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %1 +} + +define float @fmsub_s(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fmsub_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fmsub.s fa0, fa0, fa1, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmsub_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: 
lui a0, %hi(.LCPI11_0) +; RV32I-NEXT: lw a1, %lo(.LCPI11_0)(a0) +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a2, a0, a2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a0, %hi(.LCPI11_0) +; RV64I-NEXT: lw a1, %lo(.LCPI11_0)(a0) +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd float 0.0, %c ; avoid negation using xor + %negc = fneg float %c_ + %1 = call float @llvm.fma.f32(float %a, float %b, float %negc) + ret float %1 +} + +define float @fnmadd_s(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmadd_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa4, fa0, fa5 +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fnmadd.s fa0, fa4, fa1, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI12_0) +; RV32I-NEXT: lw s1, %lo(.LCPI12_0)(a1) +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, s3, a2 +; RV32I-NEXT: xor a2, a0, a2 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI12_0) +; RV64I-NEXT: lw s1, %lo(.LCPI12_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a + %c_ = fadd float 0.0, %c + %nega = fneg float %a_ + %negc = fneg float %c_ + %1 = call float 
@llvm.fma.f32(float %nega, float %b, float %negc) + ret float %1 +} + +define float @fnmadd_s_2(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmadd_s_2: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa4, fa1, fa5 +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fnmadd.s fa0, fa4, fa0, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_s_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI13_0) +; RV32I-NEXT: lw s1, %lo(.LCPI13_0)(a1) +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: xor a1, s3, a2 +; RV32I-NEXT: xor a2, a0, a2 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_s_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI13_0) +; RV64I-NEXT: lw s1, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call 
__addsf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: xor a1, s3, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %b_ = fadd float 0.0, %b + %c_ = fadd float 0.0, %c + %negb = fneg float %b_ + %negc = fneg float %c_ + %1 = call float @llvm.fma.f32(float %a, float %negb, float %negc) + ret float %1 +} + +define float @fnmadd_s_3(float %a, float %b, float %c) nounwind { +; RV32IF-LABEL: fnmadd_s_3: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmadd.s ft0, fa0, fa1, fa2 +; RV32IF-NEXT: fneg.s fa0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fnmadd_s_3: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmadd.s ft0, fa0, fa1, fa2 +; RV64IF-NEXT: fneg.s fa0, ft0 +; RV64IF-NEXT: ret +; +; CHECKIF-LABEL: fnmadd_s_3: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmadd.s fa5, fa0, fa1, fa2 +; CHECKIF-NEXT: fneg.s fa0, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_s_3: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_s_3: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + %neg = fneg float %1 + ret float 
%neg +} + +define float @fnmadd_nsz(float %a, float %b, float %c) nounwind { +; RV32IF-LABEL: fnmadd_nsz: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fnmadd.s fa0, fa0, fa1, fa2 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fnmadd_nsz: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fnmadd.s fa0, fa0, fa1, fa2 +; RV64IF-NEXT: ret +; +; CHECKIF-LABEL: fnmadd_nsz: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmadd.s fa5, fa0, fa1, fa2 +; CHECKIF-NEXT: fneg.s fa0, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_nsz: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_nsz: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call nsz float @llvm.fma.f32(float %a, float %b, float %c) + %neg = fneg nsz float %1 + ret float %neg +} + +define float @fnmsub_s(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmsub_s: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa5, fa0, fa5 +; CHECKIF-NEXT: fnmsub.s fa0, fa5, fa1, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmsub_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI16_0) +; RV32I-NEXT: lw a1, %lo(.LCPI16_0)(a1) +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: call fmaf +; 
RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI16_0) +; RV64I-NEXT: lw a1, %lo(.LCPI16_0)(a1) +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a + %nega = fneg float %a_ + %1 = call float @llvm.fma.f32(float %nega, float %b, float %c) + ret float %1 +} + +define float @fnmsub_s_2(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmsub_s_2: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa5, fa1, fa5 +; CHECKIF-NEXT: fnmsub.s fa0, fa5, fa0, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmsub_s_2: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI17_0) +; RV32I-NEXT: lw a1, %lo(.LCPI17_0)(a1) +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a1, a0, a1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw 
s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_s_2: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI17_0) +; RV64I-NEXT: lw a1, %lo(.LCPI17_0)(a1) +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a1, a0, a1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %b_ = fadd float 0.0, %b + %negb = fneg float %b_ + %1 = call float @llvm.fma.f32(float %a, float %negb, float %c) + ret float %1 +} + +define float @fmadd_s_contract(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fmadd_s_contract: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmadd.s fa0, fa0, fa1, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmadd_s_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmadd_s_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 
8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul contract float %a, %b + %2 = fadd contract float %1, %c + ret float %2 +} + +define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fmsub_s_contract: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fmul.s fa4, fa0, fa1 +; CHECKIF-NEXT: fsub.s fa0, fa4, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fmsub_s_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: lui a0, %hi(.LCPI19_0) +; RV32I-NEXT: lw a1, %lo(.LCPI19_0)(a0) +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __subsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmsub_s_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: lui a0, %hi(.LCPI19_0) +; RV64I-NEXT: lw a1, %lo(.LCPI19_0)(a0) +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 
24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd float 0.0, %c ; avoid negation using xor + %1 = fmul contract float %a, %b + %2 = fsub contract float %1, %c_ + ret float %2 +} + +define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmadd_s_contract: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa4, fa0, fa5 +; CHECKIF-NEXT: fadd.s fa3, fa1, fa5 +; CHECKIF-NEXT: fadd.s fa5, fa2, fa5 +; CHECKIF-NEXT: fmul.s fa4, fa4, fa3 +; CHECKIF-NEXT: fneg.s fa4, fa4 +; CHECKIF-NEXT: fsub.s fa0, fa4, fa5 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmadd_s_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI20_0) +; RV32I-NEXT: lw s1, %lo(.LCPI20_0)(a1) +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __subsf3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded 
Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmadd_s_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI20_0) +; RV64I-NEXT: lw s1, %lo(.LCPI20_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a ; avoid negation using xor + %b_ = fadd float 0.0, %b ; avoid negation using xor + %c_ = fadd float 0.0, %c ; avoid negation using xor + %1 = fmul contract float %a_, %b_ + %2 = fneg float %1 + %3 = fsub contract float %2, %c_ + ret float %3 +} + +define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { +; CHECKIF-LABEL: fnmsub_s_contract: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: fmv.w.x fa5, zero +; CHECKIF-NEXT: fadd.s fa4, fa0, fa5 +; CHECKIF-NEXT: fadd.s fa5, fa1, fa5 +; CHECKIF-NEXT: fnmsub.s fa0, fa4, fa5, fa2 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fnmsub_s_contract: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; 
RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, %hi(.LCPI21_0) +; RV32I-NEXT: lw s1, %lo(.LCPI21_0)(a1) +; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: call __subsf3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fnmsub_s_contract: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, %hi(.LCPI21_0) +; RV64I-NEXT: lw s1, %lo(.LCPI21_0)(a1) +; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded 
Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a ; avoid negation using xor + %b_ = fadd float 0.0, %b ; avoid negation using xor + %1 = fmul contract float %a_, %b_ + %2 = fsub contract float %c, %1 + ret float %2 +} + +define float @fsgnjx_f32(float %x, float %y) nounwind { +; CHECKIF-LABEL: fsgnjx_f32: +; CHECKIF: # %bb.0: +; CHECKIF-NEXT: lui a0, 260096 +; CHECKIF-NEXT: fmv.w.x fa5, a0 +; CHECKIF-NEXT: fsgnj.s fa5, fa5, fa0 +; CHECKIF-NEXT: fmul.s fa0, fa5, fa1 +; CHECKIF-NEXT: ret +; +; RV32I-LABEL: fsgnjx_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: lui a3, 260096 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fsgnjx_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: lui a3, 260096 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %z = call float @llvm.copysign.f32(float 1.0, float %x) + %mul = fmul float %z, %y + ret float %mul +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll index 39a5beb317ab91..4d2b74ec735a1a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll @@ -11,6 +11,10 @@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d \ ; RUN: -verify-machineinstrs -target-abi=lp64d \ ; RUN: | FileCheck -check-prefix=RV64IF %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel \ +; 
RUN: -verify-machineinstrs | FileCheck -check-prefix=RV32I %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel \ +; RUN: -verify-machineinstrs | FileCheck -check-prefix=RV64I %s define float @sqrt_f32(float %a) nounwind { ; RV32IF-LABEL: sqrt_f32: @@ -22,6 +26,24 @@ define float @sqrt_f32(float %a) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fsqrt.s fa0, fa0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: sqrt_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call sqrtf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: sqrt_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call sqrtf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.sqrt.f32(float %a) ret float %1 } @@ -36,6 +58,24 @@ define float @fma_f32(float %a, float %b, float %c) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmadd.s fa0, fa0, fa1, fa2 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fma_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fma_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %1 } @@ -50,6 +90,34 @@ define float @fmuladd_f32(float %a, float %b, float %c) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmadd.s fa0, fa0, fa1, fa2 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fmuladd_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 
4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: call __mulsf3 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fmuladd_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ret float %1 } @@ -64,6 +132,20 @@ define float @fabs_f32(float %a) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fabs.s fa0, fa0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fabs_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fabs_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret %1 = call float @llvm.fabs.f32(float %a) ret float %1 } @@ -78,6 +160,24 @@ define float @minnum_f32(float %a, float %b) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmin.s fa0, fa0, fa1 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: minnum_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fminf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: minnum_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fminf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi 
sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.minnum.f32(float %a, float %b) ret float %1 } @@ -92,6 +192,24 @@ define float @maxnum_f32(float %a, float %b) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmax.s fa0, fa0, fa1 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: maxnum_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call fmaxf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: maxnum_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call fmaxf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.maxnum.f32(float %a, float %b) ret float %1 } @@ -106,6 +224,24 @@ define float @copysign_f32(float %a, float %b) nounwind { ; RV64IF: # %bb.0: ; RV64IF-NEXT: fsgnj.s fa0, fa0, fa1 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: copysign_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a3, a2, -1 +; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: copysign_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a3, a2, -1 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret %1 = call float @llvm.copysign.f32(float %a, float %b) ret float %1 } @@ -128,6 +264,24 @@ define float @ceil_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: ceil_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call ceilf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ceil_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, 
sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call ceilf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.ceil.f32(float %a) ret float %1 } @@ -150,6 +304,24 @@ define float @trunc_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: trunc_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call truncf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: trunc_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call truncf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.trunc.f32(float %a) ret float %1 } @@ -172,6 +344,24 @@ define float @rint_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: rint_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call rintf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: rint_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call rintf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.rint.f32(float %a) ret float %1 } @@ -194,6 +384,24 @@ define float @nearbyint_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: nearbyint_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw 
ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call nearbyintf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: nearbyint_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call nearbyintf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.nearbyint.f32(float %a) ret float %1 } @@ -216,6 +424,24 @@ define float @round_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: round_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call roundf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: round_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.round.f32(float %a) ret float %1 } @@ -238,6 +464,24 @@ define float @roundeven_f32(float %a) nounwind { ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: roundeven_f32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call roundevenf +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: roundeven_f32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call roundevenf +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret %1 = call float @llvm.roundeven.f32(float %a) ret 
float %1 } @@ -256,6 +500,68 @@ define i1 @fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 927 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: lui a3, 2048 +; RV32I-NEXT: lui a4, 1046528 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: addi a5, a1, -1 +; RV32I-NEXT: sltu a3, a5, a3 +; RV32I-NEXT: lui a5, 520192 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: add a4, a1, a4 +; RV32I-NEXT: sltu a4, a4, a5 +; RV32I-NEXT: xor a5, a1, a2 +; RV32I-NEXT: sltu a2, a2, a1 +; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: seqz a5, a5 +; RV32I-NEXT: and a3, a3, a0 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: and a0, a4, a0 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: slli a3, a0, 32 +; RV64I-NEXT: li a4, 1 +; RV64I-NEXT: lui a5, 2048 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: seqz a1, a0 +; RV64I-NEXT: xor a6, a0, a2 +; RV64I-NEXT: seqz a6, a6 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: lui a6, 520192 +; RV64I-NEXT: srli a3, a3, 32 +; RV64I-NEXT: xor a3, a3, a0 +; RV64I-NEXT: sub a4, a0, a4 +; RV64I-NEXT: sltu a2, a2, a0 +; RV64I-NEXT: sub a0, a0, a5 +; RV64I-NEXT: addiw a5, a5, -1 +; RV64I-NEXT: snez a3, a3 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a4, a4, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sltu a4, a4, a5 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: sltu a0, a0, a6 +; RV64I-NEXT: and a4, a4, a3 +; RV64I-NEXT: or a1, a1, a4 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret %cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639) ret i1 %cmp } @@ -274,6 +580,24 @@ define i1 @isnan_fpclass(float 
%x) { ; RV64IF-NEXT: andi a0, a0, 768 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isnan_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: sltu a0, a2, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isnan_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: sltu a0, a2, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan ret i1 %1 } @@ -292,6 +616,26 @@ define i1 @isqnan_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 512 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isqnan_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 523264 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isqnan_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 523264 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; qnan ret i1 %1 } @@ -310,6 +654,30 @@ define i1 @issnan_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 256 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: issnan_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: lui a1, 523264 +; RV32I-NEXT: sltu a2, a2, a0 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: issnan_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 523264 +; RV64I-NEXT: sltu 
a2, a2, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: and a0, a2, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; snan ret i1 %1 } @@ -328,6 +696,26 @@ define i1 @isinf_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 129 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isinf_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: xor a0, a0, a2 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isinf_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: xor a0, a0, a2 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" ret i1 %1 } @@ -346,6 +734,22 @@ define i1 @isposinf_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 128 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isposinf_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 522240 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isposinf_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 522240 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" ret i1 %1 } @@ -364,6 +768,23 @@ define i1 @isneginf_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 1 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isneginf_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 1046528 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isneginf_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: li a1, 511 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a1, a1, 23 +; RV64I-NEXT: xor a0, a0, a1 +; 
RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" ret i1 %1 } @@ -382,6 +803,24 @@ define i1 @isfinite_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 126 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isfinite_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: sltu a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isfinite_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: sltu a0, a0, a2 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" ret i1 %1 } @@ -400,6 +839,20 @@ define i1 @isposfinite_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 112 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isposfinite_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 522240 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isposfinite_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 522240 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" ret i1 %1 } @@ -418,6 +871,32 @@ define i1 @isnegfinite_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 14 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isnegfinite_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a0, a1 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: snez a0, a0 +; RV32I-NEXT: sltu a1, a1, a2 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isnegfinite_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a1, a0, a1 
+; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: sltu a1, a1, a2 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 56) ; 0x38 = "-finite" ret i1 %1 } @@ -436,6 +915,30 @@ define i1 @isnotfinite_fpclass(float %x) { ; RV64IF-NEXT: andi a0, a0, 897 ; RV64IF-NEXT: snez a0, a0 ; RV64IF-NEXT: ret +; +; RV32I-LABEL: isnotfinite_fpclass: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: lui a2, 522240 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: xor a1, a0, a2 +; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: sltu a0, a2, a0 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: isnotfinite_fpclass: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: lui a2, 522240 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: xor a1, a0, a2 +; RV64I-NEXT: seqz a1, a1 +; RV64I-NEXT: sltu a0, a2, a0 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ox207 = "inf|nan" ret i1 %1 } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index 73311ae287e7db..74749d8f1944be 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -507,7 +507,6 @@ # DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices @@ -541,8 +540,8 @@ # DEBUG-NEXT: .. 
the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices -# DEBUG-NEXT: .. the first uncovered type index: 2, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll index 1e6ff0baddaef2..957a23f0069b89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s ; RUN: llc -mtriple=riscv64 -mattr=+v -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -riscv-v-vector-bits-max=128 -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s define @spill_lmul_1( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_1: @@ -35,6 +37,19 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O2-VLEN128-LABEL: spill_lmul_1: +; SPILL-O2-VLEN128: # %bb.0: # %entry +; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16 +; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16 +; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 +; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VLEN128-NEXT: #APP +; SPILL-O2-VLEN128-NEXT: #NO_APP +; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 +; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 +; SPILL-O2-VLEN128-NEXT: ret entry: call void asm sideeffect "", 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -77,6 +92,19 @@ define @spill_lmul_2( %va) nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O2-VLEN128-LABEL: spill_lmul_2: +; SPILL-O2-VLEN128: # %bb.0: # %entry +; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16 +; SPILL-O2-VLEN128-NEXT: addi sp, sp, -32 +; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 +; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VLEN128-NEXT: #APP +; SPILL-O2-VLEN128-NEXT: #NO_APP +; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 +; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 +; SPILL-O2-VLEN128-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -119,6 +147,19 @@ define @spill_lmul_4( %va) nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O2-VLEN128-LABEL: spill_lmul_4: +; SPILL-O2-VLEN128: # %bb.0: # %entry +; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16 +; SPILL-O2-VLEN128-NEXT: addi sp, sp, -64 +; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 +; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VLEN128-NEXT: #APP +; SPILL-O2-VLEN128-NEXT: #NO_APP +; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64 +; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 +; SPILL-O2-VLEN128-NEXT: ret entry: call void asm sideeffect "", 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -161,6 +202,19 @@ define @spill_lmul_8( %va) nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O2-VLEN128-LABEL: spill_lmul_8: +; SPILL-O2-VLEN128: # %bb.0: # %entry +; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16 +; SPILL-O2-VLEN128-NEXT: addi sp, sp, -128 +; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 +; SPILL-O2-VLEN128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VLEN128-NEXT: #APP +; SPILL-O2-VLEN128-NEXT: #NO_APP +; SPILL-O2-VLEN128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128 +; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 +; SPILL-O2-VLEN128-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() diff --git a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-cfi-info.ll similarity index 100% rename from llvm/test/CodeGen/RISCV/rvv-cfi-info.ll rename to llvm/test/CodeGen/RISCV/rvv/rvv-cfi-info.ll diff --git a/llvm/test/CodeGen/Thumb2/mve-fmas.ll b/llvm/test/CodeGen/Thumb2/mve-fmas.ll index 8016b940b8d514..377440e1bbc939 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmas.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmas.ll @@ -896,17 +896,17 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float ; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s0 -; CHECK-MVE-NEXT: vmov.f32 s12, s1 -; CHECK-MVE-NEXT: vmla.f32 s14, s4, s8 -; CHECK-MVE-NEXT: vmov.f32 s4, s3 -; CHECK-MVE-NEXT: vmov.f32 s8, s2 -; 
CHECK-MVE-NEXT: vmla.f32 s12, s5, s9 -; CHECK-MVE-NEXT: vmla.f32 s4, s7, s11 -; CHECK-MVE-NEXT: vmla.f32 s8, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s14, s2 +; CHECK-MVE-NEXT: vmov.f32 s12, s3 +; CHECK-MVE-NEXT: vmla.f32 s14, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s10, s1 +; CHECK-MVE-NEXT: vmla.f32 s12, s7, s11 +; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9 +; CHECK-MVE-NEXT: vmov.f32 s9, s0 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s6, #0 @@ -914,13 +914,13 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s8 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12 +; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s14 +; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9 ; CHECK-MVE-NEXT: bx lr entry: %0 = fmul <4 x float> %src2, %src3 @@ -949,17 +949,17 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float ; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s0 -; CHECK-MVE-NEXT: vmov.f32 s12, s1 -; CHECK-MVE-NEXT: vmla.f32 s14, s4, s8 -; CHECK-MVE-NEXT: vmov.f32 s4, s3 -; CHECK-MVE-NEXT: vmov.f32 s8, s2 -; CHECK-MVE-NEXT: vmla.f32 s12, s5, s9 -; CHECK-MVE-NEXT: vmla.f32 s4, s7, s11 -; CHECK-MVE-NEXT: vmla.f32 s8, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s14, s2 +; CHECK-MVE-NEXT: vmov.f32 s12, s3 +; CHECK-MVE-NEXT: vmla.f32 s14, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s10, s1 +; CHECK-MVE-NEXT: 
vmla.f32 s12, s7, s11 +; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9 +; CHECK-MVE-NEXT: vmov.f32 s9, s0 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s6, #0 @@ -967,13 +967,13 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s8 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12 +; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s14 +; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9 ; CHECK-MVE-NEXT: bx lr entry: %0 = fmul <4 x float> %src2, %src3 @@ -1002,17 +1002,17 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> % ; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s0 -; CHECK-MVE-NEXT: vmov.f32 s12, s1 -; CHECK-MVE-NEXT: vmls.f32 s14, s4, s8 -; CHECK-MVE-NEXT: vmov.f32 s4, s3 -; CHECK-MVE-NEXT: vmov.f32 s8, s2 -; CHECK-MVE-NEXT: vmls.f32 s12, s5, s9 -; CHECK-MVE-NEXT: vmls.f32 s4, s7, s11 -; CHECK-MVE-NEXT: vmls.f32 s8, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s14, s2 +; CHECK-MVE-NEXT: vmov.f32 s12, s3 +; CHECK-MVE-NEXT: vmls.f32 s14, s6, s10 +; CHECK-MVE-NEXT: vmov.f32 s10, s1 +; CHECK-MVE-NEXT: vmls.f32 s12, s7, s11 +; CHECK-MVE-NEXT: vmls.f32 s10, s5, s9 +; CHECK-MVE-NEXT: vmov.f32 s9, s0 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vmls.f32 s9, s4, s8 ; CHECK-MVE-NEXT: cset r1, mi ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s6, #0 @@ -1020,13 +1020,13 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> % ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s8 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12 +; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s14 +; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9 ; CHECK-MVE-NEXT: bx lr entry: %0 = fmul <4 x float> %src2, %src3 @@ -1058,14 +1058,14 @@ define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float> ; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s12, s0 -; CHECK-MVE-NEXT: vmov.f32 s14, s2 -; CHECK-MVE-NEXT: vmov.f32 s10, s1 -; CHECK-MVE-NEXT: vmla.f32 s12, s4, s8 -; CHECK-MVE-NEXT: vmov.f32 s4, s3 -; CHECK-MVE-NEXT: vmla.f32 s14, s6, s8 -; CHECK-MVE-NEXT: vmla.f32 s10, s5, s8 -; CHECK-MVE-NEXT: vmla.f32 s4, s7, s8 +; CHECK-MVE-NEXT: vmov.f32 s10, s3 +; CHECK-MVE-NEXT: vmov.f32 s12, s2 +; CHECK-MVE-NEXT: vmov.f32 s14, s1 +; CHECK-MVE-NEXT: vmov.f32 s9, s0 +; CHECK-MVE-NEXT: vmla.f32 s10, s7, s8 +; CHECK-MVE-NEXT: vmla.f32 s12, s6, s8 +; CHECK-MVE-NEXT: vmla.f32 s14, s5, s8 +; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s7, #0 @@ -1076,13 +1076,13 @@ define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float> ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s10 ; CHECK-MVE-NEXT: cmp r3, #0 -; 
CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10 +; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s14 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s12 +; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9 ; CHECK-MVE-NEXT: bx lr entry: %i = insertelement <4 x float> undef, float %src3, i32 0 @@ -1115,13 +1115,13 @@ define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float> ; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s12, s8 ; CHECK-MVE-NEXT: vmov.f32 s10, s8 -; CHECK-MVE-NEXT: vmla.f32 s12, s0, s4 -; CHECK-MVE-NEXT: vmov.f32 s4, s8 -; CHECK-MVE-NEXT: vmla.f32 s8, s2, s6 -; CHECK-MVE-NEXT: vmla.f32 s10, s1, s5 -; CHECK-MVE-NEXT: vmla.f32 s4, s3, s7 +; CHECK-MVE-NEXT: vmov.f32 s12, s8 +; CHECK-MVE-NEXT: vmov.f32 s14, s8 +; CHECK-MVE-NEXT: vmla.f32 s8, s0, s4 +; CHECK-MVE-NEXT: vmla.f32 s10, s3, s7 +; CHECK-MVE-NEXT: vmla.f32 s12, s2, s6 +; CHECK-MVE-NEXT: vmla.f32 s14, s1, s5 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s7, #0 @@ -1132,13 +1132,13 @@ define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float> ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s10 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s8 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10 +; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s14 ; CHECK-MVE-NEXT: cmp r1, #0 -; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s12 +; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s8 ; CHECK-MVE-NEXT: bx lr entry: %i = insertelement <4 x float> undef, float %src3, i32 0 diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll 
b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll index 81b6a6940a7d6b..75b6cb3e1272bc 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -42,54 +42,36 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f32_v2i32(<2 x float> %f) { ; CHECK-NEXT: vldr s20, .LCPI1_1 ; CHECK-NEXT: vcmp.f32 s17, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r4, #-1 ; CHECK-NEXT: movlt.w r5, #-2147483648 ; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: mvngt r5, #-2147483648 +; CHECK-NEXT: movgt r4, #0 ; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r0, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r0, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s18 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r4, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s18 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt.w r1, #-1 ; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: mvngt r0, #-2147483648 ; CHECK-NEXT: 
vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop {r4, r5, r7, pc} @@ -1275,54 +1257,36 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) { ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r4, #-1 ; CHECK-NEXT: movlt.w r5, #-2147483648 ; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: mvngt r5, #-2147483648 +; CHECK-NEXT: movgt r4, #0 ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r0, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r0, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r4, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt.w r1, #-1 ; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; 
CHECK-NEXT: itt gt ; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: mvngt r0, #-2147483648 ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r4, r5, r7, pc} @@ -1568,9 +1532,9 @@ define arm_aapcs_vfpcc <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) { ; CHECK-MVE-NEXT: vmaxnm.f32 s12, s2, s4 ; CHECK-MVE-NEXT: vmaxnm.f32 s10, s0, s4 ; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s1, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 ; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s4, s3, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s12, s12 ; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 ; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 @@ -1588,10 +1552,10 @@ define arm_aapcs_vfpcc <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) { ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s4 +; CHECK-MVE-NEXT: vmov r2, s8 ; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-MVE-NEXT: vmov r3, s8 +; CHECK-MVE-NEXT: vmov r3, s4 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1627,9 +1591,9 @@ define arm_aapcs_vfpcc <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) { ; CHECK-MVE-NEXT: vmaxnm.f32 s12, s2, s4 ; CHECK-MVE-NEXT: vmaxnm.f32 s10, s0, s4 ; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s1, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 ; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s4, s3, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s12, s12 ; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 ; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 @@ -1647,10 
+1611,10 @@ define arm_aapcs_vfpcc <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) { ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s4 +; CHECK-MVE-NEXT: vmov r2, s8 ; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-MVE-NEXT: vmov r3, s8 +; CHECK-MVE-NEXT: vmov r3, s4 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1686,9 +1650,9 @@ define arm_aapcs_vfpcc <4 x i16> @test_signed_v4f32_v4i16(<4 x float> %f) { ; CHECK-MVE-NEXT: vmaxnm.f32 s12, s2, s4 ; CHECK-MVE-NEXT: vmaxnm.f32 s10, s0, s4 ; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s1, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 ; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s4, s3, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s12, s12 ; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 ; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 @@ -1706,10 +1670,10 @@ define arm_aapcs_vfpcc <4 x i16> @test_signed_v4f32_v4i16(<4 x float> %f) { ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s4 +; CHECK-MVE-NEXT: vmov r2, s8 ; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-MVE-NEXT: vmov r3, s8 +; CHECK-MVE-NEXT: vmov r3, s4 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1743,9 +1707,9 @@ define arm_aapcs_vfpcc <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) { ; CHECK-MVE-NEXT: vmaxnm.f32 s12, s2, s4 ; CHECK-MVE-NEXT: vmaxnm.f32 s10, s0, s4 ; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s1, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 ; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s4, s3, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s12, s12 ; 
CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 ; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 @@ -1763,10 +1727,10 @@ define arm_aapcs_vfpcc <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) { ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s4 +; CHECK-MVE-NEXT: vmov r2, s8 ; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-MVE-NEXT: vmov r3, s8 +; CHECK-MVE-NEXT: vmov r3, s4 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1821,146 +1785,122 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f32_v4i32_duplicate(<4 x float> define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i50: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vldr s20, .LCPI28_0 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmov r6, s16 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vldr s22, .LCPI28_1 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vldr s22, .LCPI28_0 +; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: vldr s20, .LCPI28_1 +; CHECK-NEXT: vcmp.f32 s17, s22 ; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movtlt r7, #65534 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: movwlt r11, #0 +; CHECK-NEXT: movtlt r11, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #1 +; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r11, #65535 +; CHECK-NEXT: movtgt r11, #1 +; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r8] -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movtlt r5, #65534 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #1 +; CHECK-NEXT: movgt.w r6, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movtlt r7, #65534 ; 
CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #1 ; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: vcmp.f32 s19, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: mov r6, r9 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsll r4, r1, #22 -; CHECK-NEXT: lsrl r6, r5, #28 -; CHECK-NEXT: itt lt -; CHECK-NEXT: movwlt r10, #0 -; CHECK-NEXT: movtlt r10, #65534 -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: orrs r1, r5 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r10, #65535 -; CHECK-NEXT: movtgt r10, #1 -; CHECK-NEXT: str.w r1, [r8, #20] ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: orr.w r2, r6, r4 +; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: bfc r5, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt +; CHECK-NEXT: ittt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s17, s22 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: str.w r2, [r8, #16] -; CHECK-NEXT: lsrs r2, r7, #10 -; CHECK-NEXT: 
vcmp.f32 s17, s20 -; CHECK-NEXT: strb.w r2, [r8, #24] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: lsrl r2, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: lsrl r2, r1, #14 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: orr.w r1, r1, r9, lsl #4 +; CHECK-NEXT: str.w r0, [r8] +; CHECK-NEXT: lsrs r0, r7, #10 +; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: bfc r11, #18, #14 +; CHECK-NEXT: lsll r4, r7, #22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: strd r2, r1, [r8, #8] +; CHECK-NEXT: orr.w r3, r5, r7 +; CHECK-NEXT: str.w r3, [r8, #20] +; CHECK-NEXT: orr.w r2, r2, r4 +; CHECK-NEXT: str.w r2, [r8, #16] +; CHECK-NEXT: strb.w r0, [r8, #24] +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: lsrl r0, r11, #14 +; CHECK-NEXT: orr.w r2, r11, r6, lsl #4 +; CHECK-NEXT: strd r0, r2, [r8, #8] ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: bfc r10, #18, #14 -; CHECK-NEXT: orr.w r0, r10, r0, lsl #18 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: orr.w r0, r1, r9, lsl #18 ; CHECK-NEXT: str.w r0, [r8, #4] ; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI28_0: @@ -1974,131 +1914,98 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, 
r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r11, r0 ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vldr s22, .LCPI29_0 -; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: vldr s20, .LCPI29_1 -; CHECK-NEXT: vmov r8, s16 +; CHECK-NEXT: vmov r9, s17 ; CHECK-NEXT: vcmp.f32 s19, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r10, #-2147483648 +; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: mvngt r10, #-2147483648 ; CHECK-NEXT: vcmp.f32 s19, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r4, s17 -; CHECK-NEXT: it vs +; CHECK-NEXT: vmov r8, s16 +; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r9, #-2147483648 -; CHECK-NEXT: vmrs 
APSR_nzcv, fpscr -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt.w r6, #-2147483648 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: it gt +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: itt gt ; CHECK-NEXT: mvngt r6, #-2147483648 +; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vcmp.f32 s17, s22 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r4, #-2147483648 ; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: mvngt r4, #-2147483648 ; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: vmov q1[2], q1[0], r7, r10 +; CHECK-NEXT: vmov q1[2], q1[0], r7, r11 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: it lt -; 
CHECK-NEXT: movlt.w r4, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r4, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt.w r1, #-2147483648 ; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: vmov q1[3], q1[1], r6, r10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: mvngt r1, #-2147483648 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: vmov q1[3], q1[1], r6, r9 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI29_0: @@ -2118,568 +2025,370 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r10, r3 -; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vldr s22, .LCPI30_0 -; CHECK-NEXT: vmov r7, s17 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vldr s20, .LCPI30_1 -; CHECK-NEXT: vmov r4, s19 -; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: vcmp.f32 s17, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str.w r2, [r9, #33] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: str r7, [sp] @ 4-byte Spill ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r9, #29] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: mvnlt r5, #7 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt r5, #7 ; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str.w r2, [r4, #33] +; 
CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str.w r1, [r4, #29] +; CHECK-NEXT: vmov r1, s19 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r9, #25] -; CHECK-NEXT: mov r0, r3 +; CHECK-NEXT: str.w r0, [r4, #25] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vcmp.f32 s19, s22 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r8, #7 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt.w r8, #7 +; CHECK-NEXT: vcmp.f32 s19, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itttt vs +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str.w r2, [r9, #8] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, 
[r9, #4] -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: mvnlt r3, #7 ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [r4, #8] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [r4, #4] +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r9] -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r9, #28 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: orr.w r1, r9, r6, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #45] +; CHECK-NEXT: and r1, r8, #15 +; CHECK-NEXT: str.w r0, [r4, #41] +; CHECK-NEXT: and r0, r5, #15 +; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: strb.w r6, [r4, #49] +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #37] +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: lsrl r0, r11, #28 +; CHECK-NEXT: orr.w r1, r11, r10, lsl #4 +; CHECK-NEXT: strd r0, r1, [r4, #16] +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r10, r1, #28 +; CHECK-NEXT: strb.w r10, [r4, #24] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: and r0, r3, #15 +; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 +; CHECK-NEXT: str r0, [r4, #12] +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI30_0: +; CHECK-NEXT: .long 0xf1000000 @ float -6.338253E+29 +; CHECK-NEXT: .LCPI30_1: +; CHECK-NEXT: .long 0x70ffffff @ float 6.33825262E+29 + %x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x 
float> %f) + ret <4 x i100> %x +} + +define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { +; CHECK-LABEL: test_signed_v4f32_v4i128: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vldr s22, .LCPI31_0 +; CHECK-NEXT: vmov r7, s16 +; CHECK-NEXT: vldr s20, .LCPI31_1 +; CHECK-NEXT: vmov r6, s17 ; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: strd r5, r1, [r4, #48] +; CHECK-NEXT: strd r2, r3, [r4, #56] +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r10, #7 ; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, 
#0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #7 ; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: and r0, r10, #15 -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: orr.w r0, r0, r6, lsl #4 -; CHECK-NEXT: str.w r0, [r9, #37] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s17, s22 +; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r11, #7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; 
CHECK-NEXT: movgt.w r11, #7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: and r7, r11, #15 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: orr.w r7, r7, r0, lsl #4 -; CHECK-NEXT: str.w r7, [r9, #12] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r6, r5, #28 -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: orr.w r7, r5, r4, lsl #4 -; CHECK-NEXT: str.w r7, [r9, #45] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r6, [r9, #41] -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r8, #7 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r8, #7 -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: and r5, r8, #15 -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: lsrl r4, r5, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: strb.w r4, [r9, #49] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 ; 
CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: strd r0, r1, [r9, #16] -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r9, #24] +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI30_0: -; CHECK-NEXT: .long 0xf1000000 @ float -6.338253E+29 -; CHECK-NEXT: .LCPI30_1: -; CHECK-NEXT: .long 0x70ffffff @ float 6.33825262E+29 - %x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float> %f) - ret <4 x i100> %x +; CHECK-NEXT: .LCPI31_0: +; CHECK-NEXT: .long 0xff000000 @ float -1.70141183E+38 +; CHECK-NEXT: .LCPI31_1: +; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38 + %x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f) + ret <4 x i128> %x } -define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 
x float> %f) { -; CHECK-LABEL: test_signed_v4f32_v4i128: +; +; 2-Vector double to signed integer -- result size variation +; + +declare <2 x i1> @llvm.fptosi.sat.v2f64.v2i1 (<2 x double>) +declare <2 x i8> @llvm.fptosi.sat.v2f64.v2i8 (<2 x double>) +declare <2 x i13> @llvm.fptosi.sat.v2f64.v2i13 (<2 x double>) +declare <2 x i16> @llvm.fptosi.sat.v2f64.v2i16 (<2 x double>) +declare <2 x i19> @llvm.fptosi.sat.v2f64.v2i19 (<2 x double>) +declare <2 x i50> @llvm.fptosi.sat.v2f64.v2i50 (<2 x double>) +declare <2 x i64> @llvm.fptosi.sat.v2f64.v2i64 (<2 x double>) +declare <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double>) +declare <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double>) + +define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { +; CHECK-LABEL: test_signed_v2f64_v2i1: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vmov r5, s18 -; CHECK-NEXT: vldr s22, .LCPI31_0 -; CHECK-NEXT: vldr s20, .LCPI31_1 -; CHECK-NEXT: vmov r7, s16 -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; 
CHECK-NEXT: str r3, [r4, #60] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: str r2, [r4, #56] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #52] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #48] -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vmov r6, s17 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str r3, [r4, #44] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str r2, [r4, #40] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #36] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #32] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: str r3, [r4, #28] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: str r2, [r4, #24] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s22 -; 
CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #20] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str r3, [r4, #12] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr 
-; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI31_0: -; CHECK-NEXT: .long 0xff000000 @ float -1.70141183E+38 -; CHECK-NEXT: .LCPI31_1: -; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38 - %x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f) - ret <4 x i128> %x -} - -; -; 2-Vector double to signed integer -- result size variation -; - -declare <2 x i1> @llvm.fptosi.sat.v2f64.v2i1 (<2 x double>) -declare <2 x i8> @llvm.fptosi.sat.v2f64.v2i8 (<2 x double>) -declare <2 x i13> @llvm.fptosi.sat.v2f64.v2i13 (<2 x double>) -declare <2 x i16> @llvm.fptosi.sat.v2f64.v2i16 (<2 x double>) -declare <2 x i19> @llvm.fptosi.sat.v2f64.v2i19 (<2 x double>) -declare <2 x i50> @llvm.fptosi.sat.v2f64.v2i50 (<2 x double>) -declare <2 x i64> @llvm.fptosi.sat.v2f64.v2i64 (<2 x double>) -declare <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double>) -declare <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double>) - -define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { -; CHECK-LABEL: test_signed_v2f64_v2i1: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vldr d0, .LCPI32_0 -; CHECK-NEXT: vmov r8, r7, d8 -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI32_1 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: vmov r2, r3, 
d0 -; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_d2iz -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r11, #-1 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: vmov r6, r5, d9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: and r0, r11, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #12] @ 8-byte Folded Reload -; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: bfi r4, r0, #0, #1 +; CHECK-NEXT: vldr d0, .LCPI32_0 +; CHECK-NEXT: vmov r8, r7, d8 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: vldr d0, .LCPI32_1 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: bl __aeabi_d2iz +; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r11, #-1 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: vmov r6, r5, d9 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r11, #0 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r11, #0 +; CHECK-NEXT: and r0, r11, #1 +; CHECK-NEXT: ldrd r2, r3, [sp, #12] @ 8-byte Folded Reload +; CHECK-NEXT: 
rsbs r0, r0, #0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: bfi r4, r0, #0, #1 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt @@ -4244,33 +3953,37 @@ define arm_aapcs_vfpcc <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) { ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vcvtb.f32.f16 s15, s0 -; CHECK-NEXT: vmov.f32 s5, #-1.000000e+00 -; CHECK-NEXT: vldr s7, .LCPI42_0 -; CHECK-NEXT: vmaxnm.f32 s16, s15, s5 +; CHECK-NEXT: vmov.f32 s7, #-1.000000e+00 +; CHECK-NEXT: vldr s5, .LCPI42_0 +; CHECK-NEXT: vmaxnm.f32 s16, s15, s7 ; CHECK-NEXT: vcvtt.f32.f16 s12, s2 ; CHECK-NEXT: vcvtt.f32.f16 s9, s1 -; CHECK-NEXT: vminnm.f32 s16, s16, s7 +; CHECK-NEXT: vminnm.f32 s16, s16, s5 ; CHECK-NEXT: vcvtt.f32.f16 s4, s3 ; CHECK-NEXT: vcvt.s32.f32 s16, s16 ; CHECK-NEXT: vcvtb.f32.f16 s8, s3 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vcvtb.f32.f16 s1, s1 ; CHECK-NEXT: vcvtt.f32.f16 s0, s0 -; CHECK-NEXT: vmaxnm.f32 s6, s4, s5 -; CHECK-NEXT: vmaxnm.f32 s10, s8, s5 -; CHECK-NEXT: vmaxnm.f32 s14, s12, s5 -; CHECK-NEXT: vmaxnm.f32 s3, s2, s5 -; CHECK-NEXT: vmaxnm.f32 s11, s9, s5 -; CHECK-NEXT: vmaxnm.f32 s13, s1, s5 -; CHECK-NEXT: vmaxnm.f32 s5, s0, s5 -; CHECK-NEXT: vminnm.f32 s5, s5, s7 -; CHECK-NEXT: vminnm.f32 s13, s13, s7 -; CHECK-NEXT: vcvt.s32.f32 s5, s5 -; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmaxnm.f32 s6, s4, s7 +; CHECK-NEXT: vmaxnm.f32 s10, s8, s7 +; CHECK-NEXT: vmaxnm.f32 s14, s12, s7 +; CHECK-NEXT: vmaxnm.f32 s3, s2, s7 +; CHECK-NEXT: vmaxnm.f32 s11, s9, s7 +; CHECK-NEXT: vmaxnm.f32 s13, s1, s7 +; CHECK-NEXT: vmaxnm.f32 s7, s0, s7 +; CHECK-NEXT: vminnm.f32 s6, s6, s5 +; CHECK-NEXT: vminnm.f32 s10, s10, s5 +; CHECK-NEXT: vminnm.f32 s14, s14, s5 +; CHECK-NEXT: vminnm.f32 s3, s3, s5 +; CHECK-NEXT: vminnm.f32 s11, s11, s5 +; CHECK-NEXT: vminnm.f32 s13, s13, s5 +; CHECK-NEXT: vminnm.f32 s5, s7, s5 ; CHECK-NEXT: vcmp.f32 s15, s15 -; CHECK-NEXT: vminnm.f32 s11, s11, s7 -; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vcvt.s32.f32 s5, 
s5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vcvt.s32.f32 s13, s13 @@ -4280,60 +3993,56 @@ define arm_aapcs_vfpcc <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: bfi r1, r2, #0, #1 ; CHECK-NEXT: vcvt.s32.f32 s11, s11 +; CHECK-NEXT: vcmp.f32 s1, s1 ; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: vminnm.f32 s3, s3, s7 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcmp.f32 s1, s1 -; CHECK-NEXT: and r2, r2, #1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: and r2, r2, #1 ; CHECK-NEXT: vcvt.s32.f32 s3, s3 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: vcmp.f32 s9, s9 ; CHECK-NEXT: bfi r1, r2, #1, #1 ; CHECK-NEXT: vmov r2, s13 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vminnm.f32 s14, s14, s7 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vcmp.f32 s9, s9 -; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: and r2, r2, #1 +; CHECK-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: vcmp.f32 s2, s2 ; CHECK-NEXT: bfi r1, r2, #2, #1 ; CHECK-NEXT: vmov r2, s11 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vminnm.f32 s10, s10, s7 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vcmp.f32 s2, s2 +; CHECK-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: vcmp.f32 s12, s12 ; CHECK-NEXT: bfi r1, r2, #3, #1 ; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vminnm.f32 s6, s6, s7 +; CHECK-NEXT: vcvt.s32.f32 s6, s6 +; CHECK-NEXT: rsb.w r2, r2, #0 +; CHECK-NEXT: vcmp.f32 
s8, s8 ; CHECK-NEXT: bfi r1, r2, #4, #1 -; CHECK-NEXT: vcmp.f32 s12, s12 ; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s6, s6 -; CHECK-NEXT: and r2, r2, #1 -; CHECK-NEXT: vcmp.f32 s8, s8 -; CHECK-NEXT: rsbs r2, r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: and r2, r2, #1 +; CHECK-NEXT: vcmp.f32 s4, s4 +; CHECK-NEXT: rsb.w r2, r2, #0 ; CHECK-NEXT: bfi r1, r2, #5, #1 ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcmp.f32 s4, s4 -; CHECK-NEXT: and r2, r2, #1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: and r2, r2, #1 ; CHECK-NEXT: rsb.w r2, r2, #0 ; CHECK-NEXT: bfi r1, r2, #6, #1 ; CHECK-NEXT: vmov r2, s6 @@ -4360,40 +4069,40 @@ define arm_aapcs_vfpcc <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) { ; CHECK-MVE-NEXT: push {r4, r5, r7, lr} ; CHECK-MVE-NEXT: .vsave {d8} ; CHECK-MVE-NEXT: vpush {d8} -; CHECK-MVE-NEXT: vldr s8, .LCPI43_1 +; CHECK-MVE-NEXT: vldr s6, .LCPI43_1 ; CHECK-MVE-NEXT: vcvtt.f32.f16 s13, s3 ; CHECK-MVE-NEXT: vcvtb.f32.f16 s3, s3 -; CHECK-MVE-NEXT: vldr s6, .LCPI43_0 -; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s8 -; CHECK-MVE-NEXT: vcvtt.f32.f16 s4, s0 -; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1 +; CHECK-MVE-NEXT: vldr s4, .LCPI43_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s6 ; CHECK-MVE-NEXT: vcvtt.f32.f16 s7, s2 -; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s8 -; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s6 -; CHECK-MVE-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s6 +; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s4 ; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2 -; CHECK-MVE-NEXT: vmaxnm.f32 s10, s4, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s5, s0, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s11, s1, s8 -; CHECK-MVE-NEXT: vminnm.f32 s15, s15, s6 +; CHECK-MVE-NEXT: vminnm.f32 s15, s15, s4 +; CHECK-MVE-NEXT: 
vmaxnm.f32 s11, s2, s6 ; CHECK-MVE-NEXT: vcvt.s32.f32 s16, s16 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s2, s8 -; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s6 -; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s6 -; CHECK-MVE-NEXT: vminnm.f32 s9, s9, s6 -; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s6 -; CHECK-MVE-NEXT: vminnm.f32 s6, s8, s6 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s6 +; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s4 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1 ; CHECK-MVE-NEXT: vcvt.s32.f32 s15, s15 -; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s6 -; CHECK-MVE-NEXT: vcvt.s32.f32 s9, s9 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s8, s0 +; CHECK-MVE-NEXT: vmaxnm.f32 s5, s1, s6 +; CHECK-MVE-NEXT: vminnm.f32 s9, s9, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s11, s11 -; CHECK-MVE-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s8, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s6 +; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s4 +; CHECK-MVE-NEXT: vcvt.s32.f32 s9, s9 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s0, s0 +; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s4 +; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s5, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s6, s0, s6 +; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-MVE-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 ; CHECK-MVE-NEXT: vcmp.f32 s3, s3 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r12, s16 @@ -4406,7 +4115,7 @@ define arm_aapcs_vfpcc <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) { ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs.w lr, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 +; CHECK-MVE-NEXT: vmov r2, s11 ; CHECK-MVE-NEXT: vcmp.f32 s7, s7 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #0 @@ -4416,22 +4125,22 @@ define arm_aapcs_vfpcc <8 x i8> @test_signed_v8f16_v8i8(<8 x half> %f) { ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r3, #0 ; CHECK-MVE-NEXT: vmrs 
APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r0, s11 +; CHECK-MVE-NEXT: vmov r0, s5 ; CHECK-MVE-NEXT: vcmp.f32 s12, s12 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s14 -; CHECK-MVE-NEXT: vmov r4, s5 +; CHECK-MVE-NEXT: vmov r4, s10 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #0 -; CHECK-MVE-NEXT: vcmp.f32 s0, s0 +; CHECK-MVE-NEXT: vcmp.f32 s8, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r4, #0 -; CHECK-MVE-NEXT: vcmp.f32 s4, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 ; CHECK-MVE-NEXT: vmov.16 q0[0], r4 -; CHECK-MVE-NEXT: vmov r5, s10 +; CHECK-MVE-NEXT: vmov r5, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r5, #0 @@ -4468,40 +4177,40 @@ define arm_aapcs_vfpcc <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) { ; CHECK-MVE-NEXT: push {r4, r5, r7, lr} ; CHECK-MVE-NEXT: .vsave {d8} ; CHECK-MVE-NEXT: vpush {d8} -; CHECK-MVE-NEXT: vldr s8, .LCPI44_1 +; CHECK-MVE-NEXT: vldr s6, .LCPI44_1 ; CHECK-MVE-NEXT: vcvtt.f32.f16 s13, s3 ; CHECK-MVE-NEXT: vcvtb.f32.f16 s3, s3 -; CHECK-MVE-NEXT: vldr s6, .LCPI44_0 -; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s8 -; CHECK-MVE-NEXT: vcvtt.f32.f16 s4, s0 -; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1 +; CHECK-MVE-NEXT: vldr s4, .LCPI44_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s6 ; CHECK-MVE-NEXT: vcvtt.f32.f16 s7, s2 -; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s8 -; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s6 -; CHECK-MVE-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s6 +; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s4 ; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2 -; CHECK-MVE-NEXT: vmaxnm.f32 s10, s4, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s5, s0, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s11, s1, s8 -; CHECK-MVE-NEXT: vminnm.f32 s15, s15, s6 +; CHECK-MVE-NEXT: vminnm.f32 s15, 
s15, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s11, s2, s6 ; CHECK-MVE-NEXT: vcvt.s32.f32 s16, s16 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s2, s8 -; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s6 -; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s6 -; CHECK-MVE-NEXT: vminnm.f32 s9, s9, s6 -; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s6 -; CHECK-MVE-NEXT: vminnm.f32 s6, s8, s6 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s6 +; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s4 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1 ; CHECK-MVE-NEXT: vcvt.s32.f32 s15, s15 -; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s6 -; CHECK-MVE-NEXT: vcvt.s32.f32 s9, s9 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s8, s0 +; CHECK-MVE-NEXT: vmaxnm.f32 s5, s1, s6 +; CHECK-MVE-NEXT: vminnm.f32 s9, s9, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s11, s11 -; CHECK-MVE-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s8, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s6 +; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s4 +; CHECK-MVE-NEXT: vcvt.s32.f32 s9, s9 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s0, s0 +; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s4 +; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s5, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s6, s0, s6 +; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-MVE-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 ; CHECK-MVE-NEXT: vcmp.f32 s3, s3 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r12, s16 @@ -4514,7 +4223,7 @@ define arm_aapcs_vfpcc <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) { ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs.w lr, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 +; CHECK-MVE-NEXT: vmov r2, s11 ; CHECK-MVE-NEXT: vcmp.f32 s7, s7 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r2, #0 @@ -4524,22 +4233,22 @@ define arm_aapcs_vfpcc <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) { ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs 
r3, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r0, s11 +; CHECK-MVE-NEXT: vmov r0, s5 ; CHECK-MVE-NEXT: vcmp.f32 s12, s12 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r0, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov r1, s14 -; CHECK-MVE-NEXT: vmov r4, s5 +; CHECK-MVE-NEXT: vmov r4, s10 ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r1, #0 -; CHECK-MVE-NEXT: vcmp.f32 s0, s0 +; CHECK-MVE-NEXT: vcmp.f32 s8, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r4, #0 -; CHECK-MVE-NEXT: vcmp.f32 s4, s4 +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 ; CHECK-MVE-NEXT: vmov.16 q0[0], r4 -; CHECK-MVE-NEXT: vmov r5, s10 +; CHECK-MVE-NEXT: vmov r5, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: it vs ; CHECK-MVE-NEXT: movvs r5, #0 @@ -4578,838 +4287,273 @@ define arm_aapcs_vfpcc <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) { ; CHECK-MVE-NEXT: push {r4, r5, r7, lr} ; CHECK-MVE-NEXT: .vsave {d8} ; CHECK-MVE-NEXT: vpush {d8} -; CHECK-MVE-NEXT: vldr s8, .LCPI45_1 +; CHECK-MVE-NEXT: vldr s6, .LCPI45_1 ; CHECK-MVE-NEXT: vcvtt.f32.f16 s13, s3 -; CHECK-MVE-NEXT: vcvtb.f32.f16 s3, s3 -; CHECK-MVE-NEXT: vldr s6, .LCPI45_0 -; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s8 -; CHECK-MVE-NEXT: vcvtt.f32.f16 s4, s0 -; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1 -; CHECK-MVE-NEXT: vcvtt.f32.f16 s7, s2 -; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s8 -; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s6 -; CHECK-MVE-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1 -; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2 -; CHECK-MVE-NEXT: vmaxnm.f32 s10, s4, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s5, s0, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s8 -; CHECK-MVE-NEXT: vmaxnm.f32 s11, s1, s8 -; CHECK-MVE-NEXT: vminnm.f32 s15, s15, s6 -; CHECK-MVE-NEXT: vcvt.s32.f32 s16, s16 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s2, s8 -; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-MVE-NEXT: vminnm.f32 s14, s14, 
s6 -; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s6 -; CHECK-MVE-NEXT: vminnm.f32 s9, s9, s6 -; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s6 -; CHECK-MVE-NEXT: vminnm.f32 s6, s8, s6 -; CHECK-MVE-NEXT: vcvt.s32.f32 s15, s15 -; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s6 -; CHECK-MVE-NEXT: vcvt.s32.f32 s9, s9 -; CHECK-MVE-NEXT: vcvt.s32.f32 s11, s11 -; CHECK-MVE-NEXT: vcvt.s32.f32 s14, s14 -; CHECK-MVE-NEXT: vcvt.s32.f32 s5, s5 -; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s10 -; CHECK-MVE-NEXT: vcmp.f32 s3, s3 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r12, s16 -; CHECK-MVE-NEXT: vcmp.f32 s13, s13 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs.w r12, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov lr, s15 -; CHECK-MVE-NEXT: vcmp.f32 s2, s2 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs.w lr, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r2, s6 -; CHECK-MVE-NEXT: vcmp.f32 s7, s7 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r2, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r3, s9 -; CHECK-MVE-NEXT: vcmp.f32 s1, s1 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r3, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r0, s11 -; CHECK-MVE-NEXT: vcmp.f32 s12, s12 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r0, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmov r1, s14 -; CHECK-MVE-NEXT: vmov r4, s5 -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r1, #0 -; CHECK-MVE-NEXT: vcmp.f32 s0, s0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r4, #0 -; CHECK-MVE-NEXT: vcmp.f32 s4, s4 -; CHECK-MVE-NEXT: vmov.16 q0[0], r4 -; CHECK-MVE-NEXT: vmov r5, s10 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: it vs -; CHECK-MVE-NEXT: movvs r5, #0 -; CHECK-MVE-NEXT: vmov.16 q0[1], r5 -; CHECK-MVE-NEXT: vmov.16 q0[2], r0 -; CHECK-MVE-NEXT: vmov.16 q0[3], r1 -; CHECK-MVE-NEXT: vmov.16 q0[4], r2 -; CHECK-MVE-NEXT: vmov.16 q0[5], r3 
-; CHECK-MVE-NEXT: vmov.16 q0[6], r12 -; CHECK-MVE-NEXT: vmov.16 q0[7], lr -; CHECK-MVE-NEXT: vpop {d8} -; CHECK-MVE-NEXT: pop {r4, r5, r7, pc} -; CHECK-MVE-NEXT: .p2align 2 -; CHECK-MVE-NEXT: @ %bb.1: -; CHECK-MVE-NEXT: .LCPI45_0: -; CHECK-MVE-NEXT: .long 0x46fffe00 @ float 32767 -; CHECK-MVE-NEXT: .LCPI45_1: -; CHECK-MVE-NEXT: .long 0xc7000000 @ float -32768 -; -; CHECK-MVEFP-LABEL: test_signed_v8f16_v8i16: -; CHECK-MVEFP: @ %bb.0: -; CHECK-MVEFP-NEXT: vcvt.s16.f16 q0, q0 -; CHECK-MVEFP-NEXT: bx lr - %x = call <8 x i16> @llvm.fptosi.sat.v8f16.v8i16(<8 x half> %f) - ret <8 x i16> %x -} - -define arm_aapcs_vfpcc <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i19: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r7, r9, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r7, r9, r11, lr} -; CHECK-NEXT: vldr s4, .LCPI46_0 -; CHECK-NEXT: vcvtb.f32.f16 s8, s1 -; CHECK-NEXT: vcvtt.f32.f16 s12, s1 -; CHECK-NEXT: vcvtt.f32.f16 s1, s0 -; CHECK-NEXT: vldr s6, .LCPI46_1 -; CHECK-NEXT: vmaxnm.f32 s5, s1, s4 -; CHECK-NEXT: vcvtb.f32.f16 s0, s0 -; CHECK-NEXT: vmaxnm.f32 s14, s12, s4 -; CHECK-NEXT: vminnm.f32 s5, s5, s6 -; CHECK-NEXT: vmaxnm.f32 s7, s0, s4 -; CHECK-NEXT: vminnm.f32 s7, s7, s6 -; CHECK-NEXT: vcvt.s32.f32 s5, s5 -; CHECK-NEXT: vcvt.s32.f32 s7, s7 -; CHECK-NEXT: vminnm.f32 s14, s14, s6 -; CHECK-NEXT: vcvt.s32.f32 s14, s14 -; CHECK-NEXT: vmaxnm.f32 s10, s8, s4 -; CHECK-NEXT: vminnm.f32 s10, s10, s6 -; CHECK-NEXT: vcmp.f32 s1, s1 -; CHECK-NEXT: vcvt.s32.f32 s10, s10 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: mov.w r7, #0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s2 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmov r1, s7 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vcmp.f32 s12, s12 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NEXT: vcmp.f32 s8, s8 -; CHECK-NEXT: lsll r2, r7, #19 -; CHECK-NEXT: bfc r1, #19, #13 -; CHECK-NEXT: vmov r12, s14 -; CHECK-NEXT: vmaxnm.f32 s8, s0, s4 -; CHECK-NEXT: orr.w r1, r1, r2 -; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r12, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: vcvtt.f32.f16 s0, s2 -; CHECK-NEXT: vmaxnm.f32 s2, s0, s4 -; CHECK-NEXT: vminnm.f32 s8, s8, s6 -; CHECK-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vcvt.s32.f32 s8, s8 -; CHECK-NEXT: bfc r3, #19, #13 -; CHECK-NEXT: mov r2, r12 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: lsrl r2, r1, #7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: lsrl r4, r9, #26 -; CHECK-NEXT: vcvtt.f32.f16 s0, s3 -; CHECK-NEXT: mov lr, r1 -; CHECK-NEXT: orr.w r1, r4, r2 -; CHECK-NEXT: vmov r4, s2 -; CHECK-NEXT: vmaxnm.f32 s2, s0, s4 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: lsll r2, r5, #12 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: orrs r2, r1 -; CHECK-NEXT: bfc r4, #19, #13 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: lsll r4, r1, #31 -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: orrs r2, r4 -; CHECK-NEXT: str r2, [r0, #8] -; CHECK-NEXT: orr.w r2, r7, r3, lsl #6 -; CHECK-NEXT: vcvtb.f32.f16 s0, s3 -; CHECK-NEXT: orr.w r3, r2, r12, lsl #25 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmaxnm.f32 s2, s0, s4 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: vcmp.f32 s0, 
s0 -; CHECK-NEXT: lsll r2, r7, #5 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov.w r11, #0 -; CHECK-NEXT: vmov r7, s2 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: mov r4, r7 -; CHECK-NEXT: bfc r4, #19, #13 -; CHECK-NEXT: lsrl r4, r11, #14 -; CHECK-NEXT: orrs r2, r4 -; CHECK-NEXT: strh r2, [r0, #16] -; CHECK-NEXT: str r3, [r0, #4] -; CHECK-NEXT: lsrs r2, r2, #16 -; CHECK-NEXT: strb r2, [r0, #18] -; CHECK-NEXT: orr.w r2, r9, lr -; CHECK-NEXT: orrs r2, r5 -; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: orr.w r1, r1, r7, lsl #18 -; CHECK-NEXT: str r1, [r0, #12] -; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI46_0: -; CHECK-NEXT: .long 0xc8800000 @ float -262144 -; CHECK-NEXT: .LCPI46_1: -; CHECK-NEXT: .long 0x487fffc0 @ float 262143 - %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) - ret <8 x i19> %x -} - -define arm_aapcs_vfpcc <8 x i32> @test_signed_v8f16_v8i32_duplicate(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i32_duplicate: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vmovx.f16 s6, s0 -; CHECK-NEXT: vcvt.s32.f16 s8, s4 -; CHECK-NEXT: vmovx.f16 s4, s2 -; CHECK-NEXT: vcvt.s32.f16 s10, s4 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vcvt.s32.f16 s14, s2 -; CHECK-NEXT: vcvt.s32.f16 s2, s1 -; CHECK-NEXT: vcvt.s32.f16 s0, s0 -; CHECK-NEXT: vcvt.s32.f16 s4, s4 -; CHECK-NEXT: vcvt.s32.f16 s6, s6 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: vcvt.s32.f16 s12, s3 -; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r1, s6 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov r1, s14 -; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 -; CHECK-NEXT: vmov r0, s8 -; CHECK-NEXT: vmov r1, s10 -; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 -; CHECK-NEXT: bx lr - %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f) - ret <8 x i32> %x -} - -define 
arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i50: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vcvtt.f32.f16 s30, s19 -; CHECK-NEXT: vmov r0, s30 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s26, s18 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vldr s24, .LCPI48_1 -; CHECK-NEXT: vcvtb.f32.f16 s20, s16 -; CHECK-NEXT: vcvtb.f32.f16 s28, s19 -; CHECK-NEXT: vcmp.f32 s30, s24 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r7, s20 -; CHECK-NEXT: vldr s22, .LCPI48_0 -; CHECK-NEXT: vmov r6, s28 -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s26, s24 -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s22 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r9, #25] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s20, s24 -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s22 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w 
r0, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s24 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r9] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s30, s22 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: mov r7, r5 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: lsll r4, r7, #22 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: vcvtt.f32.f16 s28, s18 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: orr.w r0, r1, r7 -; CHECK-NEXT: str.w r0, [r9, #45] -; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: orrs r4, r2 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: vcvtb.f32.f16 s18, 
s17 -; CHECK-NEXT: lsrs r0, r5, #10 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: str.w r4, [r9, #41] -; CHECK-NEXT: strb.w r0, [r9, #49] -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: mov r4, r7 -; CHECK-NEXT: lsrl r4, r1, #14 -; CHECK-NEXT: orr.w r6, r1, r6, lsl #4 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtt.f32.f16 s28, s17 -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vcmp.f32 s18, s24 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 -; CHECK-NEXT: str.w r6, [r9, #37] -; CHECK-NEXT: str.w r4, [r9, #33] -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: movtlt r4, #65534 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s24 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r4, #65535 -; CHECK-NEXT: movtgt r4, #1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movwlt r10, #0 -; CHECK-NEXT: movtlt r10, #65534 -; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r10, #65535 -; CHECK-NEXT: movtgt 
r10, #1 -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: bfc r10, #18, #14 -; CHECK-NEXT: vcmp.f32 s28, s24 -; CHECK-NEXT: orr.w r0, r10, r7, lsl #18 -; CHECK-NEXT: str.w r0, [r9, #29] -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s24 -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: vcmp.f32 s20, s24 -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: mov r10, r11 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsll r6, r1, #22 -; CHECK-NEXT: lsrl r10, r5, #28 -; CHECK-NEXT: itt lt -; CHECK-NEXT: movwlt r8, #0 -; CHECK-NEXT: movtlt r8, #65534 -; CHECK-NEXT: vcmp.f32 s20, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r8, #65535 -; CHECK-NEXT: movtgt r8, #1 -; CHECK-NEXT: orrs r1, r5 -; CHECK-NEXT: str.w r1, [r9, #20] -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s24 -; CHECK-NEXT: orr.w r2, r10, r6 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; 
CHECK-NEXT: movtgt r1, #1 -; CHECK-NEXT: str.w r2, [r9, #16] -; CHECK-NEXT: lsrs r2, r4, #10 -; CHECK-NEXT: vcmp.f32 s16, s24 -; CHECK-NEXT: strb.w r2, [r9, #24] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: lsrl r2, r1, #14 -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: orr.w r1, r1, r11, lsl #4 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: strd r2, r1, [r9, #8] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: bfc r8, #18, #14 -; CHECK-NEXT: orr.w r0, r8, r0, lsl #18 -; CHECK-NEXT: str.w r0, [r9, #4] -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI48_0: -; CHECK-NEXT: .long 0x57ffffff @ float 5.6294992E+14 -; CHECK-NEXT: .LCPI48_1: -; CHECK-NEXT: .long 0xd8000000 @ float -5.62949953E+14 - %x = call <8 x i50> @llvm.fptosi.sat.v8f16.v8i50(<8 x half> %f) - ret <8 x i50> %x -} - -define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i64: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s20, s19 -; CHECK-NEXT: vmov r0, s20 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s22, s19 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, 
s22 -; CHECK-NEXT: vldr s30, .LCPI49_1 -; CHECK-NEXT: vldr s28, .LCPI49_0 -; CHECK-NEXT: vcvtb.f32.f16 s24, s16 -; CHECK-NEXT: vcmp.f32 s20, s30 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s20, s28 -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vmov r5, s16 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s22, s30 -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s22 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s30 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r8, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r8, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vcmp.f32 s22, s30 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #-2147483648 -; CHECK-NEXT: vcmp.f32 s22, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r10, #-2147483648 -; CHECK-NEXT: vcmp.f32 s22, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vcmp.f32 s16, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; 
CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtt.f32.f16 s19, s17 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: vmov r1, s19 -; CHECK-NEXT: vcmp.f32 s24, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s24, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vmov q5[2], q5[0], r0, r6 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s17, s17 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vcmp.f32 s19, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s3, s3 +; CHECK-MVE-NEXT: vldr s4, .LCPI45_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s6 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s7, s2 +; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s6 +; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s4 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2 +; CHECK-MVE-NEXT: vminnm.f32 s15, s15, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s11, s2, s6 +; CHECK-MVE-NEXT: vcvt.s32.f32 s16, s16 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s6 +; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s4 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-MVE-NEXT: vcvt.s32.f32 s15, s15 +; CHECK-MVE-NEXT: vcvtb.f32.f16 s8, s0 +; CHECK-MVE-NEXT: vmaxnm.f32 s5, s1, s6 +; 
CHECK-MVE-NEXT: vminnm.f32 s9, s9, s4 +; CHECK-MVE-NEXT: vcvt.s32.f32 s11, s11 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s8, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s6 +; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s4 +; CHECK-MVE-NEXT: vcvt.s32.f32 s9, s9 +; CHECK-MVE-NEXT: vcvtt.f32.f16 s0, s0 +; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s4 +; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s4 +; CHECK-MVE-NEXT: vcvt.s32.f32 s5, s5 +; CHECK-MVE-NEXT: vmaxnm.f32 s6, s0, s6 +; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4 +; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-MVE-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s3 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r12, s16 +; CHECK-MVE-NEXT: vcmp.f32 s13, s13 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs.w r12, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov lr, s15 +; CHECK-MVE-NEXT: vcmp.f32 s2, s2 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs.w lr, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r2, s11 +; CHECK-MVE-NEXT: vcmp.f32 s7, s7 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r2, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r3, s9 +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r0, s5 +; CHECK-MVE-NEXT: vcmp.f32 s12, s12 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmov r1, s14 +; CHECK-MVE-NEXT: vmov r4, s10 +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r1, #0 +; CHECK-MVE-NEXT: vcmp.f32 s8, s8 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r4, #0 +; CHECK-MVE-NEXT: vcmp.f32 s0, s0 +; CHECK-MVE-NEXT: vmov.16 q0[0], r4 +; CHECK-MVE-NEXT: vmov r5, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: it vs +; CHECK-MVE-NEXT: movvs r5, #0 +; CHECK-MVE-NEXT: 
vmov.16 q0[1], r5 +; CHECK-MVE-NEXT: vmov.16 q0[2], r0 +; CHECK-MVE-NEXT: vmov.16 q0[3], r1 +; CHECK-MVE-NEXT: vmov.16 q0[4], r2 +; CHECK-MVE-NEXT: vmov.16 q0[5], r3 +; CHECK-MVE-NEXT: vmov.16 q0[6], r12 +; CHECK-MVE-NEXT: vmov.16 q0[7], lr +; CHECK-MVE-NEXT: vpop {d8} +; CHECK-MVE-NEXT: pop {r4, r5, r7, pc} +; CHECK-MVE-NEXT: .p2align 2 +; CHECK-MVE-NEXT: @ %bb.1: +; CHECK-MVE-NEXT: .LCPI45_0: +; CHECK-MVE-NEXT: .long 0x46fffe00 @ float 32767 +; CHECK-MVE-NEXT: .LCPI45_1: +; CHECK-MVE-NEXT: .long 0xc7000000 @ float -32768 +; +; CHECK-MVEFP-LABEL: test_signed_v8f16_v8i16: +; CHECK-MVEFP: @ %bb.0: +; CHECK-MVEFP-NEXT: vcvt.s16.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr + %x = call <8 x i16> @llvm.fptosi.sat.v8f16.v8i16(<8 x half> %f) + ret <8 x i16> %x +} + +define arm_aapcs_vfpcc <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i19: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r7, r9, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r7, r9, r11, lr} +; CHECK-NEXT: vldr s6, .LCPI46_1 +; CHECK-NEXT: vcvtb.f32.f16 s12, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s0 +; CHECK-NEXT: vldr s4, .LCPI46_0 +; CHECK-NEXT: vmaxnm.f32 s5, s0, s6 +; CHECK-NEXT: vmaxnm.f32 s14, s12, s6 +; CHECK-NEXT: vminnm.f32 s5, s5, s4 +; CHECK-NEXT: vcvtt.f32.f16 s8, s1 +; CHECK-NEXT: vminnm.f32 s14, s14, s4 +; CHECK-NEXT: vcvt.s32.f32 s5, s5 +; CHECK-NEXT: vmaxnm.f32 s10, s8, s6 +; CHECK-NEXT: vcvt.s32.f32 s14, s14 +; CHECK-NEXT: vminnm.f32 s10, s10, s4 +; CHECK-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-NEXT: vmaxnm.f32 s7, s1, s6 +; CHECK-NEXT: vminnm.f32 s7, s7, s4 +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: vcvt.s32.f32 s7, s7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vcmp.f32 s12, s12 +; CHECK-NEXT: mov.w r7, #0 +; CHECK-NEXT: vmov r2, s5 +; CHECK-NEXT: vcvtb.f32.f16 s0, s2 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s28 -; 
CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r5, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r5, #-2147483648 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: vmov r1, s14 +; CHECK-NEXT: bfc r2, #19, #13 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s30 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r7, #-2147483648 -; CHECK-NEXT: vcmp.f32 s24, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r7, #-2147483648 -; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vcmp.f32 s8, s8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: lsll r2, r7, #19 +; CHECK-NEXT: bfc r1, #19, #13 +; CHECK-NEXT: vmov r12, s10 +; CHECK-NEXT: vcmp.f32 s1, s1 +; CHECK-NEXT: vmaxnm.f32 s8, s0, s6 +; CHECK-NEXT: orr.w r1, r1, r2 +; CHECK-NEXT: str r1, [r0] ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: vmov q5[3], q5[1], r7, r5 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtt.f32.f16 s16, s18 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: vmov r1, s16 -; CHECK-NEXT: vcmp.f32 s17, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s17, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: movvs.w r12, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s2 +; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 +; CHECK-NEXT: vminnm.f32 s8, s8, s4 +; CHECK-NEXT: vminnm.f32 s2, s2, s4 +; CHECK-NEXT: vmov r3, s7 +; CHECK-NEXT: vcvt.s32.f32 s2, s2 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vmov q6[2], q6[0], r0, r6 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s18, s18 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s18 
-; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vcmp.f32 s16, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: vcvt.s32.f32 s8, s8 +; CHECK-NEXT: bfc r3, #19, #13 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: bfc r2, #19, #13 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: lsrl r2, r1, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s30 +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: lsrl r4, r9, #26 +; CHECK-NEXT: vcvtt.f32.f16 s0, s3 +; CHECK-NEXT: mov lr, r1 +; CHECK-NEXT: orr.w r1, r4, r2 +; CHECK-NEXT: vmov r4, s2 +; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 +; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: vminnm.f32 s2, s2, s4 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r4, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r4, #-2147483648 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: vcvt.s32.f32 s2, s2 +; CHECK-NEXT: bfc r2, #19, #13 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsll r2, r5, #12 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s30 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: orrs r2, r1 +; CHECK-NEXT: bfc r4, #19, #13 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: lsll r4, r1, #31 +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: str r2, [r0, #8] +; CHECK-NEXT: orr.w r2, r7, r3, lsl #6 +; CHECK-NEXT: vcvtb.f32.f16 s0, s3 +; CHECK-NEXT: orr.w r3, r2, r12, lsl #25 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r7, #-2147483648 -; CHECK-NEXT: 
vcmp.f32 s17, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r7, #-2147483648 -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: vmov q6[3], q6[1], r7, r4 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s18, s30 -; CHECK-NEXT: vmov q3[2], q3[0], r11, r9 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s30 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r5, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r5, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s30 +; CHECK-NEXT: vminnm.f32 s2, s2, s4 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s18, s28 -; CHECK-NEXT: vmov q2[2], q2[0], r0, r6 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vmov q3[3], q3[1], r10, r8 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: vcvt.s32.f32 s2, s2 +; CHECK-NEXT: bfc r2, #19, #13 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: lsll r2, r7, #5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: vmov r7, s2 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmov q2[3], q2[1], r1, r5 -; CHECK-NEXT: vmov q0, q5 -; CHECK-NEXT: vmov q1, q6 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: add sp, 
#4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: mov r4, r7 +; CHECK-NEXT: bfc r4, #19, #13 +; CHECK-NEXT: lsrl r4, r11, #14 +; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: strh r2, [r0, #16] +; CHECK-NEXT: str r3, [r0, #4] +; CHECK-NEXT: lsrs r2, r2, #16 +; CHECK-NEXT: strb r2, [r0, #18] +; CHECK-NEXT: orr.w r2, r9, lr +; CHECK-NEXT: orrs r2, r5 +; CHECK-NEXT: orrs r1, r2 +; CHECK-NEXT: orr.w r1, r1, r7, lsl #18 +; CHECK-NEXT: str r1, [r0, #12] +; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI49_0: -; CHECK-NEXT: .long 0x5effffff @ float 9.22337149E+18 -; CHECK-NEXT: .LCPI49_1: -; CHECK-NEXT: .long 0xdf000000 @ float -9.22337203E+18 - %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) - ret <8 x i64> %x +; CHECK-NEXT: .LCPI46_0: +; CHECK-NEXT: .long 0x487fffc0 @ float 262143 +; CHECK-NEXT: .LCPI46_1: +; CHECK-NEXT: .long 0xc8800000 @ float -262144 + %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) + ret <8 x i19> %x } -define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i100: +define arm_aapcs_vfpcc <8 x i32> @test_signed_v8f16_v8i32_duplicate(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i32_duplicate: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmovx.f16 s4, s3 +; CHECK-NEXT: vmovx.f16 s6, s0 +; CHECK-NEXT: vcvt.s32.f16 s8, s4 +; CHECK-NEXT: vmovx.f16 s4, s2 +; CHECK-NEXT: vcvt.s32.f16 s10, s4 +; CHECK-NEXT: vmovx.f16 s4, s1 +; CHECK-NEXT: vcvt.s32.f16 s14, s2 +; CHECK-NEXT: vcvt.s32.f16 s2, s1 +; CHECK-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: vcvt.s32.f16 s4, s4 +; CHECK-NEXT: vcvt.s32.f16 s6, s6 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: vcvt.s32.f16 s12, s3 +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vmov r0, s12 +; CHECK-NEXT: 
vmov r1, s14 +; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 +; CHECK-NEXT: bx lr + %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f) + ret <8 x i32> %x +} + +define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i50: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -5417,996 +4561,1039 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vcvtb.f32.f16 s21, s19 -; CHECK-NEXT: vcvtt.f32.f16 s24, s19 -; CHECK-NEXT: vmov r0, s21 -; CHECK-NEXT: vcvtb.f32.f16 s26, s16 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcvtb.f32.f16 s28, s17 -; CHECK-NEXT: vcvtb.f32.f16 s30, s18 -; CHECK-NEXT: vldr s20, .LCPI50_2 -; CHECK-NEXT: vmov r8, s24 -; CHECK-NEXT: vmov r9, s26 -; CHECK-NEXT: vcvtt.f32.f16 s22, s18 -; CHECK-NEXT: vmov r6, s28 -; CHECK-NEXT: vmov r7, s30 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vldr s18, .LCPI50_3 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: vcmp.f32 s21, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s21, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s21, s21 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s21, s18 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: vmov r0, s28 +; CHECK-NEXT: mov 
r6, r1 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtt.f32.f16 s30, s16 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, s30 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtb.f32.f16 s26, s18 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vldr s20, .LCPI48_0 +; CHECK-NEXT: vldr s22, .LCPI48_1 +; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s21, s20 -; CHECK-NEXT: str.w r2, [r11, #83] -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movtlt r1, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s21, s21 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s21, s18 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r11, #79] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s21, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s21, s21 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r11, #75] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: vcmp.f32 s30, s22 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s18 +; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: str.w r2, [r11, #58] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s18 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r11, #54] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r11, #50] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s28, s18 -; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movtlt r5, #65534 +; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #1 +; CHECK-NEXT: movgt.w r4, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s18 +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: str.w r2, [r11, #33] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s18 +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 +; 
CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r11, #29] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 +; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movtlt r6, #65534 +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r6, #65535 +; CHECK-NEXT: movtgt r6, #1 +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r11, #25] -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s26, s18 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s18 +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: str r6, [sp] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: str.w r2, [r11, #8] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s18 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r11, #4] -; CHECK-NEXT: it lt +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r6, 
#0 +; CHECK-NEXT: movtlt r6, #65534 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r6, #65535 +; CHECK-NEXT: movtgt r6, #1 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r11] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s24, s18 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s21, s18 +; CHECK-NEXT: str.w r0, [r7, #25] +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r5, #7 -; CHECK-NEXT: vcmp.f32 s21, s20 ; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movwlt r9, #0 +; CHECK-NEXT: movtlt r9, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r5, #7 -; CHECK-NEXT: vcmp.f32 s21, s21 -; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: vcvtb.f32.f16 s18, s19 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r9, #65535 +; CHECK-NEXT: movtgt r9, #1 +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: and r0, r5, #15 -; CHECK-NEXT: orr.w r1, r0, r6, lsl #4 -; CHECK-NEXT: vmov r0, s22 -; 
CHECK-NEXT: str.w r1, [r11, #87] -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s22, s18 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: it lt +; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: ittt lt ; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movwlt r11, #0 +; CHECK-NEXT: movtlt r11, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s22 -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: vcvtt.f32.f16 s18, s19 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r11, #65535 +; CHECK-NEXT: movtgt r11, #1 ; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r7, #7 -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: and r0, r7, #15 -; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 -; CHECK-NEXT: vcvtt.f32.f16 s30, s17 -; CHECK-NEXT: str.w r0, [r11, #62] -; CHECK-NEXT: vmov r0, s30 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r3, [sp, #28] @ 
4-byte Spill -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s18 -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r0, #7 +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movtlt r5, #65534 +; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, #7 -; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #1 +; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str.w r0, [r11, #37] -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s16, s18 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: bfc r11, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movtlt r1, #65534 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #1 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: lsrl r2, r11, 
#28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s18 -; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r7, #7 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #7 -; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: str r0, [r7] +; CHECK-NEXT: lsrs r0, r5, #10 +; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: bfc r9, #18, #14 +; CHECK-NEXT: lsll r4, r5, #22 +; CHECK-NEXT: bfc r6, #18, #14 +; CHECK-NEXT: orr.w r3, r11, r5 +; CHECK-NEXT: str.w r3, [r7, #45] +; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: str.w r2, [r7, #41] +; CHECK-NEXT: strb.w r0, [r7, #49] +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: lsrl r0, r9, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: orr.w r2, r9, r10, lsl #4 +; CHECK-NEXT: str.w r2, [r7, #37] +; CHECK-NEXT: str.w r0, [r7, #33] +; CHECK-NEXT: orr.w r0, r6, r8, lsl #18 +; CHECK-NEXT: str.w r0, [r7, #29] +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: lsr.w r0, r3, #10 +; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: lsll r6, r3, #22 +; CHECK-NEXT: lsrl r2, r5, #28 +; CHECK-NEXT: orr.w r3, r3, r5 +; CHECK-NEXT: str r3, [r7, #20] +; CHECK-NEXT: orr.w r2, r2, r6 +; CHECK-NEXT: str r2, [r7, #16] +; CHECK-NEXT: strb r0, [r7, #24] +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: lsrl r0, r3, #14 +; CHECK-NEXT: orr.w r2, r3, r4, lsl #4 +; CHECK-NEXT: strd r0, r2, [r7, #8] ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: vcmp.f32 s24, s18 -; CHECK-NEXT: and r7, r7, #15 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: orr.w r7, r7, r0, lsl #4 -; CHECK-NEXT: str.w r7, [r11, #12] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: b.w .LBB50_3 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: orr.w r0, r1, r6, lsl #18 +; CHECK-NEXT: str r0, [r7, #4] +; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI50_2: -; CHECK-NEXT: .long 0x70ffffff @ float 6.33825262E+29 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.2: -; CHECK-NEXT: .LCPI50_3: -; CHECK-NEXT: .long 0xf1000000 @ float -6.338253E+29 -; CHECK-NEXT: .p2align 1 -; CHECK-NEXT: .LBB50_3: -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s18 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: .LCPI48_0: +; CHECK-NEXT: .long 0xd8000000 @ float -5.62949953E+14 +; CHECK-NEXT: .LCPI48_1: +; CHECK-NEXT: .long 0x57ffffff @ float 5.6294992E+14 + %x = call <8 x i50> @llvm.fptosi.sat.v8f16.v8i50(<8 x half> %f) + ret <8 x i50> %x +} + +define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtt.f32.f16 s24, s19 +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtb.f32.f16 s26, s19 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vldr s30, .LCPI49_0 +; CHECK-NEXT: vldr s28, 
.LCPI49_1 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: vcmp.f32 s24, s30 +; CHECK-NEXT: vcvtt.f32.f16 s22, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r8, #-2147483648 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: vcmp.f32 s24, s28 +; CHECK-NEXT: vcvtt.f32.f16 s20, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r6, r9, #28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: mvngt r8, #-2147483648 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: vmov r4, s22 +; CHECK-NEXT: vmov r6, s20 +; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: orr.w r7, r9, r8, lsl #4 -; CHECK-NEXT: str.w r7, [r11, #95] -; CHECK-NEXT: str.w r6, [r11, #91] -; CHECK-NEXT: vcmp.f32 s24, s18 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: vcmp.f32 s26, s30 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r7, #7 -; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt.w r11, #-2147483648 +; CHECK-NEXT: vcmp.f32 s26, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #7 -; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: itt gt +; CHECK-NEXT: mvngt r11, #-2147483648 +; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: and r7, r7, #15 -; CHECK-NEXT: vcmp.f32 s22, s18 -; CHECK-NEXT: lsrl r8, r7, #28 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vcmp.f32 
s22, s30 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: strb.w r8, [r11, #99] -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r4, #-2147483648 ; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: vcmp.f32 s22, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: mvngt r4, #-2147483648 ; CHECK-NEXT: vcmp.f32 s22, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: vcmp.f32 s22, s18 -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcmp.f32 s20, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: lsrl r10, r5, #28 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r6, #-2147483648 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: vcmp.f32 s20, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vcmp.f32 s22, s22 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: mvngt r6, #-2147483648 +; CHECK-NEXT: vcmp.f32 s20, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 -; CHECK-NEXT: str.w r7, [r11, #70] -; CHECK-NEXT: str.w r10, [r11, #66] -; CHECK-NEXT: vcmp.f32 s22, s18 -; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r7, #7 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #7 -; CHECK-NEXT: vcmp.f32 s22, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it 
vs ; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: and r5, r7, #15 -; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: lsrl r6, r5, #28 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r5, r4 -; CHECK-NEXT: strb.w r6, [r11, #74] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt.w r1, #-2147483648 +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: vcmp.f32 s30, s30 +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: itt gt +; CHECK-NEXT: mvngt r1, #-2147483648 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload -; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r4, r5, #28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: vcvtt.f32.f16 s16, s17 +; CHECK-NEXT: vmov q5[2], q5[0], r0, r7 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 -; CHECK-NEXT: str.w r7, [r11, #45] -; CHECK-NEXT: str.w r4, [r11, #41] -; CHECK-NEXT: vcmp.f32 s30, s18 -; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q5[3], q5[1], r1, r6 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r7, #7 -; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: itt lt +; 
CHECK-NEXT: movlt.w r6, #-2147483648 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #7 -; CHECK-NEXT: vcmp.f32 s30, s30 +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vcvtb.f32.f16 s16, s17 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: mvngt r6, #-2147483648 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: and r5, r7, #15 -; CHECK-NEXT: vcmp.f32 s16, s18 -; CHECK-NEXT: lsrl r6, r5, #28 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: strb.w r6, [r11, #49] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt.w r1, #-2147483648 +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: mvngt r1, #-2147483648 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s18 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: vcvtb.f32.f16 s16, s18 +; CHECK-NEXT: vmov q6[2], q6[0], r0, r7 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: vcmp.f32 s16, s18 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q6[3], q6[1], r1, r6 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vmov q3[2], 
q3[0], r10, r9 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: strd r0, r1, [r11, #16] -; CHECK-NEXT: it lt -; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt.w r1, #-2147483648 +; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: vmov q3[3], q3[1], r11, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: itt gt +; CHECK-NEXT: mvngt r1, #-2147483648 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vmov q0, q5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r11, #24] -; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: vmov q2[2], q2[0], r0, r5 +; CHECK-NEXT: vmov q1, q6 +; CHECK-NEXT: vmov q2[3], q2[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: @ %bb.4: - %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f) - ret <8 x i100> %x +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI49_0: +; CHECK-NEXT: .long 0xdf000000 @ float -9.22337203E+18 +; CHECK-NEXT: .LCPI49_1: +; CHECK-NEXT: .long 0x5effffff @ float 9.22337149E+18 + %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) + ret <8 x i64> %x } -define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i128: +define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i100: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, 
r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .pad #56 +; CHECK-NEXT: sub sp, #56 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s28, s19 -; CHECK-NEXT: vcvtb.f32.f16 s20, s16 -; CHECK-NEXT: vmov r0, s28 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vcvtt.f32.f16 s24, s16 -; CHECK-NEXT: vcvtb.f32.f16 s26, s17 -; CHECK-NEXT: vcvtb.f32.f16 s19, s19 -; CHECK-NEXT: vldr s22, .LCPI51_2 -; CHECK-NEXT: vmov r8, s20 -; CHECK-NEXT: vmov r9, s24 -; CHECK-NEXT: vcvtt.f32.f16 s30, s18 -; CHECK-NEXT: vmov r7, s26 -; CHECK-NEXT: vmov r6, s19 +; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vldr s16, .LCPI51_3 -; CHECK-NEXT: vmov r5, s30 -; CHECK-NEXT: vcvtb.f32.f16 s18, s18 -; CHECK-NEXT: vcmp.f32 s28, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r3, #-2147483648 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s16 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: vcvtt.f32.f16 s26, s17 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vldr s22, .LCPI50_0 +; CHECK-NEXT: vldr s20, .LCPI50_1 +; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: str r3, [r4, #124] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r3, #7 ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: 
movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s16 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #48] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: str r7, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: vcvtt.f32.f16 s24, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: str r2, [r4, #120] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s16 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #32] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #116] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r0, #0 -; 
CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #112] -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: vcvtb.f32.f16 s24, s17 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s19, s16 -; CHECK-NEXT: vcvtt.f32.f16 s28, s17 +; CHECK-NEXT: vcvtb.f32.f16 s18, s18 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r5, 
#0 +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: str r3, [r4, #108] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: vcvtb.f32.f16 s18, s19 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s16 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r9, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: str r2, [r4, #104] -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: mvnlt r9, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r9, #7 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s16 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str.w r2, [r10, #83] ; 
CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #100] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s19, s19 +; CHECK-NEXT: str.w r1, [r10, #79] ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #96] -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vmov r6, s18 +; CHECK-NEXT: vcvtt.f32.f16 s18, s19 +; CHECK-NEXT: str.w r0, [r10, #75] +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: str.w r4, [r10, #58] +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str.w r11, [r10, #54] +; CHECK-NEXT: str.w r7, [r10, #50] +; CHECK-NEXT: str.w r6, [r10, #33] +; CHECK-NEXT: str.w r8, [r10, #29] +; CHECK-NEXT: str.w r5, [r10, #25] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s30, s16 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: mov r4, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s22 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r4, #7 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: it gt -; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt r4, #7 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s16 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: itttt vs +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: movvs r6, #0 
+; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s22 -; CHECK-NEXT: str r3, [r4, #92] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s22 -; CHECK-NEXT: str r2, [r4, #88] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s16 +; CHECK-NEXT: str.w r2, [r10, #8] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #84] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s30, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str.w r1, [r10, #4] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #80] -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: str.w r0, [r10] +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: lsrl r0, r7, #28 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: orr.w r1, r7, r6, lsl #4 +; CHECK-NEXT: str.w r1, [r10, #95] +; CHECK-NEXT: and r1, r4, #15 +; CHECK-NEXT: str.w r0, [r10, #91] +; CHECK-NEXT: and r0, r9, #15 +; CHECK-NEXT: lsrl r6, r1, 
#28 +; CHECK-NEXT: strb.w r6, [r10, #99] +; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 +; CHECK-NEXT: str.w r0, [r10, #87] +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 +; CHECK-NEXT: str.w r1, [r10, #70] +; CHECK-NEXT: str.w r0, [r10, #66] +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r2, r1, #28 +; CHECK-NEXT: strb.w r2, [r10, #74] +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r10, #62] +; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 +; CHECK-NEXT: str.w r1, [r10, #45] +; CHECK-NEXT: str.w r0, [r10, #41] +; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r2, r1, #28 +; CHECK-NEXT: strb.w r2, [r10, #49] +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r10, #37] +; CHECK-NEXT: ldr r7, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #48] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 +; CHECK-NEXT: strd r0, r1, [r10, #16] +; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r2, r1, #28 +; CHECK-NEXT: strb.w r2, [r10, #24] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: and r0, r3, #15 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r10, #12] +; CHECK-NEXT: add sp, #56 +; CHECK-NEXT: vpop {d8, d9, 
d10, d11, d12, d13} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI50_0: +; CHECK-NEXT: .long 0xf1000000 @ float -6.338253E+29 +; CHECK-NEXT: .LCPI50_1: +; CHECK-NEXT: .long 0x70ffffff @ float 6.33825262E+29 + %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f) + ret <8 x i100> %x +} + +define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { +; CHECK-LABEL: test_signed_v8f16_v8i128: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vcvtt.f32.f16 s26, s19 +; CHECK-NEXT: vcvtb.f32.f16 s28, s19 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vcvtb.f32.f16 s24, s17 +; CHECK-NEXT: vldr s20, .LCPI51_0 ; CHECK-NEXT: vmov r5, s28 +; CHECK-NEXT: vmov r8, s24 +; CHECK-NEXT: vcvtt.f32.f16 s30, s18 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s18, s16 +; CHECK-NEXT: vldr s22, .LCPI51_1 +; CHECK-NEXT: add.w r12, r4, #112 +; CHECK-NEXT: vmov r6, s30 +; CHECK-NEXT: vcmp.f32 s26, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 
s18, s22 -; CHECK-NEXT: str r3, [r4, #76] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s16 -; CHECK-NEXT: it vs +; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: str r2, [r4, #72] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s16 -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #68] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #64] +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: vcvtb.f32.f16 s26, s18 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s28, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: it lt +; CHECK-NEXT: add.w r12, r4, #96 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s28, s20 +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs 
r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: str r3, [r4, #60] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s16 -; CHECK-NEXT: it vs +; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: str r2, [r4, #56] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s16 -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: vmov r7, s26 +; CHECK-NEXT: vcvtt.f32.f16 s28, s17 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s30, s22 +; CHECK-NEXT: add.w r12, r4, #80 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #52] -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s30, s30 +; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #48] +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: vmov 
r5, s28 +; CHECK-NEXT: vcvtt.f32.f16 s18, s16 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s26, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: it lt +; CHECK-NEXT: add.w r12, r4, #64 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: str r3, [r4, #44] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: str r2, [r4, #40] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s16 -; CHECK-NEXT: b.w .LBB51_3 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI51_2: -; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.2: -; CHECK-NEXT: .LCPI51_3: -; CHECK-NEXT: .long 0xff000000 @ float -1.70141183E+38 -; CHECK-NEXT: .p2align 1 -; CHECK-NEXT: .LBB51_3: -; CHECK-NEXT: it vs +; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NEXT: str r1, [r4, #36] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s26, s26 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #32] -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: vmov r6, s18 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s24, s16 +; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s28, s20 +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: str r3, [r4, #28] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: vmov r7, s16 +; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: str r2, [r4, #24] 
-; CHECK-NEXT: it lt +; CHECK-NEXT: add.w r12, r4, #32 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s16 -; CHECK-NEXT: it vs +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #20] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s20, s16 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s22 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s16 -; CHECK-NEXT: it vs +; CHECK-NEXT: ittt vs ; CHECK-NEXT: 
movvs r3, #0 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s22 -; CHECK-NEXT: str r3, [r4, #12] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s16 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s22 -; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r3, #-2147483648 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: it gt +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: mvngt r3, #-2147483648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s16 -; CHECK-NEXT: it vs +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s20, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s20, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} -; CHECK-NEXT: @ %bb.4: +; CHECK-NEXT: pop.w 
{r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI51_0: +; CHECK-NEXT: .long 0x7effffff @ float 1.70141173E+38 +; CHECK-NEXT: .LCPI51_1: +; CHECK-NEXT: .long 0xff000000 @ float -1.70141183E+38 %x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f) ret <8 x i128> %x } diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll index 5ab184a066e497..13609bd1903f2d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -39,40 +39,28 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f32_v2i32(<2 x float> %f) { ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vldr s18, .LCPI1_0 ; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vcmp.f32 s17, s18 -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r4, #0 ; CHECK-NEXT: movgt.w r5, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s18 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s18 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s16, s18 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r0, #-1 ; 
CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r7, pc} @@ -1059,38 +1047,26 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) { ; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt r4, #0 ; CHECK-NEXT: movgt.w r5, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt r1, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop {r4, r5, r7, pc} @@ -1504,112 +1480,91 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) { ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r9, r0 ; 
CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r4, s19 -; CHECK-NEXT: vldr s20, .LCPI28_0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vldr s20, .LCPI28_0 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #3 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: vcmp.f32 s19, #0 ; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r8, #0 ; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: itt gt +; CHECK-NEXT: vmov r5, s16 +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movwgt r7, #65535 ; CHECK-NEXT: movtgt r7, #3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: str.w r6, [r8] -; CHECK-NEXT: it lt +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movwgt r10, #65535 +; CHECK-NEXT: movtgt r10, #3 ; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: bl 
__aeabi_f2ulz +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: mov r6, r10 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: str.w r0, [r9] +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: mov r6, r8 ; CHECK-NEXT: lsll r4, r1, #22 -; CHECK-NEXT: lsrl r6, r5, #28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: orrs r1, r5 +; CHECK-NEXT: lsrl r6, r7, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r9, #65535 -; CHECK-NEXT: movtgt r9, #3 -; CHECK-NEXT: str.w r1, [r8, #20] +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #3 +; CHECK-NEXT: orrs r1, r7 +; CHECK-NEXT: str.w r1, [r9, #20] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s17, #0 ; CHECK-NEXT: orr.w r2, r6, r4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: bfc r9, #18, #14 +; CHECK-NEXT: bfc r5, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str.w r2, [r8, #16] -; CHECK-NEXT: lsrs r2, r7, #10 -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: strb.w r2, [r8, #24] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: str.w r2, [r9, 
#16] +; CHECK-NEXT: lsr.w r2, r10, #10 +; CHECK-NEXT: strb.w r2, [r9, #24] ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: orr.w r0, r9, r0, lsl #18 +; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: orr.w r0, r5, r0, lsl #18 ; CHECK-NEXT: lsrl r2, r1, #14 -; CHECK-NEXT: orr.w r1, r1, r10, lsl #4 -; CHECK-NEXT: strd r2, r1, [r8, #8] -; CHECK-NEXT: str.w r0, [r8, #4] +; CHECK-NEXT: orr.w r1, r1, r8, lsl #4 +; CHECK-NEXT: strd r2, r1, [r9, #8] +; CHECK-NEXT: str.w r0, [r9, #4] ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} ; CHECK-NEXT: .p2align 2 @@ -1636,85 +1591,61 @@ define arm_aapcs_vfpcc <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) { ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vldr s20, .LCPI29_0 ; CHECK-NEXT: vcmp.f32 s19, #0 +; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmov r9, s17 ; CHECK-NEXT: vmov r8, s16 -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r10, #-1 ; CHECK-NEXT: movgt.w r11, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: it lt +; 
CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: movgt.w r6, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r4, #-1 ; CHECK-NEXT: movgt.w r5, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r7, r11 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: vmov q1[3], q1[1], r6, r10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 -; CHECK-NEXT: vmov q1[3], q1[1], r6, r10 ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -1735,179 +1666,125 @@ define arm_aapcs_vfpcc <4 x i100> 
@test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r9, r3 -; CHECK-NEXT: vmov r3, s16 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vldr s20, .LCPI30_0 +; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: str r5, [sp] @ 4-byte Spill +; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: mov r10, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str.w r2, [r8, #33] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: str.w r2, [r4, #33] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r8, #29] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: str.w r1, 
[r4, #29] +; CHECK-NEXT: vmov r1, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r8, #25] -; CHECK-NEXT: vmov r7, s17 -; CHECK-NEXT: vmov r4, s19 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: mov r10, r3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: str.w r0, [r4, #25] ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movgt.w r10, #15 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vcmp.f32 s19, #0 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: mov r11, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str.w r2, [r8, #8] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r11, #15 +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r8, #4] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r8] -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmrs 
APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #15 -; CHECK-NEXT: and r0, r9, #15 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: orr.w r0, r0, r4, lsl #4 -; CHECK-NEXT: str.w r0, [r8, #37] -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r11, r3 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #15 -; CHECK-NEXT: and r7, r10, #15 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: orr.w r7, r7, r0, lsl #4 -; CHECK-NEXT: str.w r7, [r8, #12] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r4, r5, #28 -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 -; CHECK-NEXT: str.w r7, [r8, #45] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r4, [r8, #41] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w 
r11, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r11, #15 -; CHECK-NEXT: and r5, r11, #15 -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: lsrl r6, r5, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: strb.w r6, [r8, #49] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: str r2, [r4, #8] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str r1, [r4, #4] ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: strd r0, r1, [r8, #16] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: lsrl r0, r9, #28 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: orr.w r1, r9, r8, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #45] +; CHECK-NEXT: and r1, r11, #15 +; CHECK-NEXT: str.w r0, [r4, #41] +; CHECK-NEXT: and r0, r10, #15 +; CHECK-NEXT: lsrl r8, r1, #28 +; CHECK-NEXT: strb.w r8, [r4, #49] +; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #37] +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: lsrl r0, r7, #28 +; CHECK-NEXT: orr.w r1, r7, r6, lsl #4 +; CHECK-NEXT: strd r0, r1, [r4, #16] +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: strb r6, [r4, #24] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsrl r2, 
r1, #28 -; CHECK-NEXT: strb.w r2, [r8, #24] +; CHECK-NEXT: and r0, r3, #15 +; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 +; CHECK-NEXT: str r0, [r4, #12] +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -1932,160 +1809,87 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vmov r5, s18 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vldr s20, .LCPI31_0 ; CHECK-NEXT: vcmp.f32 s19, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: str r3, [r4, #60] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: str r2, [r4, #56] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #52] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #48] -; CHECK-NEXT: vmov r7, s16 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: strd r5, r1, [r4, #48] ; CHECK-NEXT: vmov r6, s17 -; CHECK-NEXT: mov r0, r5 +; 
CHECK-NEXT: vmov r7, s16 +; CHECK-NEXT: strd r2, r3, [r4, #56] ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str r3, [r4, #44] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str r2, [r4, #40] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #36] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #32] +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: str r3, [r4, #28] -; CHECK-NEXT: it lt -; 
CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: str r2, [r4, #24] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #20] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #16] +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str r3, [r4, #12] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4] +; 
CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} @@ -3762,232 +3566,198 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vcvtb.f32.f16 s22, s17 +; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcvtb.f32.f16 s24, s18 +; CHECK-NEXT: mov r2, r0 ; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: vcvtt.f32.f16 s20, s18 +; CHECK-NEXT: vldr s18, .LCPI48_0 +; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcvtt.f32.f16 s26, s17 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: vcmp.f32 s22, s18 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #3 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vmov r5, s26 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: vmov r6, s20 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtt.f32.f16 s28, s19 ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: vcvtb.f32.f16 s22, s16 -; CHECK-NEXT: vcvtb.f32.f16 s26, s19 ; CHECK-NEXT: vcmp.f32 s24, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: vmov r5, s22 -; CHECK-NEXT: vldr s20, .LCPI48_0 -; CHECK-NEXT: vmov r11, s26 -; CHECK-NEXT: it lt +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: itt lt +; 
CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: vcmp.f32 s24, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r4, #65535 -; CHECK-NEXT: movtgt r4, #3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #3 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: str.w r7, [r10, #25] ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s26, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, #0 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #3 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str.w r7, [r4, #25] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: mov r7, r4 -; CHECK-NEXT: str.w r0, [r10] +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: vcmp.f32 s20, #0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s20, s18 +; CHECK-NEXT: vcvtb.f32.f16 s20, s19 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: mov r0, r11 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w 
r6, #-1 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: lsll r6, r7, #22 +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #3 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vcmp.f32 s20, #0 +; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: vcmp.f32 s20, s18 +; CHECK-NEXT: vcvtt.f32.f16 s20, s19 +; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: movwgt r11, #65535 +; CHECK-NEXT: movtgt r11, #3 +; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: vcmp.f32 s20, #0 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: vcvtt.f32.f16 s26, s18 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: orr.w r0, r1, r7 -; CHECK-NEXT: str.w r0, [r10, #45] -; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: orrs r6, r2 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vcvtb.f32.f16 s18, s17 -; CHECK-NEXT: lsrs r0, r4, #10 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str.w r6, [r10, #41] -; CHECK-NEXT: strb.w r0, [r10, #49] -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: bfc r1, #18, #14 -; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: mov r4, r7 -; CHECK-NEXT: lsrl r4, r1, #14 -; CHECK-NEXT: orr.w r6, r1, r5, lsl #4 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtt.f32.f16 s26, s17 -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vcmp.f32 s20, s18 +; CHECK-NEXT: vcvtb.f32.f16 s20, s16 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r10, #-1 ; CHECK-NEXT: movwgt r5, #65535 ; CHECK-NEXT: movtgt r5, #3 -; CHECK-NEXT: str.w r6, [r10, #37] -; CHECK-NEXT: str.w r4, [r10, #33] ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r6, #65535 -; CHECK-NEXT: movtgt r6, #3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r9, #65535 -; CHECK-NEXT: movtgt r9, #3 -; CHECK-NEXT: bfc r9, #18, #14 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: orr.w r0, r9, r7, lsl #18 -; CHECK-NEXT: str.w r0, [r10, #29] -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: vcmp.f32 s20, #0 +; CHECK-NEXT: mov r8, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s20, s18 +; CHECK-NEXT: bfc r11, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: lsrs r0, r5, #10 ; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: lsll r10, r5, #22 +; CHECK-NEXT: lsrl r2, r11, #28 +; CHECK-NEXT: orr.w r1, r11, r5 +; CHECK-NEXT: str.w r1, [r4, #45] +; CHECK-NEXT: orr.w r1, r2, r10 +; CHECK-NEXT: str.w r1, [r4, #41] +; CHECK-NEXT: strb.w r0, [r4, #49] +; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: lsrl r0, r7, #14 +; CHECK-NEXT: mov r5, r4 +; CHECK-NEXT: orr.w r1, r7, r9, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #37] +; CHECK-NEXT: str.w r0, [r4, #33] ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: mov r8, r11 -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: bfc r0, #18, #14 +; CHECK-NEXT: orr.w r0, r0, r6, lsl #18 +; CHECK-NEXT: str.w r0, [r4, #29] +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: mov r6, r9 ; CHECK-NEXT: lsll r4, r1, #22 -; CHECK-NEXT: lsrl r8, r5, #28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: lsrl r6, r3, #28 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #3 -; CHECK-NEXT: orrs r1, r5 -; CHECK-NEXT: str.w r1, [r10, #20] +; CHECK-NEXT: movwgt r8, #65535 +; CHECK-NEXT: movtgt r8, #3 +; CHECK-NEXT: orrs r1, r3 +; CHECK-NEXT: str r1, [r5, #20] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: orr.w r2, r8, r4 +; CHECK-NEXT: orr.w r2, r6, r4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: vcmp.f32 s16, s18 +; CHECK-NEXT: bfc r8, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str.w r2, [r10, #16] -; CHECK-NEXT: lsrs r2, r6, #10 -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: strb.w r2, [r10, #24] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: str r2, [r5, #16] +; CHECK-NEXT: lsrs r2, r7, #10 +; CHECK-NEXT: strb r2, [r5, #24] ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: orr.w r0, r7, r0, lsl #18 +; CHECK-NEXT: bfc r1, #18, #14 +; CHECK-NEXT: orr.w r0, r8, r0, lsl #18 ; CHECK-NEXT: lsrl r2, r1, #14 -; CHECK-NEXT: orr.w r1, r1, r11, lsl #4 -; CHECK-NEXT: strd r2, r1, [r10, #8] -; CHECK-NEXT: str.w r0, [r10, #4] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: orr.w r1, r1, r9, lsl #4 +; CHECK-NEXT: strd r2, r1, [r5, #8] +; CHECK-NEXT: str r0, [r5, #4] +; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 @@ -4005,8 +3775,8 @@ 
define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vcvtt.f32.f16 s20, s19 ; CHECK-NEXT: vmov r0, s20 @@ -4016,178 +3786,128 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: vldr s28, .LCPI49_0 ; CHECK-NEXT: vcmp.f32 s20, #0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s16 +; CHECK-NEXT: mov r8, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcvtt.f32.f16 s24, s18 +; CHECK-NEXT: vcvtt.f32.f16 s26, s16 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt.w r8, #0 ; CHECK-NEXT: vcmp.f32 s20, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vmov r5, s24 -; CHECK-NEXT: it gt +; CHECK-NEXT: vmov r4, s24 +; CHECK-NEXT: vmov r6, s26 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: vmov r4, s16 ; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s28 -; CHECK-NEXT: it lt +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s20, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: it gt 
-; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vcmp.f32 s22, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movgt.w r11, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vcmp.f32 s24, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtt.f32.f16 s30, s17 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: vmov r1, s30 -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmov q5[2], q5[0], r0, r6 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: movgt.w r5, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s30, #0 -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s28 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: vcmp.f32 s26, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s28 -; CHECK-NEXT: 
it lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vcvtb.f32.f16 s16, s17 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmov q5[3], q5[1], r7, r5 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtt.f32.f16 s17, s18 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: vmov r1, s17 ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmov q6[2], q6[0], r0, r6 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, #0 +; CHECK-NEXT: vcvtt.f32.f16 s16, s17 +; CHECK-NEXT: vmov q5[2], q5[0], r0, r7 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: vmov q5[3], q5[1], r1, r6 +; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vcvtb.f32.f16 s16, s18 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: vcvtb.f32.f16 s16, s17 +; 
CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r6, #-1 ; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmov q6[3], q6[1], r7, r4 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: vmov q3[2], q3[0], r11, r9 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: vcvtb.f32.f16 s16, s18 +; CHECK-NEXT: vmov q6[2], q6[0], r0, r7 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: vmov q6[3], q6[1], r1, r6 +; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: vmov q3[2], q3[0], r10, r9 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s16, s28 -; CHECK-NEXT: vmov q2[2], q2[0], r0, r6 +; CHECK-NEXT: vmov q3[3], q3[1], r11, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmov q2[3], q2[1], r1, r5 -; CHECK-NEXT: vmov q3[3], q3[1], r10, r8 +; CHECK-NEXT: vmov q2[2], q2[0], r0, r5 ; CHECK-NEXT: vmov q0, q5 +; CHECK-NEXT: vmov q2[3], q2[1], r1, r4 ; CHECK-NEXT: vmov q1, q6 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: add sp, #4 ; 
CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 @@ -4205,385 +3925,272 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} +; CHECK-NEXT: .pad #56 +; CHECK-NEXT: sub sp, #56 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vcvtb.f32.f16 s30, s19 -; CHECK-NEXT: vcvtb.f32.f16 s28, s18 -; CHECK-NEXT: vmov r0, s30 -; CHECK-NEXT: vcvtt.f32.f16 s22, s19 -; CHECK-NEXT: vcvtb.f32.f16 s24, s16 -; CHECK-NEXT: vcvtb.f32.f16 s26, s17 -; CHECK-NEXT: vldr s20, .LCPI50_1 -; CHECK-NEXT: vmov r4, s22 -; CHECK-NEXT: vmov r7, s28 -; CHECK-NEXT: vcvtt.f32.f16 s18, s18 -; CHECK-NEXT: vmov r9, s24 -; CHECK-NEXT: vmov r6, s26 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vcvtt.f32.f16 s22, s16 +; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s30, #0 -; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: vldr s20, .LCPI50_0 +; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, #0 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: str.w r2, [r8, #83] -; CHECK-NEXT: it 
lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, #0 +; CHECK-NEXT: str r2, [sp, #48] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r8, #79] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r8, #75] -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: str r7, [sp, #40] @ 4-byte Spill ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: mov r7, r3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: vcvtt.f32.f16 s22, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: str.w r2, [r8, #58] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r8, #54] -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r8, #50] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: 
it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: str.w r2, [r8, #33] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r8, #29] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: str r2, [sp, #32] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r8, #25] -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: str.w r2, [r8, #8] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r1, [r8, #4] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, 
#0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str.w r0, [r8] -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: vcvtb.f32.f16 s22, s17 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: vcvtb.f32.f16 s18, s18 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: mov r8, r1 ; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, #0 ; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r5, #15 -; CHECK-NEXT: and r0, r5, #15 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: orr.w r1, r0, r6, lsl #4 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: str.w r1, [r8, #87] ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: it gt -; 
CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcvtb.f32.f16 s18, s19 +; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt.w r11, #0 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: and r0, r7, #15 -; CHECK-NEXT: vcvtt.f32.f16 s28, s17 -; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 -; CHECK-NEXT: str.w r0, [r8, #62] -; CHECK-NEXT: vmov r0, s28 +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: mov r10, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: mov r1, r0 -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: str.w r2, [r4, #83] ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, #15 -; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str.w r0, [r8, #37] -; CHECK-NEXT: 
vmov r0, s16 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: str.w r1, [r4, #79] ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: and r7, r7, #15 -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: orr.w r7, r7, r0, lsl #4 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r7, [r8, #12] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: lsrl r6, r9, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: orr.w r7, r9, r4, lsl #4 -; CHECK-NEXT: str.w r7, [r8, #95] -; CHECK-NEXT: str.w r6, [r8, #91] -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: vcvtt.f32.f16 s18, s19 +; CHECK-NEXT: str.w r0, [r4, #75] +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: str.w r7, [r4, #58] +; CHECK-NEXT: str.w r11, [r4, #54] ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str.w r5, [r4, #50] +; CHECK-NEXT: str.w r6, [r4, #33] +; CHECK-NEXT: str.w r8, [r4, #29] +; CHECK-NEXT: str.w r9, [r4, #25] ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: and r7, r7, #15 +; 
CHECK-NEXT: movgt.w r10, #15 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: lsrl r4, r7, #28 +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: strb.w r4, [r8, #99] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r10, r5, #28 -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt.w r8, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: orr.w r7, r5, r6, lsl #4 -; CHECK-NEXT: str.w r7, [r8, #70] -; CHECK-NEXT: str.w r10, [r8, #66] -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: and r5, r7, #15 -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: lsrl r6, r5, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: strb.w r6, [r8, #74] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsrl r4, r11, #28 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; 
CHECK-NEXT: b.w .LBB50_2 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI50_1: -; CHECK-NEXT: .long 0x717fffff @ float 1.26765052E+30 -; CHECK-NEXT: .p2align 1 -; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: it gt +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r8, #15 ; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: orr.w r7, r11, r6, lsl #4 -; CHECK-NEXT: str.w r7, [r8, #45] -; CHECK-NEXT: str.w r4, [r8, #41] -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r7, #15 -; CHECK-NEXT: and r5, r7, #15 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: lsrl r6, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: strb.w r6, [r8, #49] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: strd r0, r1, [r8, #16] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: str r2, [r4, #8] +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: str r1, [r4, #4] +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: lsrl r0, r7, #28 ; CHECK-NEXT: vmrs APSR_nzcv, 
fpscr +; CHECK-NEXT: orr.w r1, r7, r6, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #95] +; CHECK-NEXT: and r1, r8, #15 +; CHECK-NEXT: str.w r0, [r4, #91] +; CHECK-NEXT: and r0, r10, #15 +; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: strb.w r6, [r4, #99] +; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #87] +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #70] +; CHECK-NEXT: str.w r0, [r4, #66] +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r2, r1, #28 +; CHECK-NEXT: strb.w r2, [r4, #74] +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #62] +; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 +; CHECK-NEXT: str.w r1, [r4, #45] +; CHECK-NEXT: str.w r0, [r4, #41] +; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r2, r1, #28 +; CHECK-NEXT: strb.w r2, [r4, #49] +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #37] +; CHECK-NEXT: ldr r7, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #48] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: lsrl r0, r1, #28 +; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 +; CHECK-NEXT: strd r0, r1, [r4, #16] +; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r2, r1, #28 +; CHECK-NEXT: strb r2, [r4, #24] ; CHECK-NEXT: it gt 
; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r8, #24] -; CHECK-NEXT: add sp, #32 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: and r0, r3, #15 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str r0, [r4, #12] +; CHECK-NEXT: add sp, #56 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI50_0: +; CHECK-NEXT: .long 0x717fffff @ float 1.26765052E+30 %x = call <8 x i100> @llvm.fptoui.sat.v8f16.v8i100(<8 x half> %f) ret <8 x i100> %x } @@ -4591,337 +4198,195 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i128: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s26, s19 -; CHECK-NEXT: vcvtb.f32.f16 s22, s16 -; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: vcvtb.f32.f16 s24, s17 -; CHECK-NEXT: vcvtb.f32.f16 s30, s19 -; CHECK-NEXT: vldr s20, .LCPI51_0 -; CHECK-NEXT: vmov r8, s22 -; CHECK-NEXT: vmov r9, s16 -; CHECK-NEXT: vcvtt.f32.f16 s28, s18 -; CHECK-NEXT: vmov r7, s24 -; CHECK-NEXT: vmov r6, s30 +; CHECK-NEXT: vcvtt.f32.f16 s22, s19 +; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s26, 
#0 -; CHECK-NEXT: vcvtb.f32.f16 s18, s18 +; CHECK-NEXT: vcvtb.f32.f16 s28, s19 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vmov r0, s28 +; CHECK-NEXT: vldr s20, .LCPI51_0 +; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: vcvtt.f32.f16 s24, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: vcvtb.f32.f16 s26, s18 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: str r3, [r4, #124] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: str r2, [r4, #120] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #116] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #112] -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r5, s28 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: strd r6, r1, [r4, #112] +; CHECK-NEXT: vmov r7, s24 +; CHECK-NEXT: vmov r5, s26 +; CHECK-NEXT: vcvtt.f32.f16 s18, s17 +; CHECK-NEXT: strd r2, r3, [r4, #120] ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s30, #0 -; CHECK-NEXT: vcvtt.f32.f16 s26, s17 +; CHECK-NEXT: vcmp.f32 s28, #0 +; CHECK-NEXT: add.w r12, r4, #96 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 -; 
CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s28, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: str r3, [r4, #108] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 -; CHECK-NEXT: str r2, [r4, #104] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: vmov r6, s18 +; CHECK-NEXT: vcvtb.f32.f16 s22, s17 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: add.w r12, r4, #80 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #100] -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r3, #-1 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #96] +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vmov r6, s18 +; CHECK-NEXT: vcvtt.f32.f16 s24, s16 +; CHECK-NEXT: vmov r7, s22 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s28, #0 +; CHECK-NEXT: vcmp.f32 s26, #0 +; CHECK-NEXT: add.w r12, r4, #64 ; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: str r3, [r4, #92] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: str r2, [r4, #88] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #84] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #80] +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r5, s26 +; CHECK-NEXT: vmov r5, s24 +; CHECK-NEXT: vcvtb.f32.f16 s16, s16 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str r3, [r4, #76] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr 
-; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: str r2, [r4, #72] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #68] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #64] -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: vmov r6, s16 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s26, #0 +; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: str r3, [r4, #60] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: str r2, [r4, #56] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #52] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; 
CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #48] -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: str r3, [r4, #44] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: str r2, [r4, #40] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #36] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #32] -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str r3, [r4, #28] -; CHECK-NEXT: 
it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: str r2, [r4, #24] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #20] -; CHECK-NEXT: it lt +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #16] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: str r3, [r4, #12] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: str r2, [r4, #8] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r1, [r4, #4] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; 
CHECK-NEXT: stm r4!, {r0, r1, r2, r3} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI51_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll index 117469f3bd788b..101b49fea488a8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll @@ -300,27 +300,27 @@ define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> % ; CHECK-MVE-NEXT: vcmp.f16 s10, s8 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmovx.f16 s10, s2 -; CHECK-MVE-NEXT: vcmp.f16 s10, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s5 -; CHECK-MVE-NEXT: vmovx.f16 s10, s1 ; CHECK-MVE-NEXT: csetm r12, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 +; CHECK-MVE-NEXT: vcmp.f16 s10, s8 ; CHECK-MVE-NEXT: csetm lr, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s10, s8 +; CHECK-MVE-NEXT: vcmp.f16 s2, s6 +; CHECK-MVE-NEXT: vmovx.f16 s2, s5 +; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: csetm r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 +; CHECK-MVE-NEXT: vcmp.f16 s6, s2 ; CHECK-MVE-NEXT: vmovx.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s6, s0 ; CHECK-MVE-NEXT: csetm r3, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s6, s2 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: csetm r0, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s6, s2 ; CHECK-MVE-NEXT: csetm r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 @@ -329,12 +329,12 @@ define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> % ; CHECK-MVE-NEXT: vmrs APSR_nzcv, 
fpscr ; CHECK-MVE-NEXT: csetm r5, ne ; CHECK-MVE-NEXT: vmov.16 q1[0], r5 -; CHECK-MVE-NEXT: vmov.16 q1[1], r1 -; CHECK-MVE-NEXT: vmov.16 q1[2], r4 -; CHECK-MVE-NEXT: vmov.16 q1[3], r3 -; CHECK-MVE-NEXT: vmov.16 q1[4], r0 -; CHECK-MVE-NEXT: vmov.16 q1[5], lr -; CHECK-MVE-NEXT: vmov.16 q1[6], r2 +; CHECK-MVE-NEXT: vmov.16 q1[1], r4 +; CHECK-MVE-NEXT: vmov.16 q1[2], r1 +; CHECK-MVE-NEXT: vmov.16 q1[3], r0 +; CHECK-MVE-NEXT: vmov.16 q1[4], r3 +; CHECK-MVE-NEXT: vmov.16 q1[5], r2 +; CHECK-MVE-NEXT: vmov.16 q1[6], lr ; CHECK-MVE-NEXT: vmov.16 q1[7], r12 ; CHECK-MVE-NEXT: vand q0, q1, q0 ; CHECK-MVE-NEXT: pop {r4, r5, r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll index baf0076277e50c..6f2539e3cad9aa 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll @@ -43,10 +43,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: cset r0, mi -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 @@ -230,10 +228,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: cset r0, eq -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 @@ -271,17 +267,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; 
CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s3, s7 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1090,53 +1086,53 @@ entry: define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, <8 x half> %src2, <8 x half> %a, <8 x half> %b) { ; CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 ; CHECK-MVE-NEXT: vcmp.f16 s18, s16 -; CHECK-MVE-NEXT: vmovx.f16 s20, s8 +; CHECK-MVE-NEXT: vmovx.f16 s16, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s22, s12 +; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s5 -; CHECK-MVE-NEXT: vseleq.f16 s16, s22, s20 +; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 ; CHECK-MVE-NEXT: vmovx.f16 s8, s1 ; CHECK-MVE-NEXT: vcmp.f16 s8, s4 -; CHECK-MVE-NEXT: vins.f16 s0, s16 +; CHECK-MVE-NEXT: vmovx.f16 s4, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmovx.f16 s8, s13 ; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: vmovx.f16 s12, s9 -; CHECK-MVE-NEXT: vmovx.f16 s16, s13 -; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: 
vmovx.f16 s5, s14 -; CHECK-MVE-NEXT: vseleq.f16 s4, s16, s12 +; CHECK-MVE-NEXT: vins.f16 s0, s16 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s12, s10 +; CHECK-MVE-NEXT: vmovx.f16 s8, s2 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vcmp.f16 s8, s4 -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 +; CHECK-MVE-NEXT: vmovx.f16 s4, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmovx.f16 s8, s14 ; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3 -; CHECK-MVE-NEXT: vseleq.f16 s4, s5, s12 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 -; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s4, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmovx.f16 s6, s15 ; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s8 +; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s4 -; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} +; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: vcmp_une_v8f16: diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll index fe82255bff6c8c..d42c393743f4f3 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -46,10 +46,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2 ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: cset r0, mi -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: 
cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 @@ -248,10 +246,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, float %src2 ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: cset r0, eq -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 @@ -292,17 +288,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s3, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1105,42 +1101,42 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, half %src2, < ; CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s6, s0 -; CHECK-MVE-NEXT: vmovx.f16 s5, s8 +; CHECK-MVE-NEXT: vmovx.f16 s5, s12 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s7, s12 +; CHECK-MVE-NEXT: vmovx.f16 s6, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s7, s5 +; CHECK-MVE-NEXT: 
vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s9 +; CHECK-MVE-NEXT: vmovx.f16 s8, s13 ; CHECK-MVE-NEXT: vins.f16 s0, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s12, s13 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 +; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s10 -; CHECK-MVE-NEXT: vmovx.f16 s12, s14 +; CHECK-MVE-NEXT: vmovx.f16 s8, s14 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 +; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 +; CHECK-MVE-NEXT: vmovx.f16 s8, s15 ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 -; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, s4 -; CHECK-MVE-NEXT: vseleq.f16 s6, s10, s8 +; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s6 @@ -1659,10 +1655,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %sr ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: cset r0, mi -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 +; CHECK-MVE-NEXT: cset r0, mi ; 
CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 @@ -1861,10 +1855,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, float %sr ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: cset r0, eq -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 +; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 @@ -1905,17 +1897,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s3 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s4, s0 +; CHECK-MVE-NEXT: vcmp.f32 s4, s3 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -2718,42 +2710,42 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, half %src2, ; CHECK-MVE-LABEL: vcmp_r_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s6, s0 -; CHECK-MVE-NEXT: vmovx.f16 s5, s8 +; CHECK-MVE-NEXT: vmovx.f16 s5, s12 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s7, s12 +; CHECK-MVE-NEXT: vmovx.f16 s6, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s7, s5 +; CHECK-MVE-NEXT: 
vseleq.f16 s6, s5, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s9 +; CHECK-MVE-NEXT: vmovx.f16 s8, s13 ; CHECK-MVE-NEXT: vins.f16 s0, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s12, s13 +; CHECK-MVE-NEXT: vmovx.f16 s6, s9 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 +; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s10 -; CHECK-MVE-NEXT: vmovx.f16 s12, s14 +; CHECK-MVE-NEXT: vmovx.f16 s8, s14 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s2 -; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 +; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 +; CHECK-MVE-NEXT: vmovx.f16 s8, s15 ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 -; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s3 -; CHECK-MVE-NEXT: vseleq.f16 s6, s10, s8 +; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s6 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll index 16689f1e7ecd17..718657839d38db 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll @@ -43,10 +43,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s1, #0 
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: cset r0, mi -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 @@ -230,10 +228,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: cset r0, eq -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 @@ -271,17 +267,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s3, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1042,42 +1038,42 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 -; CHECK-MVE-NEXT: vmovx.f16 s14, s4 +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: vcmp.f16 s12, #0 -; CHECK-MVE-NEXT: 
vmovx.f16 s13, s8 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s14 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vins.f16 s0, s12 +; CHECK-MVE-NEXT: vmovx.f16 s4, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s5 -; CHECK-MVE-NEXT: vmovx.f16 s12, s9 +; CHECK-MVE-NEXT: vmovx.f16 s8, s9 ; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 +; CHECK-MVE-NEXT: vins.f16 s0, s12 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s6 -; CHECK-MVE-NEXT: vmovx.f16 s12, s10 +; CHECK-MVE-NEXT: vmovx.f16 s8, s10 ; CHECK-MVE-NEXT: vseleq.f16 s1, s9, s5 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s10, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s7 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s3, s11, s7 ; CHECK-MVE-NEXT: vins.f16 s3, s4 @@ -1572,10 +1568,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x floa ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: cset r0, mi -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 @@ -1759,10 +1753,8 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x floa ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: cset r0, eq -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 @@ -1800,17 +1792,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s3, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -2571,42 +2563,42 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, <8 x half> ; CHECK-MVE-LABEL: vcmp_r_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 -; CHECK-MVE-NEXT: vmovx.f16 s14, s4 +; CHECK-MVE-NEXT: vmovx.f16 s14, s8 ; CHECK-MVE-NEXT: vcmp.f16 s12, #0 -; CHECK-MVE-NEXT: 
vmovx.f16 s13, s8 +; CHECK-MVE-NEXT: vmovx.f16 s12, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s14 +; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vins.f16 s0, s12 +; CHECK-MVE-NEXT: vmovx.f16 s4, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s5 -; CHECK-MVE-NEXT: vmovx.f16 s12, s9 +; CHECK-MVE-NEXT: vmovx.f16 s8, s9 ; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 +; CHECK-MVE-NEXT: vins.f16 s0, s12 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s6 -; CHECK-MVE-NEXT: vmovx.f16 s12, s10 +; CHECK-MVE-NEXT: vmovx.f16 s8, s10 ; CHECK-MVE-NEXT: vseleq.f16 s1, s9, s5 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s10, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s7 +; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 +; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f16 s3, s11, s7 ; CHECK-MVE-NEXT: vins.f16 s3, s4 diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index ffbeeb19a4aebd..5078130f180779 
100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -2540,9 +2540,9 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask3(ptr %vp, <4 x i64> define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm2 -; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm3 = [0,4,6,1] -; CHECK-NEXT: vpermi2q 32(%rdi), %ymm2, %ymm3 +; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2 +; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm3 = [4,0,2,5] +; CHECK-NEXT: vpermi2q (%rdi), %ymm2, %ymm3 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 ; CHECK-NEXT: vmovdqa64 %ymm3, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -2556,10 +2556,10 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> % define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask4(ptr %vp, <4 x i64> %mask) { ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mem_mask4: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm2 -; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,4,6,1] +; CHECK-NEXT: vmovdqa 32(%rdi), %ymm2 +; CHECK-NEXT: vpmovsxbq {{.*#+}} ymm1 = [4,0,2,5] ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 -; CHECK-NEXT: vpermi2q 32(%rdi), %ymm2, %ymm1 {%k1} {z} +; CHECK-NEXT: vpermi2q (%rdi), %ymm2, %ymm1 {%k1} {z} ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %vec = load <8 x i64>, ptr %vp @@ -3514,12 +3514,11 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mem_mask2(ptr %vp, <8 x ; CHECK-FAST-PERLANE-LABEL: test_masked_16xfloat_to_8xfloat_perm_mem_mask2: ; CHECK-FAST-PERLANE: # %bb.0: ; CHECK-FAST-PERLANE-NEXT: vmovaps (%rdi), %xmm2 -; CHECK-FAST-PERLANE-NEXT: vmovaps 32(%rdi), %ymm3 -; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm4 = [9,5,2,3,2,8,8,1] -; CHECK-FAST-PERLANE-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4 +; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm3 = [1,13,10,11,10,0,0,9] +; 
CHECK-FAST-PERLANE-NEXT: vpermi2ps 32(%rdi), %ymm2, %ymm3 ; CHECK-FAST-PERLANE-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm2, %ymm1, %k1 -; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm4, %ymm0 {%k1} +; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm3, %ymm0 {%k1} ; CHECK-FAST-PERLANE-NEXT: retq %vec = load <16 x float>, ptr %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <8 x i32> @@ -3542,11 +3541,10 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2(ptr %vp, <8 ; CHECK-FAST-PERLANE-LABEL: test_masked_z_16xfloat_to_8xfloat_perm_mem_mask2: ; CHECK-FAST-PERLANE: # %bb.0: ; CHECK-FAST-PERLANE-NEXT: vmovaps (%rdi), %xmm2 -; CHECK-FAST-PERLANE-NEXT: vmovaps 32(%rdi), %ymm3 -; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm1 = [9,5,2,3,2,8,8,1] -; CHECK-FAST-PERLANE-NEXT: vxorps %xmm4, %xmm4, %xmm4 -; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm4, %ymm0, %k1 -; CHECK-FAST-PERLANE-NEXT: vpermi2ps %ymm2, %ymm3, %ymm1 {%k1} {z} +; CHECK-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm1 = [1,13,10,11,10,0,0,9] +; CHECK-FAST-PERLANE-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-FAST-PERLANE-NEXT: vcmpeqps %ymm3, %ymm0, %k1 +; CHECK-FAST-PERLANE-NEXT: vpermi2ps 32(%rdi), %ymm2, %ymm1 {%k1} {z} ; CHECK-FAST-PERLANE-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-FAST-PERLANE-NEXT: retq %vec = load <16 x float>, ptr %vp @@ -4398,9 +4396,9 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask0(ptr %vp, define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4 x double> %vec2, <4 x double> %mask) { ; CHECK-FAST-LABEL: test_masked_8xdouble_to_4xdouble_perm_mem_mask1: ; CHECK-FAST: # %bb.0: -; CHECK-FAST-NEXT: vmovapd (%rdi), %ymm2 -; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm3 = [3,4,2,6] -; CHECK-FAST-NEXT: vpermi2pd 32(%rdi){1to4}, %ymm2, %ymm3 +; CHECK-FAST-NEXT: vbroadcastsd 32(%rdi), %ymm2 +; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm3 = [7,0,6,2] +; CHECK-FAST-NEXT: vpermi2pd (%rdi), %ymm2, %ymm3 ; CHECK-FAST-NEXT: vxorpd 
%xmm2, %xmm2, %xmm2 ; CHECK-FAST-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-FAST-NEXT: vmovapd %ymm3, %ymm0 {%k1} @@ -4423,11 +4421,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4 define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1(ptr %vp, <4 x double> %mask) { ; CHECK-FAST-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mem_mask1: ; CHECK-FAST: # %bb.0: -; CHECK-FAST-NEXT: vmovapd (%rdi), %ymm2 -; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm1 = [3,4,2,6] +; CHECK-FAST-NEXT: vbroadcastsd 32(%rdi), %ymm2 +; CHECK-FAST-NEXT: vmovapd {{.*#+}} ymm1 = [7,0,6,2] ; CHECK-FAST-NEXT: vxorpd %xmm3, %xmm3, %xmm3 ; CHECK-FAST-NEXT: vcmpeqpd %ymm3, %ymm0, %k1 -; CHECK-FAST-NEXT: vpermi2pd 32(%rdi){1to4}, %ymm2, %ymm1 {%k1} {z} +; CHECK-FAST-NEXT: vpermi2pd (%rdi), %ymm2, %ymm1 {%k1} {z} ; CHECK-FAST-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-FAST-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll index fca5aa046b03b9..63779727ec72c6 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll @@ -11,6 +11,10 @@ ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count -basic-block-address-map-skip-bb-entries | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES +; RUN: not llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq 
-basic-block-address-map-skip-bb-entries 2>&1 | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES-ERROR +; RUN: not llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob -basic-block-address-map-skip-bb-entries 2>&1 | FileCheck %s --check-prefixes=SKIP-BB-ENTRIES-ERROR + ;; Verify that we emit an error if we try and specify values in addition to all/none ; RUN: not llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=all,bb-freq ; RUN: not llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=none,bb-freq @@ -134,3 +138,10 @@ declare i32 @__gxx_personality_v0(...) ; PGO-BRP-NEXT: .byte 5 # successor BB ID ; PGO-BRP-NEXT: .ascii "\200\200\200\200\b" # successor branch probability +; SKIP-BB-ENTRIES: .byte 17 # feature +; SKIP-BB-ENTRIES-NEXT: .quad .Lfunc_begin0 # function address +; SKIP-BB-ENTRIES-NEXT: .byte 6 # number of basic blocks +; SKIP-BB-ENTRIES-NEXT: .byte 100 # function entry count +; SKIP-BB-ENTRIES-NOT: # BB id + +; SKIP-BB-ENTRIES-ERROR: error: BB entries info is required for BBFreq and BrProb features diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll index f371ec10fe25fe..6e41e1bb87eb2f 100644 --- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll +++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll @@ -447,9 +447,8 @@ define <8 x i64> @elt5_v8i64(i64 %x) { ; X64-AVX512F-LABEL: elt5_v8i64: ; X64-AVX512F: # %bb.0: ; X64-AVX512F-NEXT: vmovq %rdi, %xmm1 -; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,2,3,4,8,6,7] -; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [42,1,2,3,4,0,6,7] -; X64-AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0 +; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,10,11,12,0,14,15] +; X64-AVX512F-NEXT: vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 ; X64-AVX512F-NEXT: retq %ins = insertelement <8 x i64> , i64 %x, i32 5 ret <8 x i64> %ins diff --git 
a/llvm/test/CodeGen/X86/tailcall-nofpclass.ll b/llvm/test/CodeGen/X86/tailcall-nofpclass.ll new file mode 100644 index 00000000000000..fd085bb1244fb7 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcall-nofpclass.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s + +; Ensures that tail call optimization can still be +; performed when nofpclass is used. + +define noundef nofpclass(nan inf) float @_Z3foof(float noundef nofpclass(nan inf) %0) { +; CHECK-LABEL: _Z3foof: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp expf@PLT # TAILCALL + %2 = tail call float @llvm.exp.f32(float %0) + ret float %2 +} diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll index 6360c68e62cc94..6fe16f85ec6bec 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -2516,10 +2516,8 @@ define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, ptr %a1) { ; ; AVX512VL-LABEL: shuffle_mem_v4f32_0624: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovaps (%rdi), %xmm2 -; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,2,4] -; AVX512VL-NEXT: vpermi2ps %xmm0, %xmm2, %xmm1 -; AVX512VL-NEXT: vmovaps %xmm1, %xmm0 +; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [4,2,6,0] +; AVX512VL-NEXT: vpermt2ps (%rdi), %xmm1, %xmm0 ; AVX512VL-NEXT: retq %1 = load <4 x float>, ptr %a1 %2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll index 97c6c4afa59909..469c087ec9c080 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll @@ -1101,6 +1101,121 @@ define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_ ret <64 x i8> %r } +define <64 x i8> 
@shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124(<64 x i8> %a0, <64 x i8> %a1) { +; AVX512F-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] +; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28] +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] +; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] +; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60] +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: 
shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] +; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28] +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] +; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; AVX512VBMI-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124: +; AVX512VBMI: # %bb.0: +; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] +; AVX512VBMI-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60] +; AVX512VBMI-NEXT: retq + %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> + ret <64 x i8> %r +} + +; PR79799 +define <64 x i8> @shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125(<64 x i8> %a0, <64 x i8> %a1) { +; AVX512F-LABEL: 
shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] +; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29] +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] +; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29] +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] +; AVX512BW-NEXT: vpermt2w %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] +; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29] +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] +; 
AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29] +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; AVX512VBMI-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125: +; AVX512VBMI: # %bb.0: +; AVX512VBMI-NEXT: vpmovsxbw {{.*#+}} zmm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] +; AVX512VBMI-NEXT: vpermt2w %zmm1, %zmm2, %zmm0 +; AVX512VBMI-NEXT: retq + %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> + ret <64 x i8> %r +} + +define <64 x i8> @shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126(<64 x i8> %a0, <64 x i8> %a1) { +; AVX512F-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] +; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] +; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: 
shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] +; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62] +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1] +; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1] +; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30] +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512DQ-NEXT: retq +; +; AVX512VBMI-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126: +; AVX512VBMI: # %bb.0: +; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5] +; AVX512VBMI-NEXT: vpalignr {{.*#+}} zmm0 = 
zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62] +; AVX512VBMI-NEXT: retq + %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> + ret <64 x i8> %r +} + define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126: ; AVX512F: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll index 8cc20ec3c1a7e8..3fd73319e85774 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -339,7 +339,7 @@ define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){ ; AVX512F: # %bb.0: # %entry ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 ^ (zmm2 & (zmm0 ^ zmm1)) ; AVX512F-NEXT: ret{{[l|q]}} entry: %0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32> @@ -354,15 +354,10 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){ ; AVX512-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} ; AVX512-NEXT: ret{{[l|q]}} ; -; X86-AVX512F-LABEL: test_mm512_mask_blend_epi16: -; X86-AVX512F: # %bb.0: # %entry -; X86-AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, 
%zmm1, %zmm0 -; X86-AVX512F-NEXT: retl -; -; X64-AVX512F-LABEL: test_mm512_mask_blend_epi16: -; X64-AVX512F: # %bb.0: # %entry -; X64-AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0 -; X64-AVX512F-NEXT: retq +; AVX512F-LABEL: test_mm512_mask_blend_epi16: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1)) +; AVX512F-NEXT: ret{{[l|q]}} entry: %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> ret <32 x i16> %0 @@ -486,18 +481,14 @@ define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) { ; X86-AVX512-LABEL: test_masked_permps_v8f32: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512-NEXT: vmovaps (%eax), %ymm2 -; X86-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15] -; X86-AVX512-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1 -; X86-AVX512-NEXT: vmovaps %ymm1, %ymm0 +; X86-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [15,14,11,3,15,14,6,7] +; X86-AVX512-NEXT: vpermt2ps (%eax), %ymm1, %ymm0 ; X86-AVX512-NEXT: retl ; ; X64-AVX512-LABEL: test_masked_permps_v8f32: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovaps (%rdi), %ymm2 -; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,3,11,7,6,14,15] -; X64-AVX512-NEXT: vpermi2ps %ymm0, %ymm2, %ymm1 -; X64-AVX512-NEXT: vmovaps %ymm1, %ymm0 +; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [15,14,11,3,15,14,6,7] +; X64-AVX512-NEXT: vpermt2ps (%rdi), %ymm1, %ymm0 ; X64-AVX512-NEXT: retq ; ; X86-AVX512F-LABEL: test_masked_permps_v8f32: @@ -505,18 +496,18 @@ define <8 x float> @test_masked_permps_v8f32(ptr %vp, <8 x float> %vec2) { ; X86-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX512F-NEXT: vmovaps (%eax), %ymm1 -; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] -; X86-AVX512F-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 -; X86-AVX512F-NEXT: vmovaps %ymm1, %ymm0 +; X86-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [23,22,19,3,23,22,6,7] +; 
X86-AVX512F-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 +; X86-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; X86-AVX512F-NEXT: retl ; ; X64-AVX512F-LABEL: test_masked_permps_v8f32: ; X64-AVX512F: # %bb.0: ; X64-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; X64-AVX512F-NEXT: vmovaps (%rdi), %ymm1 -; X64-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [7,6,3,19,7,6,22,23] -; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1 -; X64-AVX512F-NEXT: vmovaps %ymm1, %ymm0 +; X64-AVX512F-NEXT: vmovaps {{.*#+}} ymm2 = [23,22,19,3,23,22,6,7] +; X64-AVX512F-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0 +; X64-AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; X64-AVX512F-NEXT: retq %vec = load <8 x float>, ptr %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> @@ -528,35 +519,27 @@ define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) { ; X86-AVX512-LABEL: test_masked_permps_v16f32: ; X86-AVX512: # %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512-NEXT: vmovaps (%eax), %zmm2 -; X86-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] -; X86-AVX512-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 -; X86-AVX512-NEXT: vmovaps %zmm1, %zmm0 +; X86-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15] +; X86-AVX512-NEXT: vpermt2ps (%eax), %zmm1, %zmm0 ; X86-AVX512-NEXT: retl ; ; X64-AVX512-LABEL: test_masked_permps_v16f32: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovaps (%rdi), %zmm2 -; X64-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] -; X64-AVX512-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 -; X64-AVX512-NEXT: vmovaps %zmm1, %zmm0 +; X64-AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15] +; X64-AVX512-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0 ; X64-AVX512-NEXT: retq ; ; X86-AVX512F-LABEL: test_masked_permps_v16f32: ; X86-AVX512F: # %bb.0: ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512F-NEXT: vmovaps 
(%eax), %zmm2 -; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] -; X86-AVX512F-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 -; X86-AVX512F-NEXT: vmovaps %zmm1, %zmm0 +; X86-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15] +; X86-AVX512F-NEXT: vpermt2ps (%eax), %zmm1, %zmm0 ; X86-AVX512F-NEXT: retl ; ; X64-AVX512F-LABEL: test_masked_permps_v16f32: ; X64-AVX512F: # %bb.0: -; X64-AVX512F-NEXT: vmovaps (%rdi), %zmm2 -; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,13,11,19,14,12,22,23,7,6,3,27,7,29,3,31] -; X64-AVX512F-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1 -; X64-AVX512F-NEXT: vmovaps %zmm1, %zmm0 +; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [31,29,27,3,30,28,6,7,23,22,19,11,23,13,19,15] +; X64-AVX512F-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0 ; X64-AVX512F-NEXT: retq %vec = load <16 x float>, ptr %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll index 6f9b3e94aa68f6..2b89590a0bb419 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll @@ -719,10 +719,9 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) { ; AVX512F: # %bb.0: ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1 -; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [9,6,1,0,3,7,7,1] -; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,0,0,0,0] -; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2 -; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0 +; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [1,14,9,8,11,15,15,9] +; AVX512F-NEXT: vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: # kill: def $al killed $al killed $eax ; AVX512F-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v48.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-v48.ll index ed9f849d35d001..0efbe018764d28 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v48.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v48.ll @@ -79,10 +79,9 @@ define <32 x i8> @foo(ptr %x0) { ; ; AVX512VBMI-LABEL: foo: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1 -; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %xmm2 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [0,1,3,4,6,7,9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,33,34,36,37,39,40,42,43,45,46] -; AVX512VBMI-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 +; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %xmm1 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [32,33,35,36,38,39,41,42,44,45,47,48,50,51,53,54,56,57,59,60,62,63,1,2,4,5,7,8,10,11,13,14] +; AVX512VBMI-NEXT: vpermi2b (%rdi), %ymm1, %ymm0 ; AVX512VBMI-NEXT: retq %1 = load <48 x i8>, ptr %x0, align 1 %2 = shufflevector <48 x i8> %1, <48 x i8> undef, <32 x i32> diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll index ac267544f0c0e8..181f5651784d8a 100644 --- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll +++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll @@ -4895,11 +4895,10 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i ; ; AVX512BW-LABEL: vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,0,2,0,8,0,6,0] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,0,10,0,0,0,14,0] -; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0 ; 
AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper @@ -4997,11 +4996,10 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i ; ; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm0 = [32,1,32,3,32,5,32,7,32,9,32,11,32,13,32,15,32,17,32,19,32,21,32,23,32,25,32,27,32,29,32,31] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63] -; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2w (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5411,13 +5409,12 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. ; ; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15] ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31] -; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512F-NEXT: vpermt2d (%rdi), %zmm0, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512F-NEXT: vzeroupper @@ -5425,13 +5422,12 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. 
; ; AVX512DQ-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15] ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31] -; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512DQ-NEXT: vpermt2d (%rdi), %zmm0, %zmm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512DQ-NEXT: vzeroupper @@ -5439,11 +5435,10 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in. ; ; AVX512BW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31] -; AVX512BW-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2d (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5679,13 +5674,12 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512F-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7] ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15] -; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, 
%zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512F-NEXT: vzeroupper @@ -5693,13 +5687,12 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512DQ-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7] ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15] -; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512DQ-NEXT: vzeroupper @@ -5707,11 +5700,10 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i ; ; AVX512BW-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7] ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15] -; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5938,13 +5930,12 @@ define void 
@vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr % ; ; AVX512F-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7] ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15] -; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512F-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512F-NEXT: vzeroupper @@ -5952,13 +5943,12 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr % ; ; AVX512DQ-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7] ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15] -; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0 +; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0 ; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1 +; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1 ; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx) ; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx) ; AVX512DQ-NEXT: vzeroupper @@ -5966,11 +5956,10 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr % ; ; AVX512BW-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7] ; 
AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15] -; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0 +; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/Xtensa/mul.ll b/llvm/test/CodeGen/Xtensa/mul.ll index 9b13897293dc1b..c5995bbc479a6a 100644 --- a/llvm/test/CodeGen/Xtensa/mul.ll +++ b/llvm/test/CodeGen/Xtensa/mul.ll @@ -4,7 +4,8 @@ define signext i32 @square(i32 %a) nounwind { ; XTENSA-LABEL: square: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a8, .LCPI0_0 @@ -20,7 +21,8 @@ define signext i32 @square(i32 %a) nounwind { define signext i32 @mul(i32 %a, i32 %b) nounwind { ; XTENSA-LABEL: mul: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a8, .LCPI1_0 @@ -35,7 +37,8 @@ define signext i32 @mul(i32 %a, i32 %b) nounwind { define signext i32 @mul_constant(i32 %a) nounwind { ; XTENSA-LABEL: mul_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, 5 @@ -51,7 +54,8 @@ define signext i32 @mul_constant(i32 %a) nounwind { define i32 @mul_pow2(i32 %a) nounwind { ; XTENSA-LABEL: mul_pow2: -; XTENSA: slli a2, a2, 3 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 3 ; XTENSA-NEXT: ret %1 = mul i32 %a, 8 ret i32 %1 @@ -59,7 +63,8 @@ define i32 @mul_pow2(i32 %a) nounwind { define i64 @mul64(i64 %a, i64 %b) nounwind { ; XTENSA-LABEL: mul64: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, 
-16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a8, .LCPI4_0 @@ -74,7 +79,8 @@ define i64 @mul64(i64 %a, i64 %b) nounwind { define i64 @mul64_constant(i64 %a) nounwind { ; XTENSA-LABEL: mul64_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, 5 @@ -91,7 +97,8 @@ define i64 @mul64_constant(i64 %a) nounwind { define i32 @mulhs(i32 %a, i32 %b) nounwind { ; XTENSA-LABEL: mulhs: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: or a4, a3, a3 @@ -114,7 +121,8 @@ define i32 @mulhs(i32 %a, i32 %b) nounwind { define i32 @mulhs_positive_constant(i32 %a) nounwind { ; XTENSA-LABEL: mulhs_positive_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: srai a3, a2, 31 @@ -136,7 +144,8 @@ define i32 @mulhs_positive_constant(i32 %a) nounwind { define i32 @mulhs_negative_constant(i32 %a) nounwind { ; XTENSA-LABEL: mulhs_negative_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: srai a3, a2, 31 @@ -158,7 +167,8 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind { define zeroext i32 @mulhu(i32 zeroext %a, i32 zeroext %b) nounwind { ; XTENSA-LABEL: mulhu: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: or a4, a3, a3 @@ -181,7 +191,8 @@ define zeroext i32 @mulhu(i32 zeroext %a, i32 zeroext %b) nounwind { define i32 @mulhsu(i32 %a, i32 %b) nounwind { ; XTENSA-LABEL: 
mulhsu: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: or a4, a3, a3 @@ -204,7 +215,8 @@ define i32 @mulhsu(i32 %a, i32 %b) nounwind { define i32 @mulhu_constant(i32 %a) nounwind { ; XTENSA-LABEL: mulhu_constant: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, 5 @@ -226,7 +238,8 @@ define i32 @mulhu_constant(i32 %a) nounwind { define i32 @muli32_p65(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p65: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, 65 @@ -242,7 +255,8 @@ define i32 @muli32_p65(i32 %a) nounwind { define i32 @muli32_p63(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p63: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, 63 @@ -258,7 +272,8 @@ define i32 @muli32_p63(i32 %a) nounwind { define i64 @muli64_p65(i64 %a) nounwind { ; XTENSA-LABEL: muli64_p65: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, 65 @@ -275,7 +290,8 @@ define i64 @muli64_p65(i64 %a) nounwind { define i64 @muli64_p63(i64 %a) nounwind { ; XTENSA-LABEL: muli64_p63: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, 63 @@ -292,7 +308,8 @@ define i64 @muli64_p63(i64 %a) nounwind { define i32 @muli32_m63(i32 %a) nounwind { ; XTENSA-LABEL: muli32_m63: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: 
+; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, -63 @@ -308,7 +325,8 @@ define i32 @muli32_m63(i32 %a) nounwind { define i32 @muli32_m65(i32 %a) nounwind { ; XTENSA-LABEL: muli32_m65: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, -65 @@ -324,7 +342,8 @@ define i32 @muli32_m65(i32 %a) nounwind { define i64 @muli64_m63(i64 %a) nounwind { ; XTENSA-LABEL: muli64_m63: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, -63 @@ -341,7 +360,8 @@ define i64 @muli64_m63(i64 %a) nounwind { define i64 @muli64_m65(i64 %a) nounwind { ; XTENSA-LABEL: muli64_m65: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a4, -65 @@ -358,7 +378,8 @@ define i64 @muli64_m65(i64 %a) nounwind { define i32 @muli32_p384(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p384: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: movi a3, 384 @@ -374,7 +395,8 @@ define i32 @muli32_p384(i32 %a) nounwind { define i32 @muli32_p12288(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p12288: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI21_0 @@ -390,7 +412,8 @@ define i32 @muli32_p12288(i32 %a) nounwind { define i32 @muli32_p4352(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p4352: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; 
XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI22_0 @@ -406,7 +429,8 @@ define i32 @muli32_p4352(i32 %a) nounwind { define i32 @muli32_p3840(i32 %a) nounwind { ; XTENSA-LABEL: muli32_p3840: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI23_0 @@ -422,7 +446,8 @@ define i32 @muli32_p3840(i32 %a) nounwind { define i32 @muli32_m3840(i32 %a) nounwind { ; XTENSA-LABEL: muli32_m3840: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI24_0 @@ -438,7 +463,8 @@ define i32 @muli32_m3840(i32 %a) nounwind { define i32 @muli32_m4352(i32 %a) nounwind { ; XTENSA-LABEL: muli32_m4352: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a3, .LCPI25_0 @@ -454,7 +480,8 @@ define i32 @muli32_m4352(i32 %a) nounwind { define i64 @muli64_p4352(i64 %a) nounwind { ; XTENSA-LABEL: muli64_p4352: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a4, .LCPI26_0 @@ -471,7 +498,8 @@ define i64 @muli64_p4352(i64 %a) nounwind { define i64 @muli64_p3840(i64 %a) nounwind { ; XTENSA-LABEL: muli64_p3840: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a4, .LCPI27_0 @@ -488,7 +516,8 @@ define i64 @muli64_p3840(i64 %a) nounwind { define i64 @muli64_m4352(i64 %a) nounwind { ; XTENSA-LABEL: muli64_m4352: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, 
-16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a4, .LCPI28_0 @@ -505,7 +534,8 @@ define i64 @muli64_m4352(i64 %a) nounwind { define i64 @muli64_m3840(i64 %a) nounwind { ; XTENSA-LABEL: muli64_m3840: -; XTENSA: addi a8, a1, -16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -16 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill ; XTENSA-NEXT: l32r a4, .LCPI29_0 @@ -522,17 +552,123 @@ define i64 @muli64_m3840(i64 %a) nounwind { define i128 @muli128_m3840(i128 %a) nounwind { ; XTENSA-LABEL: muli128_m3840: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill -; XTENSA-NEXT: movi a7, -1 -; XTENSA-NEXT: s32i a7, a1, 4 -; XTENSA-NEXT: s32i a7, a1, 0 -; XTENSA-NEXT: l32r a6, .LCPI30_0 -; XTENSA-NEXT: l32r a8, .LCPI30_1 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -80 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 64 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a12, a1, 60 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a13, a1, 56 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 52 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 48 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a5, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a4, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: or a15, a3, a3 +; XTENSA-NEXT: l32r a14, .LCPI30_0 +; XTENSA-NEXT: movi a12, 0 +; XTENSA-NEXT: l32r a13, .LCPI30_1 +; XTENSA-NEXT: s32i a2, a1, 36 # 4-byte Folded Spill +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 28 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 40 # 4-byte Folded Spill +; XTENSA-NEXT: or a2, a15, a15 +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: 
s32i a14, a1, 12 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: add a15, a2, a8 +; XTENSA-NEXT: movi a8, 1 +; XTENSA-NEXT: s32i a8, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: bltu a15, a2, .LBB30_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB30_2: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: s32i a8, a1, 32 # 4-byte Folded Spill +; XTENSA-NEXT: movi a14, -1 +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: add a9, a2, a15 +; XTENSA-NEXT: l32i a8, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: s32i a9, a1, 24 # 4-byte Folded Spill +; XTENSA-NEXT: bltu a9, a2, .LBB30_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB30_4: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: l32i a9, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: add a15, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a3, a1, 4 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a2, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: add a15, a2, a15 +; XTENSA-NEXT: l32i a2, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a4, a1, 12 # 4-byte Folded Reload +; XTENSA-NEXT: or a5, a14, a14 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a14, a14 +; XTENSA-NEXT: callx0 a13 +; 
XTENSA-NEXT: l32i a8, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: add a9, a2, a8 +; XTENSA-NEXT: add a4, a15, a9 +; XTENSA-NEXT: l32i a7, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: or a8, a7, a7 +; XTENSA-NEXT: bltu a4, a15, .LBB30_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB30_6: +; XTENSA-NEXT: or a10, a7, a7 +; XTENSA-NEXT: l32i a11, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a15, a11, .LBB30_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a10, a12, a12 +; XTENSA-NEXT: .LBB30_8: +; XTENSA-NEXT: or a11, a7, a7 +; XTENSA-NEXT: l32i a6, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a5, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a5, a6, .LBB30_10 +; XTENSA-NEXT: # %bb.9: +; XTENSA-NEXT: or a11, a12, a12 +; XTENSA-NEXT: .LBB30_10: +; XTENSA-NEXT: l32i a6, a1, 4 # 4-byte Folded Reload +; XTENSA-NEXT: add a11, a6, a11 +; XTENSA-NEXT: add a10, a11, a10 +; XTENSA-NEXT: bltu a9, a2, .LBB30_12 +; XTENSA-NEXT: # %bb.11: +; XTENSA-NEXT: or a7, a12, a12 +; XTENSA-NEXT: .LBB30_12: +; XTENSA-NEXT: l32i a9, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: add a9, a3, a9 +; XTENSA-NEXT: add a9, a9, a7 +; XTENSA-NEXT: add a9, a10, a9 +; XTENSA-NEXT: add a5, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 28 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a15, a1, 48 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a14, a1, 52 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a13, a1, 56 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a12, a1, 60 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a0, a1, 64 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 80 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: ret %1 = mul i128 %a, -3840 @@ -541,17 +677,123 @@ define i128 @muli128_m3840(i128 %a) nounwind { define i128 @muli128_m63(i128 %a) nounwind { ; XTENSA-LABEL: muli128_m63: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill -; XTENSA-NEXT: movi a7, -1 -; 
XTENSA-NEXT: s32i a7, a1, 4 -; XTENSA-NEXT: s32i a7, a1, 0 -; XTENSA-NEXT: movi a6, -63 -; XTENSA-NEXT: l32r a8, .LCPI31_0 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -80 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 64 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a12, a1, 60 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a13, a1, 56 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 52 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 48 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a5, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a4, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: or a15, a3, a3 +; XTENSA-NEXT: movi a14, -63 +; XTENSA-NEXT: movi a12, 0 +; XTENSA-NEXT: l32r a13, .LCPI31_0 +; XTENSA-NEXT: s32i a2, a1, 36 # 4-byte Folded Spill +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 28 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 40 # 4-byte Folded Spill +; XTENSA-NEXT: or a2, a15, a15 +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: s32i a14, a1, 12 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: add a15, a2, a8 +; XTENSA-NEXT: movi a8, 1 +; XTENSA-NEXT: s32i a8, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: bltu a15, a2, .LBB31_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB31_2: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: s32i a8, a1, 32 # 4-byte Folded Spill +; XTENSA-NEXT: movi a14, -1 +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: add a9, a2, a15 +; 
XTENSA-NEXT: l32i a8, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: s32i a9, a1, 24 # 4-byte Folded Spill +; XTENSA-NEXT: bltu a9, a2, .LBB31_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB31_4: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: l32i a9, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: add a15, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a3, a1, 4 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a2, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: add a15, a2, a15 +; XTENSA-NEXT: l32i a2, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a4, a1, 12 # 4-byte Folded Reload +; XTENSA-NEXT: or a5, a14, a14 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a14, a14 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: add a9, a2, a8 +; XTENSA-NEXT: add a4, a15, a9 +; XTENSA-NEXT: l32i a7, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: or a8, a7, a7 +; XTENSA-NEXT: bltu a4, a15, .LBB31_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB31_6: +; XTENSA-NEXT: or a10, a7, a7 +; XTENSA-NEXT: l32i a11, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a15, a11, .LBB31_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a10, a12, a12 +; XTENSA-NEXT: .LBB31_8: +; XTENSA-NEXT: or a11, a7, a7 +; XTENSA-NEXT: l32i a6, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a5, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a5, a6, .LBB31_10 +; XTENSA-NEXT: # %bb.9: +; 
XTENSA-NEXT: or a11, a12, a12 +; XTENSA-NEXT: .LBB31_10: +; XTENSA-NEXT: l32i a6, a1, 4 # 4-byte Folded Reload +; XTENSA-NEXT: add a11, a6, a11 +; XTENSA-NEXT: add a10, a11, a10 +; XTENSA-NEXT: bltu a9, a2, .LBB31_12 +; XTENSA-NEXT: # %bb.11: +; XTENSA-NEXT: or a7, a12, a12 +; XTENSA-NEXT: .LBB31_12: +; XTENSA-NEXT: l32i a9, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: add a9, a3, a9 +; XTENSA-NEXT: add a9, a9, a7 +; XTENSA-NEXT: add a9, a10, a9 +; XTENSA-NEXT: add a5, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 28 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a3, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a15, a1, 48 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a14, a1, 52 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a13, a1, 56 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a12, a1, 60 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a0, a1, 64 # 4-byte Folded Reload +; XTENSA-NEXT: addi a8, a1, 80 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: ret %1 = mul i128 %a, -63 @@ -560,22 +802,119 @@ define i128 @muli128_m63(i128 %a) nounwind { define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; XTENSA-LABEL: mulhsu_i64: -; XTENSA: addi a8, a1, -16 -; XTENSA-NEXT: or a1, a8, a8 -; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill -; XTENSA-NEXT: or a7, a5, a5 -; XTENSA-NEXT: or a6, a4, a4 -; XTENSA-NEXT: srai a8, a7, 31 -; XTENSA-NEXT: s32i a8, a1, 4 -; XTENSA-NEXT: s32i a8, a1, 0 -; XTENSA-NEXT: movi a4, 0 -; XTENSA-NEXT: l32r a8, .LCPI32_0 -; XTENSA-NEXT: or a5, a4, a4 -; XTENSA-NEXT: callx0 a8 -; XTENSA-NEXT: or a2, a4, a4 -; XTENSA-NEXT: or a3, a5, a5 -; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload -; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addi a8, a1, -64 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: s32i a0, a1, 56 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a12, a1, 52 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a13, a1, 48 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 44 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 40 # 4-byte Folded 
Spill +; XTENSA-NEXT: s32i a5, a1, 28 # 4-byte Folded Spill +; XTENSA-NEXT: or a14, a4, a4 +; XTENSA-NEXT: or a15, a3, a3 +; XTENSA-NEXT: movi a12, 0 +; XTENSA-NEXT: l32r a13, .LCPI32_0 +; XTENSA-NEXT: s32i a2, a1, 32 # 4-byte Folded Spill +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a3, a1, 24 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a15, a1, 36 # 4-byte Folded Spill +; XTENSA-NEXT: or a2, a15, a15 +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: s32i a14, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: or a4, a14, a14 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: l32i a8, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: add a14, a2, a8 +; XTENSA-NEXT: movi a15, 1 +; XTENSA-NEXT: or a8, a15, a15 +; XTENSA-NEXT: bltu a14, a2, .LBB32_2 +; XTENSA-NEXT: # %bb.1: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB32_2: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: s32i a8, a1, 24 # 4-byte Folded Spill +; XTENSA-NEXT: l32i a2, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: l32i a4, a1, 28 # 4-byte Folded Reload +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: add a9, a2, a14 +; XTENSA-NEXT: s32i a15, a1, 20 # 4-byte Folded Spill +; XTENSA-NEXT: or a8, a15, a15 +; XTENSA-NEXT: bltu a9, a2, .LBB32_4 +; XTENSA-NEXT: # %bb.3: +; XTENSA-NEXT: or a8, a12, a12 +; XTENSA-NEXT: .LBB32_4: +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: l32i a9, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: add a14, a9, a8 +; XTENSA-NEXT: l32i a2, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a12, a12 +; XTENSA-NEXT: l32i a15, a1, 28 # 4-byte Folded Reload +; XTENSA-NEXT: or a4, a15, a15 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a3, a1, 8 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a14, a1, 12 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a2, a1, 4 # 4-byte Folded Spill +; XTENSA-NEXT: add a14, a2, a14 +; 
XTENSA-NEXT: l32i a2, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: or a3, a15, a15 +; XTENSA-NEXT: or a4, a12, a12 +; XTENSA-NEXT: or a5, a12, a12 +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: s32i a2, a1, 0 # 4-byte Folded Spill +; XTENSA-NEXT: s32i a3, a1, 16 # 4-byte Folded Spill +; XTENSA-NEXT: srai a2, a15, 31 +; XTENSA-NEXT: or a3, a2, a2 +; XTENSA-NEXT: l32i a4, a1, 32 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a5, a1, 36 # 4-byte Folded Reload +; XTENSA-NEXT: callx0 a13 +; XTENSA-NEXT: or a8, a2, a2 +; XTENSA-NEXT: l32i a9, a1, 0 # 4-byte Folded Reload +; XTENSA-NEXT: add a10, a8, a9 +; XTENSA-NEXT: add a2, a14, a10 +; XTENSA-NEXT: l32i a6, a1, 20 # 4-byte Folded Reload +; XTENSA-NEXT: or a9, a6, a6 +; XTENSA-NEXT: bltu a2, a14, .LBB32_6 +; XTENSA-NEXT: # %bb.5: +; XTENSA-NEXT: or a9, a12, a12 +; XTENSA-NEXT: .LBB32_6: +; XTENSA-NEXT: or a11, a6, a6 +; XTENSA-NEXT: l32i a7, a1, 4 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a14, a7, .LBB32_8 +; XTENSA-NEXT: # %bb.7: +; XTENSA-NEXT: or a11, a12, a12 +; XTENSA-NEXT: .LBB32_8: +; XTENSA-NEXT: or a7, a6, a6 +; XTENSA-NEXT: l32i a5, a1, 24 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a4, a1, 12 # 4-byte Folded Reload +; XTENSA-NEXT: bltu a4, a5, .LBB32_10 +; XTENSA-NEXT: # %bb.9: +; XTENSA-NEXT: or a7, a12, a12 +; XTENSA-NEXT: .LBB32_10: +; XTENSA-NEXT: l32i a5, a1, 8 # 4-byte Folded Reload +; XTENSA-NEXT: add a7, a5, a7 +; XTENSA-NEXT: add a11, a7, a11 +; XTENSA-NEXT: bltu a10, a8, .LBB32_12 +; XTENSA-NEXT: # %bb.11: +; XTENSA-NEXT: or a6, a12, a12 +; XTENSA-NEXT: .LBB32_12: +; XTENSA-NEXT: l32i a8, a1, 16 # 4-byte Folded Reload +; XTENSA-NEXT: add a8, a3, a8 +; XTENSA-NEXT: add a8, a8, a6 +; XTENSA-NEXT: add a8, a11, a8 +; XTENSA-NEXT: add a3, a8, a9 +; XTENSA-NEXT: l32i a15, a1, 40 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a14, a1, 44 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a13, a1, 48 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a12, a1, 52 # 4-byte Folded Reload +; XTENSA-NEXT: l32i a0, a1, 56 # 4-byte 
Folded Reload +; XTENSA-NEXT: addi a8, a1, 64 ; XTENSA-NEXT: or a1, a8, a8 ; XTENSA-NEXT: ret %1 = zext i64 %a to i128 @@ -588,7 +927,8 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { define i8 @muladd_demand(i8 %x, i8 %y) nounwind { ; XTENSA-LABEL: muladd_demand: -; XTENSA: slli a8, a2, 1 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a8, a2, 1 ; XTENSA-NEXT: sub a8, a3, a8 ; XTENSA-NEXT: movi a9, 15 ; XTENSA-NEXT: and a2, a8, a9 @@ -601,7 +941,8 @@ define i8 @muladd_demand(i8 %x, i8 %y) nounwind { define i8 @mulsub_demand(i8 %x, i8 %y) nounwind { ; XTENSA-LABEL: mulsub_demand: -; XTENSA: addx2 a8, a2, a3 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addx2 a8, a2, a3 ; XTENSA-NEXT: movi a9, 15 ; XTENSA-NEXT: and a2, a8, a9 ; XTENSA-NEXT: ret @@ -613,7 +954,8 @@ define i8 @mulsub_demand(i8 %x, i8 %y) nounwind { define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind { ; XTENSA-LABEL: muladd_demand_2: -; XTENSA: slli a8, a2, 1 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a8, a2, 1 ; XTENSA-NEXT: sub a8, a3, a8 ; XTENSA-NEXT: movi a9, -16 ; XTENSA-NEXT: or a2, a8, a9 @@ -626,7 +968,8 @@ define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind { define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind { ; XTENSA-LABEL: mulsub_demand_2: -; XTENSA: addx2 a8, a2, a3 +; XTENSA: # %bb.0: +; XTENSA-NEXT: addx2 a8, a2, a3 ; XTENSA-NEXT: movi a9, -16 ; XTENSA-NEXT: or a2, a8, a9 ; XTENSA-NEXT: ret @@ -638,7 +981,8 @@ define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind { define signext i32 @mul_imm_2(i32 %a) nounwind { ; XTENSA-LABEL: mul_imm_2: -; XTENSA: slli a2, a2, 1 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 1 ; XTENSA-NEXT: ret %1 = mul i32 %a, 2 ret i32 %1 @@ -646,7 +990,8 @@ define signext i32 @mul_imm_2(i32 %a) nounwind { define signext i32 @mul_imm_1024(i32 %a) nounwind { ; XTENSA-LABEL: mul_imm_1024: -; XTENSA: slli a2, a2, 10 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 10 ; XTENSA-NEXT: ret %1 = mul i32 %a, 1024 ret i32 %1 @@ -654,7 +999,8 @@ define signext i32 @mul_imm_1024(i32 %a) 
nounwind { define signext i32 @mul_imm_16384(i32 %a) nounwind { ; XTENSA-LABEL: mul_imm_16384: -; XTENSA: slli a2, a2, 14 +; XTENSA: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 14 ; XTENSA-NEXT: ret %1 = mul i32 %a, 16384 ret i32 %1 @@ -662,7 +1008,9 @@ define signext i32 @mul_imm_16384(i32 %a) nounwind { define <4 x i32> @mul_vec_splat_constant(<4 x i32> %a) { ; XTENSA-LABEL: mul_vec_splat_constant: -; XTENSA: slli a2, a2, 2 +; XTENSA: .cfi_startproc +; XTENSA-NEXT: # %bb.0: +; XTENSA-NEXT: slli a2, a2, 2 ; XTENSA-NEXT: slli a3, a3, 2 ; XTENSA-NEXT: slli a4, a4, 2 ; XTENSA-NEXT: slli a5, a5, 2 diff --git a/llvm/test/Instrumentation/BoundsChecking/negative.ll b/llvm/test/Instrumentation/BoundsChecking/negative.ll new file mode 100644 index 00000000000000..d8fb117bd13af8 --- /dev/null +++ b/llvm/test/Instrumentation/BoundsChecking/negative.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Check that negative oob gep do not generate invalid check. 
+; RUN: opt < %s -passes=bounds-checking -S | FileCheck %s +target datalayout = "e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + + +@str = global [100 x i8] zeroinitializer, align 1 + +define i16 @main() { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_0:%.*]] = phi i8 [ 65, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TMP4:%.*]] ] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i8 [[I_0]], 76 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[TMP4]] +; CHECK: for.inc: +; CHECK-NEXT: [[I_0_C:%.*]] = sext i8 [[I_0]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 -65, [[I_0_C]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr getelementptr (i8, ptr @str, i8 -65), i8 [[I_0]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 100, [[TMP0]] +; CHECK-NEXT: store i8 [[I_0]], ptr [[GEP]], align 1 +; CHECK-NEXT: [[INC]] = add nuw nsw i8 [[I_0]], 1 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret i16 0 +; +entry: + br label %for.cond + +for.cond: + %i.0 = phi i8 [ 65, %entry ], [ %inc, %for.inc ] + %exitcond.not = icmp eq i8 %i.0, 76 + br i1 %exitcond.not, label %for.end, label %for.inc + +for.inc: ; preds = %for.cond + %gep = getelementptr i8, ptr getelementptr (i8, ptr @str, i8 -65), i8 %i.0 + store i8 %i.0, ptr %gep, align 1 + %inc = add nuw nsw i8 %i.0, 1 + br label %for.cond + +for.end: + ret i16 0 +} + diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s index fd436fe9fe0ddb..bb1840eb849dfe 100644 --- a/llvm/test/MC/AMDGPU/ds.s +++ b/llvm/test/MC/AMDGPU/ds.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | 
FileCheck %s --check-prefixes=CI,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 405d152c93d867..ad1ce40ddd6a45 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -35,3 +35,85 @@ global_load_lds_dwordx4 v[2:3], off offset:4 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: // GFX950: global_load_lds_dwordx4 v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xf4,0xdd,0x02,0x00,0x04,0x00] global_load_lds_dwordx4 v2, s[4:5] offset:4 + + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e] +v_permlane16_swap_b32 v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e] +v_permlane16_swap_b32_e32 v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32_e64 v1, v2 + +// FIXME: Parsed as bound_ctrl:1? 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 bound_ctrl:0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 fi:0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 bound_ctrl:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 fi:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32 v1, v2 bound_ctrl:1 fi:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x99,0xd1,0x02,0x01,0x00,0x00] +v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 + +// FIXME: Swapped order not accepted +// v_permlane16_swap_b32 v1, v2 fi:1 bound_ctrl:1 + + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e] +v_permlane32_swap_b32 v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e] +v_permlane32_swap_b32_e32 v1, v2 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32_e64 v1, v2 + +// FIXME: Parsed as bound_ctrl:1? 
+// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 bound_ctrl:0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 fi:0 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 bound_ctrl:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 fi:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32 v1, v2 bound_ctrl:1 fi:1 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x9a,0xd1,0x02,0x01,0x00,0x00] +v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 + +// FIXME: Swapped order not accepted +// v_permlane32_swap_b32 v1, v2 fi:1 bound_ctrl:1 diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s new file mode 100644 index 00000000000000..3f9bf3beef3aac --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -0,0 +1,31 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 --implicit-check-not=error: %s + +// GFX950: :[[@LINE+1]]:27: error: invalid operand for instruction +v_permlane16_swap_b32 v0, s0 + +// GFX950: :[[@LINE+1]]:27: error: invalid operand for instruction +v_permlane16_swap_b32 v0, m0 + +// GFX950: :[[@LINE+1]]:27: error: invalid operand for instruction +v_permlane16_swap_b32 v0, vcc + +// GFX950: :[[@LINE+1]]:27: error: invalid operand for 
instruction +v_permlane16_swap_b32 v0, vcc_lo + +// GFX950: :[[@LINE+1]]:23: error: invalid operand for instruction +v_permlane16_swap_b32 s0, v0 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:1 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:0 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 fi:1 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 fi:0 + +// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction +v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:1 fi:1 diff --git a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s index a5cca6ba5bd936..79ab8666234a2f 100644 --- a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s +++ b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple amdgcn < %s 2>&1 | FileCheck --strict-whitespace %s +# RUN: not llvm-mc -triple amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck --strict-whitespace %s # This tests the mnemonic spell checker. 
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index 7b3bd5ece09884..783947544d2212 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -1,10 +1,8 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=SICI,CI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GFX89 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX89,GFX9 -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCIVI,NOSICIVI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICIVI,NOVI,NOGFX89 --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/mai-gfx950.s b/llvm/test/MC/AMDGPU/mai-gfx950.s index b1a57b9b98b8bc..23b1ba2c3cd13a 100644 --- a/llvm/test/MC/AMDGPU/mai-gfx950.s +++ b/llvm/test/MC/AMDGPU/mai-gfx950.s @@ -1416,3 +1416,147 @@ v_smfmac_f32_16x16x128_fp8_fp8 a[10:13], a[2:5], a[6:13], v2 cbsz:3 abid:1 // GFX950: v_smfmac_f32_16x16x128_fp8_fp8 a[10:13], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xc3,0xd3,0x02,0x0d,0x0e,0x1c] // ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x128_fp8_fp8 a[10:13], a[2:5], a[6:13], v3 cbsz:1 abid:3 + +//===----------------------------------------------------------------------===// +// v_smfmac_f32_32x32x64_bf8_bf8 
+//===----------------------------------------------------------------------===// + +// GFX950: v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcb,0xd3,0x02,0x09,0x0e,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcb,0xd3,0x02,0x09,0x0e,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64bf8bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], v[2:5], a[4:11], v1 ; encoding: [0x0a,0x80,0xcb,0xd3,0x02,0x09,0x06,0x14] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], v[2:5], a[4:11], v1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcb,0xd3,0x02,0x09,0x0a,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], v[2:5], a[4:11], v3 ; encoding: [0x0a,0x80,0xcb,0xd3,0x02,0x09,0x0e,0x14] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], v[2:5], a[4:11], v3 + +// GFX950: v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcb,0xd3,0x02,0x0d,0x0a,0x04] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x8b,0xcb,0xd3,0x02,0x0d,0x0a,0x1c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not 
supported on this GPU +v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xcb,0xd3,0x02,0x0d,0x0e,0x1c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 + +//===----------------------------------------------------------------------===// +// v_smfmac_f32_32x32x64_bf8_fp8 +//===----------------------------------------------------------------------===// + +// GFX950: v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xce,0xd3,0x02,0x09,0x0e,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xce,0xd3,0x02,0x09,0x0e,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64bf8fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], v[2:5], a[4:11], v1 ; encoding: [0x0a,0x80,0xce,0xd3,0x02,0x09,0x06,0x14] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], v[2:5], a[4:11], v1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xce,0xd3,0x02,0x09,0x0a,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], v[2:5], a[4:11], v3 ; encoding: [0x0a,0x80,0xce,0xd3,0x02,0x09,0x0e,0x14] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], 
v[2:5], a[4:11], v3 + +// GFX950: v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xce,0xd3,0x02,0x0d,0x0a,0x04] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x8b,0xce,0xd3,0x02,0x0d,0x0a,0x1c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xce,0xd3,0x02,0x0d,0x0e,0x1c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 + +//===----------------------------------------------------------------------===// +// v_smfmac_f32_32x32x64_fp8_bf8 +//===----------------------------------------------------------------------===// + +// GFX950: v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcf,0xd3,0x02,0x09,0x0e,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcf,0xd3,0x02,0x09,0x0e,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64fp8bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], v[2:5], a[4:11], v1 ; encoding: [0x0a,0x80,0xcf,0xd3,0x02,0x09,0x06,0x14] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], v[2:5], a[4:11], v1 + +// GFX950: 
v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcf,0xd3,0x02,0x09,0x0a,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], v[2:5], a[4:11], v3 ; encoding: [0x0a,0x80,0xcf,0xd3,0x02,0x09,0x0e,0x14] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], v[2:5], a[4:11], v3 + +// GFX950: v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcf,0xd3,0x02,0x0d,0x0a,0x04] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x8b,0xcf,0xd3,0x02,0x0d,0x0a,0x1c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xcf,0xd3,0x02,0x0d,0x0e,0x1c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 + +//===----------------------------------------------------------------------===// +// v_smfmac_f32_32x32x64_fp8_fp8 +//===----------------------------------------------------------------------===// + +// GFX950: v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xd3,0xd3,0x02,0x09,0x0e,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], 
v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xd3,0xd3,0x02,0x09,0x0e,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64fp8fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], v[2:5], a[4:11], v1 ; encoding: [0x0a,0x80,0xd3,0xd3,0x02,0x09,0x06,0x14] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], v[2:5], a[4:11], v1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xd3,0xd3,0x02,0x09,0x0a,0x0c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], v[2:5], a[4:11], v3 ; encoding: [0x0a,0x80,0xd3,0xd3,0x02,0x09,0x0e,0x14] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], v[2:5], a[4:11], v3 + +// GFX950: v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xd3,0xd3,0x02,0x0d,0x0a,0x04] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x8b,0xd3,0xd3,0x02,0x0d,0x0a,0x1c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 + +// GFX950: v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xd3,0xd3,0x02,0x0d,0x0e,0x1c] +// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 diff --git a/llvm/test/MC/AMDGPU/mimg-err.s 
b/llvm/test/MC/AMDGPU/mimg-err.s index 6cf92f29c27b78..bec33bab984ab3 100644 --- a/llvm/test/MC/AMDGPU/mimg-err.s +++ b/llvm/test/MC/AMDGPU/mimg-err.s @@ -1,4 +1,3 @@ -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s index 29e402d9496f16..54bb2b19b2e844 100644 --- a/llvm/test/MC/AMDGPU/mimg.s +++ b/llvm/test/MC/AMDGPU/mimg.s @@ -1,11 +1,9 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_0 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_1 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89 -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc 
-triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/regression/bug28165.s b/llvm/test/MC/AMDGPU/regression/bug28165.s index 1e31f204e8995f..6d04e13316b610 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28165.s +++ b/llvm/test/MC/AMDGPU/regression/bug28165.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI diff --git a/llvm/test/MC/AMDGPU/regression/bug28413.s b/llvm/test/MC/AMDGPU/regression/bug28413.s index 5fbf9f37d4a8de..7cf413d2d0a17c 100644 --- a/llvm/test/MC/AMDGPU/regression/bug28413.s +++ b/llvm/test/MC/AMDGPU/regression/bug28413.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s index b877bce22af56d..12e01321b967a4 100644 --- a/llvm/test/MC/AMDGPU/smrd.s +++ b/llvm/test/MC/AMDGPU/smrd.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN %s // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=GCN %s // RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | 
FileCheck --check-prefixes=GCN,CI %s // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=VI %s -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/sopk.s b/llvm/test/MC/AMDGPU/sopk.s index c912b83ca61c27..59c93fefcfaa23 100644 --- a/llvm/test/MC/AMDGPU/sopk.s +++ b/llvm/test/MC/AMDGPU/sopk.s @@ -1,11 +1,9 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,VI %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,GFX9 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GCN,GFX10 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX11 %s -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --implicit-check-not=error: %s diff --git a/llvm/test/MC/AMDGPU/unknown-target-cpu.s b/llvm/test/MC/AMDGPU/unknown-target-cpu.s new file mode 100644 index 00000000000000..3d41e8eb5b2c45 --- /dev/null +++ b/llvm/test/MC/AMDGPU/unknown-target-cpu.s 
@@ -0,0 +1,15 @@ +// RUN: not llvm-mc -triple=amdgcn -show-encoding < %s | FileCheck %s +// RUN: not llvm-mc -triple=amdgcn -show-encoding -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s +// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding < %s | FileCheck %s + +// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4 ; encoding: [0x02,0x08,0x02,0x7c] +v_cmp_lt_f32 vcc, s2, v4 + +// CHECK: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x00] +v_cndmask_b32 v1, v2, v3, vcc + +// ERR: [[@LINE+1]]:1: error: instruction not supported on this GPU +v_mac_legacy_f32 v1, v3, s5 + +// CHECK: v_lshr_b32_e32 v0, v1, v2 ; encoding: [0x01,0x05,0x00,0x2a] +v_lshr_b32 v0, v1, v2 diff --git a/llvm/test/MC/AMDGPU/vintrp.s b/llvm/test/MC/AMDGPU/vintrp.s index db15f8eb4499dd..35720c95cf31e7 100644 --- a/llvm/test/MC/AMDGPU/vintrp.s +++ b/llvm/test/MC/AMDGPU/vintrp.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck -check-prefix=SI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=SI %s // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s v_interp_p1_f32 v1, v0, attr0.x diff --git a/llvm/test/MC/AMDGPU/vop1.s b/llvm/test/MC/AMDGPU/vop1.s index f7e5db7fa3d39f..af0d289e827eed 100644 --- a/llvm/test/MC/AMDGPU/vop1.s +++ b/llvm/test/MC/AMDGPU/vop1.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,CI,SICI,CIVI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,CIVI,VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s 
--check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index ade7ce95f17584..7317ab00ad7820 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop3-convert.s b/llvm/test/MC/AMDGPU/vop3-convert.s index 0f33a81c6ea0fe..02d576fdcd845b 100644 --- a/llvm/test/MC/AMDGPU/vop3-convert.s +++ b/llvm/test/MC/AMDGPU/vop3-convert.s @@ -1,9 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI // RUN: not llvm-mc -triple=amdgcn 
-mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop3-errs.s b/llvm/test/MC/AMDGPU/vop3-errs.s index e600151410389d..94fc0ea8b3e9eb 100644 --- a/llvm/test/MC/AMDGPU/vop3-errs.s +++ b/llvm/test/MC/AMDGPU/vop3-errs.s @@ -1,4 +1,3 @@ -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s index 0d2544002a9f21..ccae2611d4ffdd 100644 --- a/llvm/test/MC/AMDGPU/vop3.s +++ b/llvm/test/MC/AMDGPU/vop3.s @@ -1,11 +1,11 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI // Make sure interp instructions disassemble regardless of lds bank count // RUN: not llvm-mc 
-triple=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=VI -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck %s -check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s index a15a48e507a627..c7cfb7ae67a979 100644 --- a/llvm/test/MC/AMDGPU/vop_dpp.s +++ b/llvm/test/MC/AMDGPU/vop_dpp.s @@ -1,7 +1,6 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=VI,VI9 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX9,VI9 -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOSICI,NOCI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vop_sdwa.s b/llvm/test/MC/AMDGPU/vop_sdwa.s index 0c803a9819a831..0e007d5e360a31 100644 --- a/llvm/test/MC/AMDGPU/vop_sdwa.s +++ b/llvm/test/MC/AMDGPU/vop_sdwa.s @@ -1,7 +1,6 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=VI,GFX89 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 
-show-encoding %s | FileCheck %s --check-prefixes=GFX9,GFX89 -// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOCI,NOSICI --implicit-check-not=error: // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOVI,NOGFX89 --implicit-check-not=error: diff --git a/llvm/test/MC/AMDGPU/vopc.s b/llvm/test/MC/AMDGPU/vopc.s index 55289c0a463fa9..9ff4f7eda73a0a 100644 --- a/llvm/test/MC/AMDGPU/vopc.s +++ b/llvm/test/MC/AMDGPU/vopc.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI diff --git a/llvm/test/MC/AMDGPU/wave_any.s b/llvm/test/MC/AMDGPU/wave_any.s index 825a0abc172240..27502eff89bfc8 100644 --- a/llvm/test/MC/AMDGPU/wave_any.s +++ b/llvm/test/MC/AMDGPU/wave_any.s @@ -1,13 +1,13 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s v_cmp_ge_i32_e32 s0, v0 -// GFX10: v_cmp_ge_i32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d] +// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d] v_cmp_ge_i32_e32 vcc_lo, s0, v1 -// GFX10: v_cmp_ge_i32_e32 vcc, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d] +// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d] v_cmp_ge_i32_e32 vcc, s0, v2 -// GFX10: v_cmp_ge_i32_e32 vcc, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d] +// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d] v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD // GFX10: 
v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06] @@ -16,10 +16,10 @@ v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD // GFX10: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06] v_cmp_class_f32_e32 vcc_lo, s0, v0 -// GFX10: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] +// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] v_cmp_class_f32_e32 vcc, s0, v0 -// GFX10: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] +// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d] v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD // GFX10: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06] @@ -34,40 +34,40 @@ v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD // GFX10: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06] v_cndmask_b32_e32 v1, v2, v3, -// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] +// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02] v_cndmask_b32_e32 v1, v2, v3, vcc_lo -// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] +// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02] v_cndmask_b32_e32 v1, v2, v3, vcc -// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] +// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02] v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo -// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] +// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc -// GFX10: v_add_co_ci_u32_e32 
v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] +// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] v_add_co_ci_u32_e32 v3, v3, v4 -// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50] +// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50] v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo -// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52] +// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc -// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52] +// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] v_sub_co_ci_u32_e32 v3, v3, v4 -// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52] +// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52] v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] +// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54] v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc -// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] +// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54] v_subrev_co_ci_u32_e32 v1, 0, v1 -// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54] +// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54] v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] @@ -76,7 +76,7 @@ 
v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD sr // GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] +// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06] v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] @@ -85,7 +85,7 @@ v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD sr // GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] +// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06] v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: 
[0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] @@ -94,10 +94,10 @@ v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD // GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] +// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06] v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -// GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] +// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD // GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] @@ -106,7 +106,7 @@ v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:U // GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e] v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX10: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc 
quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] +// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 // GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00] @@ -189,8 +189,8 @@ v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 // GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00] -v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] -// GFX10: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00] +v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3] +// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00] v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo // GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt index ce37e228f03fa3..3852845d308834 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt @@ -42,3 +42,35 @@ # GFX950: buffer_load_dwordx4 v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x5d,0xe0,0x00,0x00,0x02,0x65] 0x00,0x10,0x5d,0xe0,0x00,0x00,0x02,0x65 + + +# GFX950: v_permlane16_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb3,0x02,0x7e] +0x02,0xb3,0x02,0x7e + +# GFX950: v_permlane16_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00] +0x01,0x00,0x99,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00] 
+0x01,0x10,0x99,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane16_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x99,0xd1,0x02,0x01,0x00,0x00] +0x01,0x18,0x99,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane16_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x99,0xd1,0x02,0x01,0x00,0x00] +0x01,0x08,0x99,0xd1,0x02,0x01,0x00,0x00 + + +# GFX950: v_permlane32_swap_b32_e32 v1, v2 ; encoding: [0x02,0xb5,0x02,0x7e] +0x02,0xb5,0x02,0x7e + +# GFX950: v_permlane32_swap_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00] +0x01,0x00,0x9a,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 ; encoding: [0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00] +0x01,0x10,0x9a,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane32_swap_b32_e64 v1, v2 bound_ctrl:1 fi:1 ; encoding: [0x01,0x18,0x9a,0xd1,0x02,0x01,0x00,0x00] +0x01,0x18,0x9a,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: v_permlane32_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x9a,0xd1,0x02,0x01,0x00,0x00] +0x01,0x08,0x9a,0xd1,0x02,0x01,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt index bee43b2f581bc9..8adc8b79fbbf5e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt @@ -888,3 +888,91 @@ # GFX950: v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xc3,0xd3,0x02,0x0d,0x0a,0x04] 0x0a,0x0b,0xc3,0xd3,0x02,0x0d,0x0a,0x04 + + +# GFX950: v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x8b,0xcb,0xd3,0x02,0x0d,0x0a,0x1c] +0x0a,0x8b,0xcb,0xd3,0x02,0x0d,0x0a,0x1c + +# GFX950: v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xcb,0xd3,0x02,0x0d,0x0e,0x1c] +0x0a,0x99,0xcb,0xd3,0x02,0x0d,0x0e,0x1c + +# GFX950: v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], v[2:5], a[4:11], v1 ; encoding: 
[0x0a,0x80,0xcb,0xd3,0x02,0x09,0x06,0x14] +0x0a,0x80,0xcb,0xd3,0x02,0x09,0x06,0x14 + +# GFX950: v_smfmac_f32_32x32x64_bf8_bf8 a[10:25], v[2:5], a[4:11], v3 ; encoding: [0x0a,0x80,0xcb,0xd3,0x02,0x09,0x0e,0x14] +0x0a,0x80,0xcb,0xd3,0x02,0x09,0x0e,0x14 + +# GFX950: v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcb,0xd3,0x02,0x09,0x0a,0x0c] +0x0a,0x0b,0xcb,0xd3,0x02,0x09,0x0a,0x0c + +# GFX950: v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcb,0xd3,0x02,0x09,0x0e,0x0c] +0x0a,0x0b,0xcb,0xd3,0x02,0x09,0x0e,0x0c + +# GFX950: v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcb,0xd3,0x02,0x0d,0x0a,0x04] +0x0a,0x0b,0xcb,0xd3,0x02,0x0d,0x0a,0x04 + + +# GFX950: v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x8b,0xce,0xd3,0x02,0x0d,0x0a,0x1c] +0x0a,0x8b,0xce,0xd3,0x02,0x0d,0x0a,0x1c + +# GFX950: v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xce,0xd3,0x02,0x0d,0x0e,0x1c] +0x0a,0x99,0xce,0xd3,0x02,0x0d,0x0e,0x1c + +# GFX950: v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], v[2:5], a[4:11], v1 ; encoding: [0x0a,0x80,0xce,0xd3,0x02,0x09,0x06,0x14] +0x0a,0x80,0xce,0xd3,0x02,0x09,0x06,0x14 + +# GFX950: v_smfmac_f32_32x32x64_bf8_fp8 a[10:25], v[2:5], a[4:11], v3 ; encoding: [0x0a,0x80,0xce,0xd3,0x02,0x09,0x0e,0x14] +0x0a,0x80,0xce,0xd3,0x02,0x09,0x0e,0x14 + +# GFX950: v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xce,0xd3,0x02,0x09,0x0a,0x0c] +0x0a,0x0b,0xce,0xd3,0x02,0x09,0x0a,0x0c + +# GFX950: v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xce,0xd3,0x02,0x09,0x0e,0x0c] +0x0a,0x0b,0xce,0xd3,0x02,0x09,0x0e,0x0c + +# GFX950: v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: 
[0x0a,0x0b,0xce,0xd3,0x02,0x0d,0x0a,0x04] +0x0a,0x0b,0xce,0xd3,0x02,0x0d,0x0a,0x04 + + +# GFX950: v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x8b,0xcf,0xd3,0x02,0x0d,0x0a,0x1c] +0x0a,0x8b,0xcf,0xd3,0x02,0x0d,0x0a,0x1c + +# GFX950: v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xcf,0xd3,0x02,0x0d,0x0e,0x1c] +0x0a,0x99,0xcf,0xd3,0x02,0x0d,0x0e,0x1c + +# GFX950: v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], v[2:5], a[4:11], v1 ; encoding: [0x0a,0x80,0xcf,0xd3,0x02,0x09,0x06,0x14] +0x0a,0x80,0xcf,0xd3,0x02,0x09,0x06,0x14 + +# GFX950: v_smfmac_f32_32x32x64_fp8_bf8 a[10:25], v[2:5], a[4:11], v3 ; encoding: [0x0a,0x80,0xcf,0xd3,0x02,0x09,0x0e,0x14] +0x0a,0x80,0xcf,0xd3,0x02,0x09,0x0e,0x14 + +# GFX950: v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcf,0xd3,0x02,0x09,0x0a,0x0c] +0x0a,0x0b,0xcf,0xd3,0x02,0x09,0x0a,0x0c + +# GFX950: v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcf,0xd3,0x02,0x09,0x0e,0x0c] +0x0a,0x0b,0xcf,0xd3,0x02,0x09,0x0e,0x0c + +# GFX950: v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xcf,0xd3,0x02,0x0d,0x0a,0x04] +0x0a,0x0b,0xcf,0xd3,0x02,0x0d,0x0a,0x04 + + +# GFX950: v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], a[2:5], a[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x8b,0xd3,0xd3,0x02,0x0d,0x0a,0x1c] +0x0a,0x8b,0xd3,0xd3,0x02,0x0d,0x0a,0x1c + +# GFX950: v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], a[2:5], a[6:13], v3 cbsz:1 abid:3 ; encoding: [0x0a,0x99,0xd3,0xd3,0x02,0x0d,0x0e,0x1c] +0x0a,0x99,0xd3,0xd3,0x02,0x0d,0x0e,0x1c + +# GFX950: v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], v[2:5], a[4:11], v1 ; encoding: [0x0a,0x80,0xd3,0xd3,0x02,0x09,0x06,0x14] +0x0a,0x80,0xd3,0xd3,0x02,0x09,0x06,0x14 + +# GFX950: v_smfmac_f32_32x32x64_fp8_fp8 a[10:25], v[2:5], a[4:11], v3 ; encoding: [0x0a,0x80,0xd3,0xd3,0x02,0x09,0x0e,0x14] 
+0x0a,0x80,0xd3,0xd3,0x02,0x09,0x0e,0x14 + +# GFX950: v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xd3,0xd3,0x02,0x09,0x0a,0x0c] +0x0a,0x0b,0xd3,0xd3,0x02,0x09,0x0a,0x0c + +# GFX950: v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xd3,0xd3,0x02,0x09,0x0e,0x0c] +0x0a,0x0b,0xd3,0xd3,0x02,0x09,0x0e,0x0c + +# GFX950: v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], v[2:5], v[6:13], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xd3,0xd3,0x02,0x0d,0x0a,0x04] +0x0a,0x0b,0xd3,0xd3,0x02,0x0d,0x0a,0x04 diff --git a/llvm/test/MC/Disassembler/X86/x86-64.txt b/llvm/test/MC/Disassembler/X86/x86-64.txt index 8d6564dd098990..9a18097c8f9623 100644 --- a/llvm/test/MC/Disassembler/X86/x86-64.txt +++ b/llvm/test/MC/Disassembler/X86/x86-64.txt @@ -770,3 +770,9 @@ # CHECK: prefetchit1 (%rip) 0x0f,0x18,0x35,0x00,0x00,0x00,0x00 + +# Check that we correctly ignore a REX prefix that is not immediately before +# the opcode. REX prefixes not immediately preceding the Opcode are ignored +# according to Section 2.2.1 of the Intel 64 Architecture Manual. +# CHECK: orw $25659, %ax +0x66 0x4c 0x64 0x0d 0x3b 0x64 diff --git a/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir b/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir new file mode 100644 index 00000000000000..c69bc1b5eca649 --- /dev/null +++ b/llvm/test/MachineVerifier/RISCV/subreg-liveness.mir @@ -0,0 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=none %s -o - | FileCheck %s +# REQUIRES: riscv64-registered-target + +# During the MachineVerifier, it assumes that used registers have been defined +# In this test case, while $v12_v13_v14_v15_v16 covers $v14_v15, +# $v14_v15 is not a sub-register of $v14m2 even though they share the same register. +# This corner case can be resolved by checking the register using RegUnit. + +... 
+--- +name: func +tracksRegLiveness: true +tracksDebugUserValues: true +body: | + bb.0: + liveins: $v0, $v8, $v9, $v10, $v11 + + ; CHECK-LABEL: name: func + ; CHECK: liveins: $v0, $v8, $v9, $v10, $v11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $v16m2 = PseudoVMV_V_I_M2 undef renamable $v16m2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v20m2 = VMV2R_V $v14m2, implicit $v12_v13_v14_v15_v16 + renamable $v16m2 = PseudoVMV_V_I_M2 undef renamable $v16m2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + $v20m2 = VMV2R_V $v14m2, implicit $v12_v13_v14_v15_v16 + +... diff --git a/llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc b/llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc new file mode 100644 index 00000000000000..c98308f4637f84 Binary files /dev/null and b/llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc differ diff --git a/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll b/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll new file mode 100644 index 00000000000000..20f95617915cc5 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll @@ -0,0 +1,28 @@ +;; Check that we can read the old *_ALLOC_INFO summary format that placed the +;; stack id indexes directly in the alloc info summary, rather than encoding as +;; a separate radix tree. 
+;; +;; The old bitcode was generated by the older compiler from `opt -thinlto-bc` +;; on the following LLVM assembly: +;; +;; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +;; target triple = "x86_64-unknown-linux-gnu" +;; +;; define internal ptr @_Z3barv() #0 { +;; entry: +;; %call = call ptr @_Znam(i64 0), !memprof !1, !callsite !6 +;; ret ptr null +;; } +;; +;; declare ptr @_Znam(i64) +;; +;; !1 = !{!2, !4} +;; !2 = !{!3, !"notcold"} +;; !3 = !{i64 9086428284934609951, i64 8632435727821051414} +;; !4 = !{!5, !"cold"} +;; !5 = !{i64 9086428284934609951, i64 2732490490862098848} +;; !6 = !{i64 9086428284934609951} + +; RUN: llvm-dis %S/Inputs/memprof-old-alloc-context-summary.bc -o - | FileCheck %s +; CHECK: stackIds: (8632435727821051414) +; CHECK-SAME: stackIds: (2732490490862098848) diff --git a/llvm/test/Transforms/Inline/LoongArch/inline-target-features.ll b/llvm/test/Transforms/Inline/LoongArch/inline-target-features.ll new file mode 100644 index 00000000000000..f7a37015e07fcd --- /dev/null +++ b/llvm/test/Transforms/Inline/LoongArch/inline-target-features.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -mtriple=loongarch64-unknown-linux-gnu -S -passes=inline | FileCheck %s +; RUN: opt < %s -mtriple=loongarch64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s +; Check that we only inline when we have compatible target attributes. + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "loongarch64-unknown-linux-gnu" + +define i32 @foo() #0 { +entry: + %call = call i32 (...) @baz() + ret i32 %call +; CHECK-LABEL: foo +; CHECK: call i32 (...) @baz() +} +declare i32 @baz(...) #0 + +define i32 @bar() #1 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: bar +; CHECK: call i32 (...) 
@baz() +} + +define i32 @qux() #0 { +entry: + %call = call i32 @bar() + ret i32 %call +; CHECK-LABEL: qux +; CHECK: call i32 @bar() +} + +attributes #0 = { "target-cpu"="generic-la64" "target-features"="+f,+d" } +attributes #1 = { "target-cpu"="generic-la64" "target-features"="+f,+d,+lsx,+lasx" } diff --git a/llvm/test/Transforms/Inline/LoongArch/lit.local.cfg b/llvm/test/Transforms/Inline/LoongArch/lit.local.cfg new file mode 100644 index 00000000000000..cc24278acbb414 --- /dev/null +++ b/llvm/test/Transforms/Inline/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not "LoongArch" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll new file mode 100644 index 00000000000000..d9c105f753e264 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=amdgcn-- -passes=instcombine -S < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck 
-check-prefix=OPT-W64 %s + +define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1:[0-9]+]] +; OPT-NEXT: store i32 [[TMP]], ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: store i32 32, ptr addrspace(1) [[ARG]], align 4 +; OPT-W32-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: store i32 64, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: ret void +; +bb: + %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 + store i32 %tmp, ptr addrspace(1) %arg, align 4 + ret void +} + +define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]] +; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 1 +; OPT-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-W32-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) 
nocapture [[ARG:%.*]]) #[[ATTR0]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: store i32 2, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: ret void +; +bb: + %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 + %tmp1 = icmp ugt i32 %tmp, 32 + %tmp2 = select i1 %tmp1, i32 2, i32 1 + store i32 %tmp2, ptr addrspace(1) %arg + ret void +} + +define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) { +; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) { +; OPT-NEXT: [[BB:.*:]] +; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]] +; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32 +; OPT-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]] +; OPT: [[BB2]]: +; OPT-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-NEXT: br label %[[BB3]] +; OPT: [[BB3]]: +; OPT-NEXT: ret void +; +; OPT-W32-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] { +; OPT-W32-NEXT: [[BB:.*:]] +; OPT-W32-NEXT: br i1 false, label %[[BB2:.*]], label %[[BB3:.*]] +; OPT-W32: [[BB2]]: +; OPT-W32-NEXT: br label %[[BB3]] +; OPT-W32: [[BB3]]: +; OPT-W32-NEXT: ret void +; +; OPT-W64-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize( +; OPT-W64-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] { +; OPT-W64-NEXT: [[BB:.*:]] +; OPT-W64-NEXT: br i1 true, label %[[BB2:.*]], label %[[BB3:.*]] +; OPT-W64: [[BB2]]: +; OPT-W64-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4 +; OPT-W64-NEXT: br label %[[BB3]] +; OPT-W64: [[BB3]]: +; OPT-W64-NEXT: ret void +; +bb: + %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0 + %tmp1 = icmp ugt i32 %tmp, 32 + br i1 %tmp1, label %bb2, label %bb3 + +bb2: ; preds = %bb + store i32 1, ptr addrspace(1) %arg, align 4 + br label %bb3 + +bb3: ; preds = %bb2, %bb + ret void +} + +declare i32 
@llvm.amdgcn.wavefrontsize() #0 + +attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll index 30b9fca6e97ada..c7bbc8ab56f9a6 100644 --- a/llvm/test/Transforms/InstCombine/and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll @@ -5044,11 +5044,9 @@ define i1 @isnormal_logical_select_0_fmf1(half %x) { define i1 @and_fcmp_reassoc1(i1 %x, double %a, double %b) { ; CHECK-LABEL: @and_fcmp_reassoc1( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = and i1 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RETVAL1:%.*]] = and i1 [[RETVAL]], [[CMP1]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ugt double %a, %b @@ -5059,11 +5057,9 @@ define i1 @and_fcmp_reassoc1(i1 %x, double %a, double %b) { define i1 @and_fcmp_reassoc2(i1 %x, double %a, double %b) { ; CHECK-LABEL: @and_fcmp_reassoc2( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = and i1 [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[RETVAL1:%.*]] = and i1 [[RETVAL]], [[CMP1]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ugt double %a, %b @@ -5074,11 +5070,9 @@ define i1 @and_fcmp_reassoc2(i1 %x, double %a, double %b) { define i1 @and_fcmp_reassoc3(i1 %x, double %a, double %b) { ; CHECK-LABEL: @and_fcmp_reassoc3( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = and i1 [[TMP1]], [[X:%.*]] 
-; CHECK-NEXT: [[RETVAL1:%.*]] = and i1 [[CMP1]], [[RETVAL]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ugt double %a, %b @@ -5089,11 +5083,9 @@ define i1 @and_fcmp_reassoc3(i1 %x, double %a, double %b) { define i1 @and_fcmp_reassoc4(i1 %x, double %a, double %b) { ; CHECK-LABEL: @and_fcmp_reassoc4( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = and i1 [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[RETVAL1:%.*]] = and i1 [[CMP1]], [[RETVAL]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp ult double %a, %b %cmp1 = fcmp ugt double %a, %b diff --git a/llvm/test/Transforms/InstCombine/eq-of-parts.ll b/llvm/test/Transforms/InstCombine/eq-of-parts.ll index 00ee7bf643286f..d07c2e6a5be521 100644 --- a/llvm/test/Transforms/InstCombine/eq-of-parts.ll +++ b/llvm/test/Transforms/InstCombine/eq-of-parts.ll @@ -1441,11 +1441,7 @@ define i1 @ne_optimized_highbits_cmp_todo_overlapping(i32 %x, i32 %y) { define i1 @and_trunc_i1(i8 %a1, i8 %a2) { ; CHECK-LABEL: @and_trunc_i1( -; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[A1:%.*]], [[A2:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[XOR]], 2 -; CHECK-NEXT: [[LOBIT:%.*]] = trunc i8 [[XOR]] to i1 -; CHECK-NEXT: [[LOBIT_INV:%.*]] = xor i1 [[LOBIT]], true -; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[LOBIT_INV]] +; CHECK-NEXT: [[AND:%.*]] = icmp eq i8 [[A1:%.*]], [[A2:%.*]] ; CHECK-NEXT: ret i1 [[AND]] ; %xor = xor i8 %a1, %a2 @@ -1494,10 +1490,7 @@ define i1 @and_trunc_i1_wrong_operands(i8 %a1, i8 %a2, i8 %a3) { define i1 @or_trunc_i1(i64 %a1, i64 %a2) { ; CHECK-LABEL: @or_trunc_i1( -; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[A2:%.*]], [[A1:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[XOR]], 1 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[XOR]] to i1 -; CHECK-NEXT: [[OR:%.*]] = or 
i1 [[CMP]], [[TRUNC]] +; CHECK-NEXT: [[OR:%.*]] = icmp ne i64 [[A2:%.*]], [[A1:%.*]] ; CHECK-NEXT: ret i1 [[OR]] ; %xor = xor i64 %a2, %a1 @@ -1538,3 +1531,28 @@ define i1 @or_trunc_i1_wrong_operands(i64 %a1, i64 %a2, i64 %a3) { %or = or i1 %cmp, %trunc ret i1 %or } + +define i1 @jv_identical(i64 %arg1, i64 %arg2) { +; CHECK-LABEL: @jv_identical( +; CHECK-NEXT: [[ARG1_TRUNC:%.*]] = trunc i64 [[ARG1:%.*]] to i8 +; CHECK-NEXT: [[ARG2_TRUNC:%.*]] = trunc i64 [[ARG2:%.*]] to i8 +; CHECK-NEXT: [[EQ1:%.*]] = icmp eq i8 [[ARG1_TRUNC]], [[ARG2_TRUNC]] +; CHECK-NEXT: [[DOTUNSHIFTED:%.*]] = xor i64 [[ARG2]], [[ARG1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[DOTUNSHIFTED]], 65536 +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[EQ1]], [[TMP1]] +; CHECK-NEXT: ret i1 [[AND2]] +; + %arg1.trunc = trunc i64 %arg1 to i8 + %arg1.shift = lshr i64 %arg1, 16 + %arg1.shift.trunc = trunc i64 %arg1.shift to i16 + %arg2.trunc = trunc i64 %arg2 to i8 + %arg2.shift = lshr i64 %arg2, 16 + %arg2.shift.trunc = trunc i64 %arg2.shift to i16 + %eq1 = icmp eq i8 %arg1.trunc, %arg2.trunc + %eq2 = icmp eq i16 %arg1.shift.trunc, %arg2.shift.trunc + %and1 = and i1 %eq1, %eq2 + %xor = xor i64 %arg2, %arg1 + %cmp = icmp ult i64 %xor, 4294967296 + %and2 = and i1 %cmp, %and1 + ret i1 %and2 +} diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll index f46940ff060d41..a4296a326c4bc6 100644 --- a/llvm/test/Transforms/InstCombine/fptrunc.ll +++ b/llvm/test/Transforms/InstCombine/fptrunc.ll @@ -90,6 +90,19 @@ define half @fptrunc_select_true_val_extra_use(half %x, float %y, i1 %cond) { ret half %r } +define half @fptrunc_max(half %arg) { +; CHECK-LABEL: @fptrunc_max( +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt half [[ARG:%.*]], 0xH0000 +; CHECK-NEXT: [[NARROW_SEL:%.*]] = select i1 [[CMP]], half 0xH0000, half [[ARG]] +; CHECK-NEXT: ret half [[NARROW_SEL]] +; + %ext = fpext half %arg to double + %cmp = fcmp olt double %ext, 0.000000e+00 + %max = select i1 %cmp, double 
0.000000e+00, double %ext + %trunc = fptrunc double %max to half + ret half %trunc +} + ; Negative test - this would require an extra instruction. define half @fptrunc_select_true_val_extra_use_2(half %x, float %y, i1 %cond) { diff --git a/llvm/test/Transforms/InstCombine/or-fcmp.ll b/llvm/test/Transforms/InstCombine/or-fcmp.ll index a2842f7a45f597..193fe4b5cc722f 100644 --- a/llvm/test/Transforms/InstCombine/or-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/or-fcmp.ll @@ -54,7 +54,7 @@ define i1 @PR41069(double %a, double %b, double %c, double %d) { ; CHECK-LABEL: @PR41069( ; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]] -; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[UNO1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[R]] ; %uno1 = fcmp uno double %a, %b @@ -87,7 +87,7 @@ define i1 @PR41069_commute(double %a, double %b, double %c, double %d) { ; CHECK-LABEL: @PR41069_commute( ; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]] -; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[UNO1]], [[TMP1]] ; CHECK-NEXT: ret i1 [[R]] ; %uno1 = fcmp uno double %a, %b @@ -4608,11 +4608,9 @@ define i1 @intersect_fmf_4(double %a, double %b) { define i1 @or_fcmp_reassoc1(i1 %x, double %a, double %b) { ; CHECK-LABEL: @or_fcmp_reassoc1( -; CHECK-NEXT: [[OR:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[A]], [[B]] +; CHECK-NEXT: [[OR:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = or i1 [[OR]], [[CMP1:%.*]] -; CHECK-NEXT: [[RETVAL1:%.*]] = or i1 [[RETVAL]], [[CMP2]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp olt double %a, %b %cmp1 = fcmp ogt double %a, %b @@ -4623,11 +4621,9 @@ define i1 @or_fcmp_reassoc1(i1 %x, double %a, double %b) { define i1 
@or_fcmp_reassoc2(i1 %x, double %a, double %b) { ; CHECK-LABEL: @or_fcmp_reassoc2( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = or i1 [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[RETVAL1:%.*]] = or i1 [[RETVAL]], [[CMP1]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp olt double %a, %b %cmp1 = fcmp ogt double %a, %b @@ -4638,11 +4634,9 @@ define i1 @or_fcmp_reassoc2(i1 %x, double %a, double %b) { define i1 @or_fcmp_reassoc3(i1 %x, double %a, double %b) { ; CHECK-LABEL: @or_fcmp_reassoc3( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = or i1 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RETVAL1:%.*]] = or i1 [[CMP1]], [[RETVAL]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp olt double %a, %b %cmp1 = fcmp ogt double %a, %b @@ -4653,11 +4647,9 @@ define i1 @or_fcmp_reassoc3(i1 %x, double %a, double %b) { define i1 @or_fcmp_reassoc4(i1 %x, double %a, double %b) { ; CHECK-LABEL: @or_fcmp_reassoc4( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[RETVAL:%.*]] = or i1 [[X:%.*]], [[TMP1]] -; CHECK-NEXT: [[RETVAL1:%.*]] = or i1 [[CMP1]], [[RETVAL]] -; CHECK-NEXT: ret i1 [[RETVAL1]] +; CHECK-NEXT: ret i1 [[RETVAL]] ; %cmp = fcmp olt double %a, %b %cmp1 = fcmp ogt double %a, %b diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 414f39d557044a..ab541f6fa94e66 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -60,7 +60,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: ir-bb: -; IF-EVL-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; IF-EVL-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; IF-EVL-OUTLOOP-NEXT: No successors ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: scalar.ph: @@ -110,7 +110,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: ir-bb: -; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; IF-EVL-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; IF-EVL-INLOOP-NEXT: No successors ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: scalar.ph: @@ -156,7 +156,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: ir-bb: -; NO-VP-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; NO-VP-OUTLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from middle.block) ; NO-VP-OUTLOOP-NEXT: No successors ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: @@ -202,7 +202,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: ir-bb: -; NO-VP-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]>) +; NO-VP-INLOOP-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RDX_EX]]> from 
middle.block) ; NO-VP-INLOOP-NEXT: No successors ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index db2c8188a7cb35..d5f1b46bd5421f 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -18,7 +18,7 @@ ; CHECK: br label %vector.body ; ; CHECK: vector.body: -; CHECK: br i1 %8, label %middle.block, label %vector.body, !prof [[PROF_F0_VECTOR_BODY:![0-9]+]] +; CHECK: br i1 {{.+}}, label %middle.block, label %vector.body, !prof [[PROF_F0_VECTOR_BODY:![0-9]+]] ; ; CHECK: middle.block: ; CHECK: br i1 %cmp.n, label %exit.loopexit, label %vec.epilog.iter.check, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] @@ -30,10 +30,10 @@ ; CHECK: br label %vec.epilog.vector.body ; ; CHECK: vec.epilog.vector.body: -; CHECK: br i1 %12, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]] +; CHECK: br i1 {{.+}}, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]] ; ; CHECK: vec.epilog.middle.block: -; CHECK: br i1 %cmp.n12, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] +; CHECK: br i1 %cmp.n11, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] ; ; CHECK: vec.epilog.scalar.ph: ; CHECK: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index bcacfb358ec05a..517de8be5c9987 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -48,8 +48,8 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; 
CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) -; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) +; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1 from scalar.ph) ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors @@ -125,9 +125,9 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) -; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1) -; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) +; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1 from scalar.ph) +; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2 from scalar.ph) ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors @@ -205,8 +205,8 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] -; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]>) -; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry 
] (extra operand: vp<[[RESUME_Y]]>.1) +; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]> from scalar.ph) +; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1 from scalar.ph) ; CHECK: No successors ; CHECK-NEXT: } ; @@ -279,8 +279,8 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] -; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]>) -; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1) +; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]> from scalar.ph) +; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1 from scalar.ph) ; CHECK: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 8ae538cf63986b..d0c811763a522a 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -85,7 +85,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -172,7 +172,7 @@ define void 
@sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -235,7 +235,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %res = phi i32 [ %and.red.next, %loop ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %res = phi i32 [ %and.red.next, %loop ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -244,7 +244,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: IR %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 @@ -355,7 +355,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; 
CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -452,7 +452,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %C = icmp sgt i32 %iv.next, %recur.next ; CHECK-NEXT: No successors @@ -539,7 +539,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]>) +; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK: IR %ec = icmp ugt i64 %iv, 3 ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index a71666d8c3167a..dd58dc81ccedde 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -227,7 +227,7 @@ exit: ; DBG-EMPTY: ; DBG-NEXT: ir-bb: ; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]>) +; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph) ; DBG: IR %ec = icmp slt i32 %iv.next.trunc, %n ; DBG-NEXT: No successors ; DBG-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll 
b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 6bb20a301e0ade..195f6a48640e54 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -171,7 +171,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %red.next.lcssa = phi float [ %red.next, %for.body ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %red.next.lcssa = phi float [ %red.next, %for.body ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -476,7 +476,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %muladd.lcssa = phi float [ %muladd, %for.body ] (extra operand: vp<[[RED_EX]]>) +; CHECK-NEXT: IR %muladd.lcssa = phi float [ %muladd, %for.body ] (extra operand: vp<[[RED_EX]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -716,7 +716,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[EXIT]]>) +; CHECK-NEXT: IR %lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[EXIT]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -1111,7 +1111,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb -; CHECK-NEXT: IR %for.1.lcssa = phi i16 [ %for.1, %loop ] (extra operand: vp<[[FOR_RESULT]]>) +; CHECK-NEXT: IR %for.1.lcssa = phi i16 [ %for.1, %loop ] (extra operand: vp<[[FOR_RESULT]]> from middle.block) ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph @@ -1119,7 +1119,7 @@ define i16 
@print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]>) +; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph) ; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll index d6073ea4bbbae6..289807a808d5db 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll @@ -12,7 +12,7 @@ define void @test() { ; CHECK: [[BB63]]: ; CHECK-NEXT: br label %[[BB64]] ; CHECK: [[BB64]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ] +; CHECK-NEXT: [[TMP25:%.*]] = phi <16 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ] ; CHECK-NEXT: [[I66:%.*]] = load float, ptr poison, align 16 ; CHECK-NEXT: [[I67:%.*]] = load float, ptr poison, align 4 ; CHECK-NEXT: [[I68:%.*]] = load float, ptr poison, align 8 @@ -37,28 +37,30 @@ define void @test() { ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15 ; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]] ; CHECK: [[BB77]]: -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x 
float> poison, <8 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[I68]], i32 0 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x float> [[TMP30]], float [[I66]], i32 1 +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> ; CHECK-NEXT: br label %[[BB78:.*]] ; CHECK: [[BB78]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ] -; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ [[TMP31]], %[[BB77]] ], [ [[TMP37:%.*]], %[[BB78]] ] +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], 
<16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2) -; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP13]] +; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP38]], [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]] ; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison ; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison -; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP36]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP37]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]] ; CHECK: [[BB167]]: ; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll index e2d1a29ee22de4..4755c690c07111 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -307,14 +307,14 @@ define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = 
shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> @@ -395,10 +395,10 @@ define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 -; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1 ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x 
double> poison, <9 x i32> ; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 @@ -483,15 +483,15 @@ define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4 ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]] ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index bc24a44cecbe39..85131758853b3d 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -877,20 +877,10 @@ entry: 
define i64 @red_zext_ld_4xi64(ptr %ptr) { ; CHECK-LABEL: @red_zext_ld_4xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD0]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64 -; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]] -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2 -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1 -; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64 -; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]] -; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 -; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1 -; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64 -; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) +; CHECK-NEXT: [[ADD_3:%.*]] = zext i16 [[TMP2]] to i64 ; CHECK-NEXT: ret i64 [[ADD_3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remark-zext-incoming-for-neg-icmp.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remark-zext-incoming-for-neg-icmp.ll index e4d20a6db8fa67..09c11bbefd4a35 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/remark-zext-incoming-for-neg-icmp.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remark-zext-incoming-for-neg-icmp.ll @@ -8,7 +8,7 @@ ; YAML-NEXT: Function: test ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost ' -; YAML-NEXT: - Cost: '-1' +; YAML-NEXT: - Cost: '-10' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '8' ; YAML-NEXT:... 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll index 2a036cc8fe326f..4282ae2ab88ec7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -33,16 +33,15 @@ define void @foo() personality ptr @bar { ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ] -; CHECK-NEXT: [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7]] = phi <4 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP11:%.*]], [[BB12]] ] ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: ; CHECK-NEXT: [[LOCAL_10_38123_LCSSA:%.*]] = phi i32 [ [[TMP10]], [[BB3]] ] ; CHECK-NEXT: [[LOCAL_5_33118_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_10_38123_LCSSA]], i32 0 -; CHECK-NEXT: [[TMP8]] = insertelement <2 x i32> [[TMP12]], i32 [[LOCAL_5_33118_LCSSA]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[LOCAL_10_38123_LCSSA]], i32 2 +; CHECK-NEXT: [[TMP9]] = insertelement <4 x i32> [[TMP8]], i32 [[LOCAL_5_33118_LCSSA]], i32 3 ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void @@ -51,8 +50,8 @@ define void @foo() personality ptr @bar { ; CHECK-NEXT: [[LOCAL_5_84111_LCSSA:%.*]] = phi i32 [ [[LOCAL_5_84111]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_10_89113_LCSSA]], i32 0 -; CHECK-NEXT: [[TMP9]] = insertelement <2 x i32> [[TMP11]], i32 [[LOCAL_5_84111_LCSSA]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement 
<4 x i32> poison, i32 [[LOCAL_10_89113_LCSSA]], i32 2 +; CHECK-NEXT: [[TMP11]] = insertelement <4 x i32> [[TMP12]], i32 [[LOCAL_5_84111_LCSSA]], i32 3 ; CHECK-NEXT: br label [[BB9]] ; bb1: diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll index 405afd5969a413..5c9058b4823202 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll @@ -276,21 +276,19 @@ if.false: ; preds = %if.true, %entry } ;; Both of successor 0 and successor 1 have a single predecessor. -;; TODO: Support transform for this case. -define void @single_predecessor(ptr %p, ptr %q, i32 %a) { +define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) { ; CHECK-LABEL: @single_predecessor( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: common.ret: -; CHECK-NEXT: ret void -; CHECK: if.end: -; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4 -; CHECK-NEXT: br label [[COMMON_RET:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4 -; CHECK-NEXT: br label [[COMMON_RET]] +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1> +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32 +; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]]) +; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, 
i32 3 +; CHECK-NEXT: ret i32 [[DOT]] ; entry: %tobool = icmp ne i32 %a, 0 @@ -298,12 +296,12 @@ entry: if.end: store i32 1, ptr %q - ret void + ret i32 2 if.then: %0 = load i32, ptr %q store i32 %0, ptr %p - ret void + ret i32 3 } ;; Hoist 6 stores. @@ -759,6 +757,44 @@ if.true: ret i32 %res } +;; Not transform if either BB has multiple successors. +define i32 @not_multi_successors(i1 %c1, i32 %c2, ptr %p) { +; CHECK-LABEL: @not_multi_successors( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[ENTRY_IF:%.*]], label [[COMMON_RET:%.*]] +; CHECK: entry.if: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: switch i32 [[C2:%.*]], label [[COMMON_RET]] [ +; CHECK-NEXT: i32 0, label [[SW_BB:%.*]] +; CHECK-NEXT: i32 1, label [[SW_BB]] +; CHECK-NEXT: ] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL]], [[ENTRY_IF]] ], [ 0, [[SW_BB]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: sw.bb: +; CHECK-NEXT: br label [[COMMON_RET]] +; +entry: + br i1 %c1, label %entry.if, label %entry.else + +entry.if: ; preds = %entry + %val = load i32, ptr %p, align 4 + switch i32 %c2, label %return [ + i32 0, label %sw.bb + i32 1, label %sw.bb + ] + +entry.else: ; preds = %entry + ret i32 0 + +sw.bb: ; preds = %entry.if, %entry.if + br label %return + +return: ; preds = %sw.bb, %entry.if + %ret = phi i32 [ %val, %entry.if ], [ 0, %sw.bb ] + ret i32 %ret +} + declare i32 @read_memory_only() readonly nounwind willreturn speculatable !llvm.dbg.cu = !{!0} diff --git a/llvm/test/Verifier/AMDGPU/mfma-scale.ll b/llvm/test/Verifier/AMDGPU/mfma-scale.ll new file mode 100644 index 00000000000000..1e3e8856df3d10 --- /dev/null +++ b/llvm/test/Verifier/AMDGPU/mfma-scale.ll @@ -0,0 +1,230 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +; -------------------------------------------------------------------- +; Wrong mangled types +; -------------------------------------------------------------------- 
+ +; CHECK: operand 0 must be 4, 6 or 8 element i32 vector +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v4i64.v8i32(<4 x i64> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 2, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <4 x i64> %arg0 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v4i64_fp8__v8i32_fp8(<4 x i64> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v4i64.v8i32(<4 x i64> %arg0, <8 x i32> %arg1, <4 x float> %arg2, + i32 0, ; cbsz + i32 2, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: operand 1 must be 4, 6 or 8 element i32 vector +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v4i64(<8 x i32> %arg0, <4 x i64> %arg1, <4 x float> %arg2, i32 0, i32 2, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <4 x i64> %arg1 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8v4i64_fp8(<8 x i32> %arg0, <4 x i64> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v4i64(<8 x i32> %arg0, <4 x i64> %arg1, <4 x float> %arg2, + i32 0, ; cbsz + i32 2, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: operand 0 must be 4, 6 or 8 element i32 vector +; CHECK: %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v4i64.v8i32(<4 x i64> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK: <4 x i64> %arg0 +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v4i64_fp8__v8i32_fp8(<4 x i64> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v4i64.v8i32(<4 x i64> %arg0, <8 x i32> %arg1, <16 x float> %arg2, + i32 0, ; 
cbsz + i32 2, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <16 x float> %result +} + +; CHECK: operand 1 must be 4, 6 or 8 element i32 vector +; CHECK-NEXT: %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v4i64(<8 x i32> %arg0, <4 x i64> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <4 x i64> %arg1 +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_fp8v4i64_fp8(<8 x i32> %arg0, <4 x i64> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v4i64(<8 x i32> %arg0, <4 x i64> %arg1, <16 x float> %arg2, + i32 0, ; cbsz + i32 2, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <16 x float> %result +} + +; -------------------------------------------------------------------- +; Impossible vector types +; -------------------------------------------------------------------- + +; CHECK: operand 0 must be 4, 6 or 8 element i32 vector +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v5i32.v8i32(<5 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 4, i32 4, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <5 x i32> %arg0 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v5i32_fp4__v8i32_fp4(<5 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v4i64.v8i32(<5 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, + i32 4, ; cbsz + i32 4, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: operand 1 must be 4, 6 or 8 element i32 vector +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v5i32(<8 x i32> %arg0, <5 x i32> %arg1, <4 x float> %arg2, i32 4, i32 4, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <5 x i32> %arg1 +define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v5i32_fp4(<8 x i32> %arg0, <5 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v5i32(<8 x i32> %arg0, <5 x i32> %arg1, <4 x float> %arg2, + i32 4, ; cbsz + i32 4, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: operand 0 must be 4, 6 or 8 element i32 vector +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v7i32.v8i32(<7 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 4, i32 4, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <7 x i32> %arg0 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v7i32_fp4__v8i32_fp4(<7 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v4i64.v8i32(<7 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, + i32 4, ; cbsz + i32 4, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: operand 1 must be 4, 6 or 8 element i32 vector +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v7i32(<8 x i32> %arg0, <7 x i32> %arg1, <4 x float> %arg2, i32 4, i32 4, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <7 x i32> %arg1 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp4__v7i32_fp4(<8 x i32> %arg0, <7 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v7i32(<8 x i32> %arg0, <7 x i32> %arg1, <4 x float> %arg2, + i32 4, ; cbsz + i32 4, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; -------------------------------------------------------------------- +; Out of bounds format +; -------------------------------------------------------------------- + +; CHECK: invalid value for cbsz format +; 
CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 9999, i32 2, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: i32 9999 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_invalid0__v8i32_fp6(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, + i32 9999, ; cbsz + i32 2, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: invalid value for blgp format +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 9999, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: i32 9999 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v8i32_invalid0(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, + i32 0, ; cbsz + i32 9999, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: invalid value for cbsz format +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 5, i32 2, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: i32 5 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_invalid1__v8i32_fp6(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, + i32 5, ; cbsz + i32 2, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> 
%result +} + +; CHECK: invalid value for blgp format +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 5, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: i32 5 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v8i32_fp8__v8i321_invalid(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, + i32 0, ; cbsz + i32 5, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: invalid value for cbsz format +; CHECK-NEXT: %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 5, i32 5, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: i32 5 +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v8i32_invalid__v8i32_invalid(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, + i32 5, ; cbsz + i32 5, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <16 x float> %result +} + +; -------------------------------------------------------------------- +; Incorrect signature for format cases (IR vector too small) +; -------------------------------------------------------------------- + +; CHECK: invalid vector type for format +; CHECK-NEXT: %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v4i32.v8i32(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <4 x i32> %arg0 +; CHECK-NEXT: i32 0 +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v4i32_fp8__v8i32_fp8(<4 x i32> %arg0, <8 x i32> %arg1, 
<16 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v4i32.v8i32(<4 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, + i32 0, ; cbsz + i32 0, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <16 x float> %result +} + +; CHECK: invalid vector type for format +; CHECK-NEXT: %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v4i32(<8 x i32> %arg0, <4 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <4 x i32> %arg1 +; CHECK-NEXT: i32 0 +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4__v8i32_fp8___v4i32_fp8(<8 x i32> %arg0, <4 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v4i32(<8 x i32> %arg0, <4 x i32> %arg1, <16 x float> %arg2, + i32 0, ; cbsz + i32 0, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <16 x float> %result +} + +; CHECK: invalid vector type for format +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v4i32.v4i32(<4 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <4 x i32> %arg0 +define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4___v4i32_fp8__v4i32_fp8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v4i32.v4i32(<4 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2, + i32 0, ; cbsz + i32 0, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: invalid vector type for format +; CHECK-NEXT: %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v6i32.v6i32(<6 x i32> %arg0, <6 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <6 x i32> %arg0 +define <4 x float> 
@test_mfma_scale_f32_16x16x128_f8f6f4___v6i32_fp8__v6i32_fp8(<6 x i32> %arg0, <6 x i32> %arg1, <4 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v6i32.v6i32(<6 x i32> %arg0, <6 x i32> %arg1, <4 x float> %arg2, + i32 0, ; cbsz + i32 0, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <4 x float> %result +} + +; CHECK: invalid vector type for format +; CHECK-NEXT: %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v4i32.v4i32(<4 x i32> %arg0, <4 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <4 x i32> %arg0 +; CHECK-NEXT: i32 0 +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v4i32_fp8__v4i32_fp8(<4 x i32> %arg0, <4 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v4i32.v4i32(<4 x i32> %arg0, <4 x i32> %arg1, <16 x float> %arg2, + i32 0, ; cbsz + i32 0, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <16 x float> %result +} + +; CHECK: invalid vector type for format +; CHECK-NEXT: %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v6i32.v6i32(<6 x i32> %arg0, <6 x i32> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0, i32 %scale0, i32 0, i32 %scale1) +; CHECK-NEXT: <6 x i32> %arg0 +; CHECK-NEXT: i32 0 +define <16 x float> @test_mfma_scale_f32_32x32x64_f8f6f4___v6i32_fp8__v6i32_fp8(<6 x i32> %arg0, <6 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) { + %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v6i32.v6i32(<6 x i32> %arg0, <6 x i32> %arg1, <16 x float> %arg2, + i32 0, ; cbsz + i32 0, ; blgp + i32 0, i32 %scale0, i32 0, i32 %scale1) + ret <16 x float> %result +} diff --git a/llvm/test/tools/llvm-dwarfdump/AArch64/dwarf-lang-metal.ll b/llvm/test/tools/llvm-dwarfdump/AArch64/dwarf-lang-metal.ll new file mode 100644 index 00000000000000..b5606ffc71dd37 --- 
/dev/null +++ b/llvm/test/tools/llvm-dwarfdump/AArch64/dwarf-lang-metal.ll @@ -0,0 +1,38 @@ +; RUN: llc -O0 %s -filetype=obj -o %t.o +; RUN: llvm-dwarfdump -arch arm64 %t.o | FileCheck %s +; AArch64 does not support Metal. However in the absence of a suitable target +; it can still be used to test that DW_LANG_Metal/DW_LNAME_Metal can be +; encoded/decoded correctly. +; CHECK: DW_AT_language (DW_LANG_Metal) +source_filename = "test.cpp" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx15.0.0" + +; Function Attrs: mustprogress noinline norecurse nounwind optnone ssp uwtable(sync) +define noundef i32 @main() #0 !dbg !11 { +entry: + ret i32 0, !dbg !14 +} + +attributes #0 = { mustprogress norecurse nounwind } + +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8, !9} +!llvm.dbg.cu = !{!0} +!llvm.linker.options = !{} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_Metal, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "test.cpp", directory: "/tmp") +!2 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!3 = !{i32 2, !"SDK Version", [2 x i32] [i32 15, i32 0]} +!4 = !{i32 7, !"Dwarf Version", i32 5} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = !{i32 1, !"wchar_size", i32 4} +!7 = !{i32 8, !"PIC Level", i32 2} +!8 = !{i32 7, !"uwtable", i32 1} +!9 = !{i32 7, !"frame-pointer", i32 1} +!10 = !{!"clang"} +!11 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 8, type: !12, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!12 = !DISubroutineType(types: !13) +!13 = !{!2} +!14 = !DILocation(line: 11, column: 1, scope: !11) diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s index cdb96dd717e94c..03b4041f77bcef 100644 --- a/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s +++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s @@ -1,13 +1,9 @@ # RUN: 
llvm-mca -mtriple=amdgcn -mcpu=gfx950 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s # CHECK: Iterations: 1 -# CHECK: Instructions: 15 -# CHECK: Total Cycles: 60 -# CHECK: Total uOps: 15 - -v_mfma_ld_scale_b32 v0, v0 -v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] -v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] +# CHECK: Instructions: 129 +# CHECK: Total Cycles: 1069 +# CHECK: Total uOps: 129 v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] @@ -15,7 +11,6 @@ v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15] v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 - v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7] v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15] @@ -23,20 +18,312 @@ v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7] +v_mfma_ld_scale_b32 v0, v0 + +;; FIXME: should have different cycle count depending on whether either matrix is f8 +;; TODO: test vdc/adc +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:2 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:3 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:4 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:2 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:3 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:4 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], 
v[4:11], v[4:11], v[0:3] cbsz:2 blgp:1 +v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 blgp:2 + +;; FIXME: should have different cycle count depending on whether either matrix is f8 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:2 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:3 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:4 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:2 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:3 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:4 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:2 +v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 + +;; FIXME: should have different cycle count depending on whether either matrix is f8 +v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 + +;; FIXME +;; v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:1 +;; v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:2 +;; v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:3 +;; v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:4 + +v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 + +;; FIXME +;; v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 blgp:1 +;; v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 blgp:2 +;; v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 blgp:3 +;; v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 blgp:4 + + +;; TODO: These results are wrong +v_smfmac_f32_16x16x64_f16 v[10:13], 
a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 + +v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] +v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] + +v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] +v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] + +v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] +v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] + +v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] +v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] + +v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] +v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] + +v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] +v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] + +v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] +v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] + +v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] +v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] + +v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] +v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] + 
+v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] +v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] + +v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] +v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] + +v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] +v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] + +v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] +v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] + +v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] +v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] + +v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] +v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] + +v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] +v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] + +v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] +v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] + +v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] +v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] + +v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] +v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] + +v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 +v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 + +v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] +v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] + +v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] +v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] + +v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] +v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] + +v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] +v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] + +v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 +v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 + +v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 +v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 + +v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], 
v4 cbsz:3 abid:1 +v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 + +v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 +v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 + +v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 +v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 + +v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] +v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] + +v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] +v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] + +v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] +v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] + +v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] +v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] + +v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] +v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] +v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] +v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] + +v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 + +v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 # CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: - - - - 1.00 - - v_mfma_ld_scale_b32 v0, v0 -# CHECK-NEXT: - - - - 1.00 - - v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] # CHECK-NEXT: - - - - - - 4.00 
v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15] # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 -# CHECK-NEXT: - - - - 1.00 - - v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 -# CHECK-NEXT: - - - - 1.00 - - v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 # CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7] +# CHECK-NEXT: - - - - 1.00 - - v_mfma_ld_scale_b32 v0, v0 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:2 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:3 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:4 +# CHECK-NEXT: - - - - - - 
4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:2 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:3 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:4 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:2 blgp:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 blgp:2 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:2 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:3 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:4 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:2 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:3 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:4 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:2 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 op_sel_hi:[0,0,0] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 op_sel_hi:[0,0,0] +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_f16 
v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] +# CHECK-NEXT: - - - - 4.00 - - v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] +# CHECK-NEXT: - - - - 4.00 - - v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] +# CHECK-NEXT: - - - - 16.00 - - v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] +# 
CHECK-NEXT: - - - - 16.00 - - v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] +# CHECK-NEXT: - - - - - - 8.00 
v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] +# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] +# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 +# CHECK-NEXT: - - - - - - 4.00 
v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 4.00 
v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s index dd7ac2734318fd..f70ce42d115f20 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/independent-load-stores.s @@ -68,20 +68,20 @@ # ALL: Resource pressure per iteration: # ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 2.00 3.34 - +# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 3.34 2.00 - # ALL: Resource pressure by instruction: # ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 1.00 0.34 - addq $44, 64(%r14) -# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - - 0.33 - addq $44, 128(%r14) -# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 - 0.33 - addq $44, 192(%r14) -# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 256(%r14) -# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 - 0.33 - addq $44, 320(%r14) -# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 1.00 0.33 - addq $44, 384(%r14) -# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 - 0.34 - addq $44, 448(%r14) -# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - - 0.33 - addq $44, 512(%r14) -# ALL-NEXT: - 1.00 0.34 0.33 - - - - 1.00 1.00 - 0.33 - addq $44, 576(%r14) -# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 640(%r14) +# 
ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 0.34 1.00 - addq $44, 64(%r14) +# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - 0.33 - - addq $44, 128(%r14) +# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 0.33 - - addq $44, 192(%r14) +# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - 0.34 - - addq $44, 256(%r14) +# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 0.33 - - addq $44, 320(%r14) +# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 0.33 1.00 - addq $44, 384(%r14) +# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 0.34 - - addq $44, 448(%r14) +# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - 0.33 - - addq $44, 512(%r14) +# ALL-NEXT: - 1.00 0.34 0.33 - - - - 1.00 1.00 0.33 - - addq $44, 576(%r14) +# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - 0.34 - - addq $44, 640(%r14) # ALL: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s index 5a7563d461cd94..cd46bd31d5fd43 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-adx.s @@ -46,15 +46,15 @@ adox (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - - 1.33 - +# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxl %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxl (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcxl (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxq %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxq (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcxq (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxl %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 
- - - - 0.33 - adoxl (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adoxl (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxq %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxq (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adoxq (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s index 9384488f06781c..330d2e0952e92d 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-aes.s @@ -58,19 +58,19 @@ aeskeygenassist $22, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 - - - - 2.00 - +# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 - - - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdec %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdec (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesdec (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdeclast %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdeclast (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesdeclast (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenc %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenc (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesenc (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenclast %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenclast (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesenclast (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - aesimc 
%xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - aesimc (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - aesimc (%rax), %xmm2 # CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - aeskeygenassist $22, %xmm0, %xmm2 -# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - aeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - 0.33 - - aeskeygenassist $22, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx1.s index dc8ad8e46a7770..2c9a7c0aebb990 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx1.s @@ -1739,427 +1739,427 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 310.90 275.73 107.33 107.33 19.50 277.73 8.90 18.83 18.50 18.50 0.73 107.00 - +# CHECK-NEXT: 310.90 275.73 107.33 107.33 19.50 277.73 8.90 18.83 18.50 18.50 107.00 0.73 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm0, %ymm1, 
%ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vaesimc %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vaesimc (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vaesimc (%rax), %xmm2 # CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - vaeskeygenassist $22, %xmm0, %xmm2 -# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - vaeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - 0.33 - - vaeskeygenassist $22, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm0, %ymm1, %ymm2 
-# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendpd $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendpd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendpd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendpd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendpd $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendps $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendps $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendps $11, (%rax), %xmm1, %xmm2 # 
CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendps $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendps $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvps %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvps %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvps %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvps %ymm3, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf128 (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastsd (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvps %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf128 (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastsd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss 
(%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomisd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomiss (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm0, %xmm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - 0.33 - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2sd %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2sdl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2sdq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2sdl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2sdq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - 
vcvtsi2ss %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - vcvtsi2ss %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2ssl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtsi2ssq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2ssl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtsi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvtss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - 
- - - - - 0.33 - vcvttps2dq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2si (%rax), %rcx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvttss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - 
- - - - - - - vdivsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.50 - - - 0.50 - - - - - - - vdppd $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - - 0.33 - vdppd $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - 0.33 - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vextractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - vextractps $1, %xmm0, (%rax) # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - 
- 0.33 - - vhaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertps $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vinsertps $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vlddqu 
(%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vlddqu (%rax), %ymm2 -# CHECK-NEXT: 1.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - vldmxcsr (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vinsertps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vlddqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vlddqu (%rax), %ymm2 +# CHECK-NEXT: 1.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - vldmxcsr (%rax) # CHECK-NEXT: - - 0.33 0.33 1.00 - - 0.33 - - - - - vmaskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovpd (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovpd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovpd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovpd (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovpd %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovpd %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovps (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovps (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovps (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), 
%ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vminps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovd %eax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovd %xmm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - 
- - vmovddup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovhlps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovlhps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovhpd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vmovhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vmovhpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovhps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 
- 0.33 - vmovhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vmovhps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovlpd %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vmovlpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vmovlpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovlps %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vmovlps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vmovlps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskpd %xmm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskpd %ymm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskps %xmm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskps %ymm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntdq %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntdq %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntpd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntpd %ymm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntps %ymm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovq %rax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovq (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovq %xmm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - 
vmovq %xmm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovsd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovss %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 
0.33 - - vmovupd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovups %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vmpsadbw $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - vmpsadbw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - vmpsadbw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - 0.33 - - vmulsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw 
%xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpand (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpand (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpandn %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpandn (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpandn (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - 
- - 0.33 - vpavgw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vpblendvb %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpblendw $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendw $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendw $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4.17 1.67 - - - 1.67 0.50 - - - - - - vpcmpestri $1, %xmm0, %xmm2 -# 
CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - - 0.33 - vpcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - 0.33 - - vpcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 4.50 2.00 - - - 2.00 0.50 - - - - - - vpcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - - 0.33 - vpcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - 0.33 - - vpcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpcmpistri $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vperm2f128 $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - - 0.33 - vperm2f128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vperm2f128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 
0.33 - - vpermilps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrb $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrd $1, %xmm0, %ecx @@ -2169,268 +2169,268 @@ vzeroupper # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrw $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrw $1, %xmm0, (%rax) # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vphminposuw %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vphminposuw (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vphminposuw (%rax), %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - 
- - - - - vphsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrb $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrb $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrb $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrd $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrq $1, %rax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrq $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrq $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrw $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxud %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm0, 
%xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminud %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovmskb %xmm0, %ecx # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 
0.33 - vpmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuldq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuludq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuludq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuludq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpor %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpor (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $1, %xmm0, 
%xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpslld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpslld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllq (%rax), %xmm1, %xmm2 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrad $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrad %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrad (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrad (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
0.33 - - vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw 
(%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vptest %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vptest (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vptest (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vptest %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vptest (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vptest (%rax), %ymm1 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhqdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhqdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 
0.50 - - - 0.50 - - - - - - - vpunpcklqdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklqdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpxor %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpxor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpxor (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundpd $1, (%rax), %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - 
vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundps $1, (%rax), %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundsd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufpd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufpd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufpd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufpd $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufpd $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 
- - - - 0.33 - - vshufpd $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufps $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufps $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufps $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufps $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufps $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - vstmxcsr (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - 
- - - - vsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestpd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestpd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestpd (%rax), %ymm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestps %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestps (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 
0.33 0.33 - - - - - - 0.33 - - vtestps (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestps %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestps (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestps (%rax), %ymm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomisd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2.23 4.07 - - - 1.07 1.90 - - - 0.73 - - vzeroall +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2.23 4.07 - - - 1.07 1.90 - - - - 0.73 - vzeroall # CHECK-NEXT: - - - - - - - - - - - - - vzeroupper diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx2.s index 96f66d1aad6c56..8c2e4911bf73d8 100644 --- 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avx2.s @@ -779,308 +779,308 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 111.00 130.00 65.33 65.33 2.50 135.00 - 2.50 2.50 2.50 - 65.33 - +# CHECK-NEXT: 111.00 130.00 65.33 65.33 2.50 135.00 - 2.50 2.50 2.50 65.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti128 (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti128 (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm0, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm0, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextracti128 $1, %ymm0, (%rax) -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - - 2.67 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherdpd %ymm0, 
(%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - 2.67 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti128 $1, %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti128 $1, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %ymm0 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %ymm0 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vmpsadbw $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - vmpsadbw $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - vmpsadbw $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsd %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vpackssdw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm0, %ymm1, %ymm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpand %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpand (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpand (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpandn %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpandn (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpandn (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendd $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendd $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendd $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vpblendvb %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpblendw $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendw $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendw $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm0, %ymm0 -# CHECK-NEXT: - - 
0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vpcmpeqw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vperm2i128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vperm2i128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - 
- vpermq $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - - 2.67 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $1, (%rax), %ymm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - 2.67 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddd 
(%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovd (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 
0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovd %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovd %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovq (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovq (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovq %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovq %ymm0, %ymm1, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxud %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxud (%rax), %ymm1, 
%ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxud (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminud %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminud (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovmskb %ymm0, %ecx # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd 
(%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm2 +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm1, %ymm2 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuludq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuludq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuludq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpor %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $1, (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $1, (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $1, (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignd %ymm0, %ymm1, %ymm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpslld $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpslld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpslld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllq $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvq (%rax), %ymm1, 
%ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrad $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrad %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrad (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrld $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlq $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 
- - - 1.00 - - - - - - - vpsrlq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), 
%ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm1, 
%ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhqdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhqdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklqdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklqdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpxor %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpxor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpxor (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxgfni.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxgfni.s index 2f9fe5dd23a174..ae2185aae1a239 100644 --- 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxgfni.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxgfni.s @@ -58,19 +58,19 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 6.00 6.00 2.00 2.00 - - - - - - - 2.00 - +# CHECK-NEXT: 6.00 6.00 2.00 2.00 - - - - - - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s index 5c8d5e74e7edab..8152d18f56c301 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s @@ -68,23 +68,23 @@ vpdpwssds (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - - 2.67 - +# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vpdpwssd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi1.s index 4ed882a37a68e5..16c4fdf7e1b4cf 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi1.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi1.s @@ -93,33 +93,33 @@ tzcnt (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.00 15.33 4.33 4.33 - 5.33 2.00 - - - 5.33 4.33 - +# CHECK-NEXT: 2.00 15.33 4.33 4.33 - 5.33 2.00 - - - 4.33 5.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - andnl %eax, %ebx, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - andnl %eax, %ebx, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - andnl (%rax), %ebx, %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - andnq %rax, %rbx, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - andnq %rax, %rbx, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 
0.33 - andnq (%rax), %rbx, %rcx # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bextrl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - bextrl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - bextrl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bextrq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - bextrq %rax, (%rbx), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsil %eax, %ecx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - bextrq %rax, (%rbx), %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsil %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsil (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsiq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsiq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsiq (%rax), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsmskl %eax, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsmskl %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsmskl (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsmskq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsmskq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsmskq (%rax), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsrl %eax, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsrl %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsrl (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsrq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsrq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsrq (%rax), %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntw %ax, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntw (%rax), %cx 
# CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi2.s index 559ca83906cb74..a444369434dda7 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-bmi2.s @@ -108,39 +108,39 @@ shrx %rax, (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 9.40 16.40 5.33 5.33 - 2.40 9.40 - - - 0.40 5.33 - +# CHECK-NEXT: 9.40 16.40 5.33 5.33 - 2.40 9.40 - - - 5.33 0.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - bzhil %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bzhil %eax, (%rbx), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bzhil %eax, (%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - bzhiq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bzhiq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - mulxl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - mulxq %rax, %rbx, %rcx -# CHECK-NEXT: - 
1.00 0.33 0.33 - 1.00 - - - - - 0.33 - mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - mulxq (%rax), %rbx, %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pdepl %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pdepl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pdepl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pdepq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pdepq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pdepq (%rax), %rbx, %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pextl %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pextl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pextl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pextq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pextq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pextq (%rax), %rbx, %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - rorxl $1, %eax, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - rorxl $1, (%rax), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - rorxl $1, (%rax), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - rorxq $1, %rax, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - rorxq $1, (%rax), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - rorxq $1, (%rax), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sarxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sarxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sarxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sarxq %rax, (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlxl 
%eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shlxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shlxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shlxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shlxq %rax, (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shrxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shrxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shrxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shrxq %rax, (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clflushopt.s index e61cc06951ae56..f3b64423348a70 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clflushopt.s @@ -31,8 +31,8 @@ clflushopt (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - clflushopt (%rax) +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - clflushopt (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clwb.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clwb.s index d35eadcc3f9d0c..627705311c1a78 100644 --- 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clwb.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-clwb.s @@ -31,8 +31,8 @@ clwb (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - clwb (%rax) +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - clwb (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmov.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmov.s index 87a0e070096c25..5a3d8d0caf5edd 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmov.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmov.s @@ -226,7 +226,7 @@ cmovgq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 48.00 - 16.00 16.00 - - 48.00 - - - - 16.00 - +# CHECK-NEXT: 48.00 - 16.00 16.00 - - 48.00 - - - 16.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -246,22 +246,22 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgew %si, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovlew %si, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgw %si, %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovow (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnow (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovew (%rax), %di -# 
CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovow (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnow (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 
0.50 - - - 0.33 - - cmovgw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovol %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovnol %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovbl %esi, %edi @@ -278,22 +278,22 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgel %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovlel %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgl %esi, %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovol (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnol (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovael (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmoval (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovll (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovol (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnol (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovael (%rax), %edi 
+# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmoval (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovll (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovoq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovnoq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovbq %rsi, %rdi @@ -310,19 +310,19 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgeq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovleq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgq %rsi, %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovoq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnoq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmoveq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovneq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 
- - - - 0.33 - cmovsq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovleq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovoq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnoq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmoveq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovneq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovleq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmpxchg.s index 93ef7797dfe363..1e5c8ba1afbb65 100644 --- 
a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmpxchg.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-cmpxchg.s @@ -37,11 +37,11 @@ lock cmpxchg16b (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 21.40 10.40 1.33 1.33 2.00 10.40 17.40 2.00 2.00 2.00 4.40 1.33 - +# CHECK-NEXT: 21.40 10.40 1.33 1.33 2.00 10.40 17.40 2.00 2.00 2.00 1.33 4.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.80 0.33 - cmpxchg8b (%rax) -# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 1.40 0.33 - cmpxchg16b (%rax) -# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.80 0.33 - lock cmpxchg8b (%rax) -# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 1.40 0.33 - lock cmpxchg16b (%rax) +# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.33 0.80 - cmpxchg8b (%rax) +# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 0.33 1.40 - cmpxchg16b (%rax) +# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.33 0.80 - lock cmpxchg8b (%rax) +# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 0.33 1.40 - lock cmpxchg16b (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-f16c.s index 7e4eeebfdaca15..37cd0c25c7f4b8 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-f16c.s @@ -48,14 +48,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 4.00 0.67 0.67 1.00 4.00 - 1.00 1.00 1.00 - 0.67 - +# CHECK-NEXT: 4.00 4.00 0.67 0.67 1.00 4.00 - 1.00 1.00 1.00 0.67 - - # CHECK: Resource pressure by instruction: # 
CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtph2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - 0.50 - - 0.50 0.50 0.50 - - - vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-fma.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-fma.s index 63fc8dbaa44b20..68430d33555586 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-fma.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-fma.s @@ -508,199 +508,199 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 96.00 96.00 32.00 32.00 - - - - - - - 32.00 - +# CHECK-NEXT: 96.00 96.00 32.00 32.00 - - - - - - 32.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), 
%ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vfmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vfmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vfmsubadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
- 0.33 - vfnmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vfnmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vfnmsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ps 
(%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ss %xmm0, %xmm1, 
%xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-gfni.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-gfni.s index 322fcb420196ef..45b52d8bbbfeed 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-gfni.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-gfni.s @@ -43,13 +43,13 @@ gf2p8mulb (%rax), %xmm1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 3.00 3.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: 3.00 3.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8affineinvqb $0, %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8affineinvqb $0, (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8affineinvqb $0, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8affineqb $0, %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8affineqb $0, (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8affineqb $0, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8mulb %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8mulb (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8mulb (%rax), %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lea.s 
b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lea.s index 762b6d3caef2b9..9e5a084eeb9d31 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lea.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lea.s @@ -301,142 +301,142 @@ lea 1024(%rax, %rbx, 2), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 9.00 144.00 - - - 9.00 9.00 - - - 9.00 - - +# CHECK-NEXT: 9.00 144.00 - - - 9.00 9.00 - - - - 9.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 0, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 0, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 0, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 0, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 
0.20 - - - 0.20 - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 
(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx), %cx +# 
CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - 
- - 0.20 - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax), 
%rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx # CHECK-NEXT: - 
1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx,2), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lzcnt.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lzcnt.s index 68179d34d8a827..e4136b8f321dca 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lzcnt.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-lzcnt.s @@ -43,13 +43,13 @@ lzcntq (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] 
[2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntw %cx, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-mmx.s index c62ea2963323d5..27436b03bac79b 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-mmx.s @@ -287,112 +287,112 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 75.50 - 15.33 15.33 1.00 40.00 0.50 1.00 1.00 1.00 - 15.33 - +# CHECK-NEXT: 75.50 - 15.33 15.33 1.00 40.00 0.50 1.00 1.00 1.00 15.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 5.50 - - - - 4.00 0.50 - - - - - - emms # CHECK-NEXT: - - - - - 1.00 - - - - - - - movd %eax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movd %mm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movd %mm0, (%rax) 
# CHECK-NEXT: - - - - - 1.00 - - - - - - - movq %rax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movq %mm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movq %mm0, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - packsswb %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packsswb (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packsswb (%rax), %mm2 # CHECK-NEXT: - - - - - 2.00 - - - - - - - packssdw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packssdw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packssdw (%rax), %mm2 # CHECK-NEXT: - - - - - 2.00 - - - - - - - packuswb %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packuswb (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packuswb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddb %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddb (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddsb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddusb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddusb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddusb 
(%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddusw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddusw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddusw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pand %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pand (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pand (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pandn %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pandn (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pandn (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - - 0.33 - pcmpgtw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaddwd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaddwd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaddwd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmullw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmullw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmullw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - por %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - por (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - por (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pslld $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pslld %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pslld (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pslld (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllq $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psllq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psllq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psllw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psllw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrad $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrad %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrad (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrad (%rax), %mm2 
# CHECK-NEXT: 1.00 - - - - - - - - - - - - psraw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psraw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psraw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psraw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrld $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrld %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrld (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrld (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlq $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrlq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrlq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrlw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrlw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubb %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubb (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubsb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubusb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 
0.33 - psubusb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubusb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubusw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubusw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubusw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhdq %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhdq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhdq (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhwd %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhwd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhwd (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpcklbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpcklbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpcklbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckldq %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckldq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckldq (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpcklwd %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpcklwd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpcklwd (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pxor %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pxor (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - 
- - 0.33 - - pxor (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-movbe.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-movbe.s index 3aa3122609563d..025ea85be72064 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-movbe.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-movbe.s @@ -43,13 +43,13 @@ movbe (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.20 4.20 1.00 1.00 1.50 0.20 2.20 1.50 1.50 1.50 0.20 1.00 - +# CHECK-NEXT: 2.20 4.20 1.00 1.00 1.50 0.20 2.20 1.50 1.50 1.50 1.00 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - movbew %cx, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - movbew (%rax), %cx +# CHECK-NEXT: 0.70 0.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - movbew (%rax), %cx # CHECK-NEXT: - 1.00 - - 0.50 - - 0.50 0.50 0.50 - - - movbel %ecx, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - movbel (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - movbel (%rax), %ecx # CHECK-NEXT: 0.50 1.00 - - 0.50 - 0.50 0.50 0.50 0.50 - - - movbeq %rcx, (%rax) -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - movbeq (%rax), %rcx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - movbeq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-pclmul.s index 871035dbe34a6e..e9a65bbe028f60 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-pclmul.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-pclmul.s @@ -33,9 +33,9 @@ pclmulqdq $11, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# 
CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - pclmulqdq $11, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pclmulqdq $11, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pclmulqdq $11, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-popcnt.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-popcnt.s index 9428d9015e2121..728fa7375e1d61 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-popcnt.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-popcnt.s @@ -43,13 +43,13 @@ popcntq (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntw %cx, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-prefetchw.s index ff872dacaf326d..257e9d54dabe4c 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-prefetchw.s +++ 
b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-prefetchw.s @@ -33,9 +33,9 @@ prefetchw (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.67 0.67 - - - - - - - 0.67 - +# CHECK-NEXT: - - 0.67 0.67 - - - - - - 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetch (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetchw (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetch (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetchw (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdrand.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdrand.s index b4a2252310804e..774bbc5e981da4 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdrand.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdrand.s @@ -35,10 +35,10 @@ rdrand %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 17.30 27.30 1.00 1.00 - 14.30 11.30 - - - 1.80 1.00 - +# CHECK-NEXT: 17.30 27.30 1.00 1.00 - 14.30 11.30 - - - 1.00 1.80 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandw %ax -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandl %eax -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandq %rax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandw %ax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandl %eax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandq %rax diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdseed.s 
b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdseed.s index 8b8aff25b51639..c7ccd50384df1d 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdseed.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-rdseed.s @@ -35,10 +35,10 @@ rdseed %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 19.50 24.00 1.00 1.00 - 18.00 10.50 - - - - 1.00 - +# CHECK-NEXT: 19.50 24.00 1.00 1.00 - 18.00 10.50 - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedw %ax -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedl %eax -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedq %rax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedw %ax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedl %eax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedq %rax diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse1.s index b292826913d914..f747b86634702c 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse1.s @@ -336,131 +336,131 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 74.00 24.50 19.33 19.33 5.00 29.50 1.00 5.00 5.00 5.00 - 19.33 - +# CHECK-NEXT: 74.00 24.50 19.33 19.33 5.00 29.50 1.00 5.00 5.00 5.00 19.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 
0.50 - - - - 0.33 - - addps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addss %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addss (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addss (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andnps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andnps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andnps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - comiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - comiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - comiss (%rax), %xmm1 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtpi2ps %mm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - cvtps2pi %xmm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - cvtps2pi (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - cvtps2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2ss %ecx, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - cvtsi2ss %rcx, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - - 0.33 - cvtsi2ssl (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - cvtss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2si (%rax), %rcx # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - cvttps2pi %xmm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - cvttps2pi (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - cvttps2pi (%rax), %mm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - cvttss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttss2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - divps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - divss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divss (%rax), %xmm2 -# CHECK-NEXT: 1.83 0.33 0.33 0.33 - 0.33 0.50 - - - - 0.33 - ldmxcsr (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divss (%rax), %xmm2 +# CHECK-NEXT: 1.83 0.33 0.33 0.33 - 0.33 0.50 - - - 0.33 - - ldmxcsr (%rax) # CHECK-NEXT: 2.00 - - - 
0.50 - - 0.50 0.50 0.50 - - - maskmovq %mm0, %mm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxss (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movaps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movaps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movaps (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movhlps %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movlhps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movhps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - movhps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - movhps (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movlps %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - movlps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - movlps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movmskps %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntq %mm0, (%rax) # CHECK-NEXT: 0.33 
0.33 - - - 0.33 - - - - - - - movss %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movss %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movups %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movups (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movups (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulss (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - orps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - orps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - orps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pavgb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pavgb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pavgb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pavgw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pavgw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pavgw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - pextrw $1, %mm0, %ecx # CHECK-NEXT: - - - - - 2.00 - - - - - - - pinsrw $1, %eax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pinsrw $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pinsrw $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaxsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 
- pmaxsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaxsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaxub %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaxub (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaxub (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pminsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pminsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pminsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pminub %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pminub (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pminub (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmovmskb %mm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhuw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhuw (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht0 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht1 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht2 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetchnta (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhuw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht0 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht1 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht2 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetchnta (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - psadbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - psadbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - psadbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - pshufw $1, %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pshufw $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pshufw $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - 
- - rcpps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rcpps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rcpps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rcpss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rcpss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rcpss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rsqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rsqrtss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rsqrtss (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - sfence # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - shufps $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - shufps $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtss (%rax), %xmm2 # CHECK-NEXT: 1.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - stmxcsr (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subss %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subss (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 
0.33 - - subss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - ucomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - ucomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - ucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpckhps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpckhps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpckhps (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpcklps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpcklps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpcklps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - xorps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - xorps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - xorps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse2.s index 964caa1d7f73c0..ff7d39ba77b3a9 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse2.s @@ -692,96 +692,96 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 107.70 99.20 39.33 39.33 8.50 71.70 1.20 7.83 7.50 7.50 0.20 39.00 - +# CHECK-NEXT: 107.70 99.20 39.33 39.33 8.50 71.70 1.20 7.83 7.50 7.50 39.00 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsd 
(%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andnpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andnpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andnpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andpd (%rax), %xmm2 -# CHECK-NEXT: 0.70 0.20 - - 0.50 0.20 0.70 0.50 0.50 0.50 0.20 - - clflush (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andpd (%rax), %xmm2 +# CHECK-NEXT: 0.70 0.20 - - 0.50 0.20 0.70 0.50 0.50 0.50 - 0.20 - clflush (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - comisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - comisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - comisd (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvtdq2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2pi %xmm0, %mm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2pi (%rax), %mm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvtps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtps2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsd2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsd2ss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - 
- - - - - - cvtsi2sd %rcx, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2sdl (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtss2sd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2sd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvttpd2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvttpd2pi %xmm0, %mm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvttpd2pi (%rax), %mm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvttpd2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttsd2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - divpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - divsd %xmm0, 
%xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divsd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divsd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - lfence # CHECK-NEXT: - - 0.33 0.33 1.00 - - 0.33 - - - - - maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxsd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - mfence # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movapd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movapd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movapd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movd %eax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movd %xmm0, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - movdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movdqa %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - 
movdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movdqu %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movdqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movdqu (%rax), %xmm2 # CHECK-NEXT: 0.83 0.33 - - - 0.83 - - - - - - - movdq2q %xmm0, %mm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movhpd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - movhpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - movhpd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movlpd %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - movlpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - movlpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movmskpd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntil %eax, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntiq %rax, (%rax) @@ -789,177 +789,177 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntpd %xmm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movq %rax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movq (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movq %xmm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movq %xmm0, (%rax) # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - movq2dq %mm0, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movsd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - 
movupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movupd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movupd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movupd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulsd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - orpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - orpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - orpd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packssdw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packssdw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packssdw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packsswb %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packsswb (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packsswb (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packuswb %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packuswb (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packuswb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddb %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddb (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddd (%rax), %xmm2 # 
CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddq %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddq (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddq (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddq %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddq (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddusb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddusb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddusb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddusw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddusw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddusw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pand %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pand (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pand (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pandn %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pandn (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pandn (%rax), %xmm2 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pavgb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pavgb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pavgb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pavgw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pavgw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pavgw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtw (%rax), %xmm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrw $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrw $1, %eax, %xmm0 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrw $1, (%rax), %xmm0 +# 
CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrw $1, (%rax), %xmm0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaddwd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaddwd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaddwd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxub %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxub (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxub (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminub %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminub (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminub (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmovmskb %xmm0, %ecx # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhuw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmullw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmullw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmullw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmuludq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - - 0.33 - pmuludq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmuludq (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmuludq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmuludq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmuludq (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - por %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - por (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - por (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - psadbw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - psadbw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - psadbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufd $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufhw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshuflw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshuflw $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pslld $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - pslld %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pslld (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pslld (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pslldq $1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psllq $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psllq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
- 0.33 - psllq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psllq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psllw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psllw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psllw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psllw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrad $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrad %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrad (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrad (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psraw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psraw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psraw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psraw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrld $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrld %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrld (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrld (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - psrldq $1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrlq $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrlq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrlq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrlq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrlw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrlw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrlw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrlw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubb %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubb 
(%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubq %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubq (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubq (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubq %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubq (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubusb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubusb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubusb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubusw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubusw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubusw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - 
punpckhbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhqdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhqdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhqdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckldq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckldq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckldq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklqdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklqdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklqdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklwd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pxor %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pxor (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pxor (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - shufpd $1, %xmm0, 
%xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - shufpd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - shufpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtsd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtsd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtsd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subsd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subsd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - ucomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - ucomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - ucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpckhpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpckhpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpckhpd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpcklpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpcklpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpcklpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - xorpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - xorpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - xorpd (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse3.s 
b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse3.s index 15baea9604c747..972741435935bf 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse3.s @@ -82,28 +82,28 @@ mwait # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.00 9.00 3.33 3.33 - 27.00 4.00 - - - - 3.33 - +# CHECK-NEXT: 2.00 9.00 3.33 3.33 - 27.00 4.00 - - - 3.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsubpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsubpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsubpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsubps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsubps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsubps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - haddpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - haddpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - haddpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - haddps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - haddps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - haddps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - hsubpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - hsubpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - hsubpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - hsubps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - hsubps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - lddqu (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - 
hsubps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - lddqu (%rax), %xmm2 # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - monitor # CHECK-NEXT: - - - - - 1.00 - - - - - - - movddup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movddup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movddup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - movshdup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movshdup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movshdup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - movsldup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsldup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsldup (%rax), %xmm2 # CHECK-NEXT: 1.75 1.75 - - - 2.75 3.75 - - - - - - mwait diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse41.s index ffe9150cc59167..c715cbaa559623 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse41.s @@ -269,37 +269,37 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 36.33 50.83 14.67 14.67 2.50 41.83 1.00 2.50 2.50 2.50 - 14.67 - +# CHECK-NEXT: 36.33 50.83 14.67 14.67 2.50 41.83 1.00 2.50 2.50 2.50 14.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendpd $11, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendpd $11, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendpd $11, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendps $11, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendps $11, (%rax), 
%xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendps $11, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendvpd %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendvpd %xmm0, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendvps %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.50 - - - 0.50 - - - - - - - dppd $22, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - - 0.33 - dppd $22, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - 0.33 - - dppd $22, (%rax), %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - dpps $22, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - insertps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - insertps $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movntdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - insertps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movntdqa (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - mpsadbw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - mpsadbw $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packusdw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packusdw (%rax), %xmm2 +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packusdw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pblendvb %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pblendvb %xmm0, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pblendw $11, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pblendw $11, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pblendw $11, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqq (%rax), %xmm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrb $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrd $1, %xmm0, %ecx @@ -308,64 +308,64 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrq $1, %xmm0, (%rax) # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrw $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - phminposuw %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - phminposuw (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - phminposuw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrb $1, %eax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrb $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrb $1, (%rax), %xmm1 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrd $1, %eax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrd $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrd $1, (%rax), %xmm1 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrq $1, %rax, %xmm1 -# CHECK-NEXT: 
- 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrq $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrq $1, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxud %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxud (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxud (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxuw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminud %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminud (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminud (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminuw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 
0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxwq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxdq 
%xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxwq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmuldq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmuldq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmuldq (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - pmulld %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - pmulld (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - pmulld (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - ptest %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - ptest (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - ptest (%rax), %xmm1 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundsd $1, (%rax), %xmm2 # CHECK-NEXT: 
1.00 1.00 - - - - - - - - - - - roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundss $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse42.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse42.s index cb5b34e9b6468d..ad80bad814640f 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse42.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-sse42.s @@ -78,27 +78,27 @@ pcmpgtq (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 28.67 16.67 3.33 3.33 - 8.67 2.00 - - - - 3.33 - +# CHECK-NEXT: 28.67 16.67 3.33 3.33 - 8.67 2.00 - - - 3.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32b %al, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32b (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32b (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32l %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32l (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32l (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32w %ax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32w (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32w (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32b %al, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32b (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32b (%rax), %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32q %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32q (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32q (%rax), %rcx # CHECK-NEXT: 4.17 1.67 - - - 1.67 
0.50 - - - - - - pcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - - 0.33 - pcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - 0.33 - - pcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 4.50 2.00 - - - 2.00 0.50 - - - - - - pcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - - 0.33 - pcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - 0.33 - - pcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - pcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpistri $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - pcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - pcmpgtq %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pcmpgtq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pcmpgtq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-ssse3.s index 33ec9b0fa64d21..31e61eb6374760 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-ssse3.s @@ -188,71 +188,71 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 38.67 25.67 10.67 10.67 - 49.67 - - - - - 10.67 - +# CHECK-NEXT: 38.67 25.67 10.67 10.67 - 49.67 - - - - 10.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsb 
(%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - palignr $1, %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - palignr $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - palignr $1, (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - palignr $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - palignr $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - palignr $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phaddd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phaddd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phaddd (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phaddd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phaddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 
- - - - 0.33 - - phaddd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 2.00 - - - - - - - phaddsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - - 0.33 - phaddsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - 0.33 - - phaddsw (%rax), %mm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - phaddsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - phaddsw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phaddw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phaddw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phaddw (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phaddw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phaddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phaddw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phsubd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phsubd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phsubd (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phsubd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phsubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phsubd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 2.00 - - - - - - - phsubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - - 0.33 - phsubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - 0.33 - - phsubsw (%rax), %mm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - phsubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - phsubsw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phsubw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phsubw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 
0.33 - 2.50 - - - - 0.33 - - phsubw (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phsubw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phsubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phsubw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaddubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaddubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaddubsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaddubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaddubsw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhrsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhrsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhrsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhrsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhrsw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - pshufb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - pshufb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - pshufb (%rax), %mm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufb %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufb (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 
- - psignb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignw (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vaes.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vaes.s index 74b19b968bec57..e6722b884eb95a 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vaes.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vaes.s @@ -48,15 +48,15 @@ vaesenclast (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 4.00 1.33 1.33 - - - - - - - 1.33 - +# CHECK-NEXT: 4.00 4.00 1.33 1.33 - - - - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %ymm1, %ymm3 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vpclmulqdq.s index cd834d35c43d53..f474a07b7be49c 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vpclmulqdq.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-vpclmulqdq.s @@ -33,9 +33,9 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %ymm1, %ymm3 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_32.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_32.s index 559ae7e957a29e..fd5968e319bceb 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_32.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_32.s @@ -64,7 +64,7 @@ salc # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] 
[11] [12] -# CHECK-NEXT: 3.60 3.60 0.33 0.33 - 3.60 3.60 - - - 0.60 0.33 - +# CHECK-NEXT: 3.60 3.60 0.33 0.33 - 3.60 3.60 - - - 0.33 0.60 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -79,5 +79,5 @@ salc # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - daa # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - das # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - into -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - leave +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - leave # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - salc diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_64.s index ec303b4d9f2df3..5cad9072f37c60 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x86_64.s @@ -1965,419 +1965,419 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 949.92 794.58 213.00 213.00 202.50 599.75 793.42 203.00 203.00 202.50 191.33 213.00 - +# CHECK-NEXT: 949.92 794.58 213.00 213.00 202.50 599.75 793.42 203.00 203.00 202.50 213.00 191.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $0, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $0, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcb $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcb $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb 
$7, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $7, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcb $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcb $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb %sil, %dil -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcb %sil, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcb %sil, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcb (%rax), %dil +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcb %sil, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcb %sil, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcb (%rax), %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $0, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $0, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $511, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $511, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $511, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 
0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $511, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $7, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw %si, %di -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcw %si, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcw %si, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcw (%rax), %di +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcw %si, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcw %si, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $0, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $0, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $665536, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $665536, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 
0.33 0.20 - adcl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $7, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl %esi, %edi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcl %esi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcl %esi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcl (%rax), %edi +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcl %esi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcl %esi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $0, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $0, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $665536, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $665536, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq 
$665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $7, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq %rsi, %rdi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcq %rsi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcq %rsi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw $511, 
(%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq $665536, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 
- adcq %rsi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcq %rsi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 
0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq $665536, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - addq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 
0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 
0.20 - - - 0.20 - - andl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 
0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 
0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrw %si, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfw (%rax), %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrw (%rax), %di +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfw (%rax), %di +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrw (%rax), %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrl %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfl (%rax), %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 
0.33 - bsrl (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfl (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrl (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrq %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfq (%rax), %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bswapl %eax # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bswapq %rax # CHECK-NEXT: - 1.00 - - - - - - - - - - - btw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsw %si, %di -# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 1.47 0.33 - btw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btcw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btrw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btsw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btcw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btrw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btsw %si, (%rax) +# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 0.33 1.47 - btw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btcw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btrw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btsw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 
0.33 1.27 - lock btcw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btrw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btsw %si, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsw $7, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsw $7, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsl %esi, %edi -# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 1.47 0.33 - btl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btcl %esi, 
(%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btrl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btsl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btcl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btrl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btsl %esi, (%rax) +# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 0.33 1.47 - btl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btcl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btrl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btsl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btcl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btrl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btsl %esi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsl $7, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - 
- - 0.33 - - btl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsl $7, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsq %rsi, %rdi -# CHECK-NEXT: 2.00 2.00 0.33 0.33 - 1.00 2.00 - - - 1.00 0.33 - btq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btcq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btrq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btsq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btcq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btrq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btsq %rsi, (%rax) +# CHECK-NEXT: 2.00 2.00 0.33 0.33 - 1.00 2.00 - - - 0.33 1.00 - btq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btcq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btrq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btsq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btcq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btrq %rsi, (%rax) 
+# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btsq %rsi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsq $7, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsq $7, (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cbtw # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cwtl # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cltq -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - cwtd +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - cwtd # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cltd # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cqto # CHECK-NEXT: - - - - - - - - - - - - - clc -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - cld -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - 
- cmc -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $7, 
%rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq (%rax), %rdi -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsb %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsw %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsl %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsq %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgb %cl, %bl -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgw %cx, %bx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgl %ecx, %ebx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgq %rcx, %rbx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgq %rcx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - cld +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmc +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb $7, %al +# CHECK-NEXT: 
0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq $7, (%rax) +# 
CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsb %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsw %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsl %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsq %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgb %cl, %bl +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgw %cx, %bx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgl %ecx, %ebx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgq %rcx, %rbx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 7.50 6.50 - - 0.50 5.00 5.00 0.50 0.50 0.50 - - - cpuid -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decb (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 
0.33 - lock decb (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decw %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decw (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decw (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decl %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decb (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decb (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decw %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decw (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decw (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decl %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decl (%rax) # CHECK-NEXT: - - - - - - - - - - - - - decq %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decq (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decq (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - divb %dil # CHECK-NEXT: - 3.00 - - - - - - - - - - - divb (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - divw %si -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - divw (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - divl %edx 
-# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - divl (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - divw %si +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - divw (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - divl %edx +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - divl (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - divq %rcx -# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - - 0.33 - divq (%rax) -# CHECK-NEXT: 12.50 2.00 4.67 4.67 2.00 9.00 10.50 2.50 2.50 2.00 - 4.67 - enter $7, $4095 +# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - 0.33 - - divq (%rax) +# CHECK-NEXT: 12.50 2.00 4.67 4.67 2.00 9.00 10.50 2.50 2.50 2.00 4.67 - - enter $7, $4095 # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivb %dil # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivb (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - idivw %si -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - idivw (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - idivl %edx -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - idivl (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - idivw %si +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - idivw (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - idivl %edx +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - idivl (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivq %rcx -# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - - 0.33 - idivq (%rax) +# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - 0.33 - - idivq (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulb %dil -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulb (%rax) -# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - 0.40 - - imulw %di -# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.40 0.33 - imulw (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulb (%rax) +# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - - 0.40 - imulw %di 
+# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.33 0.40 - imulw (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulw %si, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulw (%rax), %di -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - imulw $511, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - imulw $511, (%rax), %di -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - imulw $7, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - imulw $7, (%rax), %di -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - imull %edi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - imull (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulw (%rax), %di +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - imulw $511, %si, %di +# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - imulw $511, (%rax), %di +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - imulw $7, %si, %di +# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - imulw $7, (%rax), %di +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - imull %edi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - imull (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull $665536, %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull $665536, (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull $665536, (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull $7, %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull $7, (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull $7, (%rax), %edi # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - imulq %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - imulq 
(%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - imulq (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq $665536, %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq $665536, (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq $665536, (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq $7, %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq $7, (%rax), %rdi -# CHECK-NEXT: 20.70 20.87 2.67 2.67 - 21.87 13.70 - - - 1.87 2.67 - inb $7, %al -# CHECK-NEXT: 20.70 20.87 2.33 2.33 - 21.87 13.70 - - - 1.87 2.33 - inb %dx, %al -# CHECK-NEXT: 21.00 20.67 2.33 2.33 - 22.67 14.00 - - - 1.67 2.33 - inw $7, %ax -# CHECK-NEXT: 21.30 21.30 2.33 2.33 - 21.80 13.80 - - - 1.80 2.33 - inw %dx, %ax -# CHECK-NEXT: 22.20 22.87 3.33 3.33 - 21.87 15.20 - - - 1.87 3.33 - inl $7, %eax -# CHECK-NEXT: 22.80 23.47 3.67 3.67 - 23.47 15.80 - - - 2.47 3.67 - inl %dx, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incb (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incb (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incw %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incw (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incw (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incl %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incl (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq $7, (%rax), %rdi +# CHECK-NEXT: 20.70 20.87 2.67 
2.67 - 21.87 13.70 - - - 2.67 1.87 - inb $7, %al +# CHECK-NEXT: 20.70 20.87 2.33 2.33 - 21.87 13.70 - - - 2.33 1.87 - inb %dx, %al +# CHECK-NEXT: 21.00 20.67 2.33 2.33 - 22.67 14.00 - - - 2.33 1.67 - inw $7, %ax +# CHECK-NEXT: 21.30 21.30 2.33 2.33 - 21.80 13.80 - - - 2.33 1.80 - inw %dx, %ax +# CHECK-NEXT: 22.20 22.87 3.33 3.33 - 21.87 15.20 - - - 3.33 1.87 - inl $7, %eax +# CHECK-NEXT: 22.80 23.47 3.67 3.67 - 23.47 15.80 - - - 3.67 2.47 - inl %dx, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incb (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incb (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incw %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incw (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incw (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incl %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incl (%rax) # CHECK-NEXT: - - - - - - - - - - - - - incq %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incq (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incq (%rax) -# CHECK-NEXT: 20.20 18.20 2.67 2.67 0.50 20.20 13.20 0.50 0.50 0.50 1.20 2.67 - insb %dx, %es:(%rdi) -# CHECK-NEXT: 20.97 18.47 3.00 3.00 0.50 20.80 13.63 0.50 0.50 0.50 1.13 3.00 - insw %dx, %es:(%rdi) -# CHECK-NEXT: 22.17 18.33 3.67 3.67 0.50 22.67 14.83 0.50 0.50 0.50 1.00 3.67 - insl %dx, %es:(%rdi) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incq (%rax) +# CHECK-NEXT: 20.20 18.20 2.67 2.67 0.50 20.20 13.20 0.50 0.50 0.50 
2.67 1.20 - insb %dx, %es:(%rdi) +# CHECK-NEXT: 20.97 18.47 3.00 3.00 0.50 20.80 13.63 0.50 0.50 0.50 3.00 1.13 - insw %dx, %es:(%rdi) +# CHECK-NEXT: 22.17 18.33 3.67 3.67 0.50 22.67 14.83 0.50 0.50 0.50 3.67 1.00 - insl %dx, %es:(%rdi) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - int $7 -# CHECK-NEXT: 9.80 7.47 - - 2.50 8.47 4.80 2.50 2.50 2.50 1.47 - - invlpg (%rax) +# CHECK-NEXT: 9.80 7.47 - - 2.50 8.47 4.80 2.50 2.50 2.50 - 1.47 - invlpg (%rax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - invlpga # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - lahf -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - leave -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - lodsb (%rsi), %al -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - lodsw (%rsi), %ax -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - lodsl (%rsi), %eax -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - lodsq (%rsi), %rax -# CHECK-NEXT: 2.40 1.40 - - - 0.40 2.40 - - - 0.40 - - loop 0 -# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - 0.80 - - loope 0 -# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - 0.80 - - loopne 0 -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsb (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsw (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsl (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsq (%rsi), %es:(%rdi) -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbw %al, %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbw %al, %di +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - leave +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - lodsb (%rsi), %al +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - lodsw (%rsi), %ax +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - 
- - 0.33 0.40 - lodsl (%rsi), %eax +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - lodsq (%rsi), %rax +# CHECK-NEXT: 2.40 1.40 - - - 0.40 2.40 - - - - 0.40 - loop 0 +# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - - 0.80 - loope 0 +# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - - 0.80 - loopne 0 +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsb (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsw (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsl (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsq (%rsi), %es:(%rdi) +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbw %al, %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbw %al, %di # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - movsbw (%rax), %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - movzbw (%rax), %di -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbl %al, %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbl %al, %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsbl (%rax), %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzbl (%rax), %edi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbq %al, %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbq %al, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsbq (%rax), %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzbq (%rax), %rdi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movswl %ax, %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzwl %ax, %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movswl (%rax), %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzwl (%rax), %edi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movswq %ax, %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzwq %ax, %rdi 
-# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movswq (%rax), %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzwq (%rax), %rdi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movslq %eax, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movslq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - movzbw (%rax), %di +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbl %al, %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbl %al, %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsbl (%rax), %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzbl (%rax), %edi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbq %al, %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbq %al, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsbq (%rax), %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzbq (%rax), %rdi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movswl %ax, %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzwl %ax, %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movswl (%rax), %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzwl (%rax), %edi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movswq %ax, %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzwq %ax, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movswq (%rax), %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzwq (%rax), %rdi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movslq %eax, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movslq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - mulb %dil -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - mulb (%rax) -# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - 0.40 - - mulw %si -# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.40 0.33 - mulw (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - mull %edx -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 
0.20 0.33 - mull (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - mulb (%rax) +# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - - 0.40 - mulw %si +# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.33 0.40 - mulw (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - mull %edx +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - mull (%rax) # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - mulq %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - mulq (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negb (%r8) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negb (%r8) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negw %si -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negw (%r9) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negw (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negl %edx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negl (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negq %rcx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negq (%r10) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negq (%r10) +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - mulq (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negb (%r8) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negb (%r8) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negw %si +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negw (%r9) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 
0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negw (%r9) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negl %edx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negq %rcx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negq (%r10) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negq (%r10) # CHECK-NEXT: - - - - - - - - - - - - - nop # CHECK-NEXT: - - - - - - - - - - - - - nopw %di # CHECK-NEXT: - - - - - - - - - - - - - nopw (%rcx) @@ -2385,303 +2385,303 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - - - - - - - - - nopl (%r8) # CHECK-NEXT: - - - - - - - - - - - - - nopq %rdx # CHECK-NEXT: - - - - - - - - - - - - - nopq (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notb (%r8) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notb (%r8) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notw %si -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notw (%r9) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notw (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notl %edx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notl (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notq %rcx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notq (%r10) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notq (%r10) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 
0.20 - - orb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl $7, (%rax) -# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orq (%rax), %rdi -# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.50 13.50 0.50 0.50 0.50 1.00 1.67 - outb %al, $7 -# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.00 14.00 0.50 0.50 0.50 1.00 1.67 - outb %al, %dx -# CHECK-NEXT: 21.30 15.80 2.33 2.33 0.50 17.30 14.80 0.50 0.50 0.50 0.80 2.33 - outw %ax, $7 -# CHECK-NEXT: 20.70 16.20 2.33 2.33 0.50 17.20 14.70 0.50 0.50 0.50 1.20 2.33 - outw %ax, %dx -# CHECK-NEXT: 22.30 15.80 3.00 3.00 0.50 19.30 15.80 0.50 0.50 0.50 0.80 3.00 - outl %eax, $7 -# CHECK-NEXT: 21.70 16.20 3.00 3.00 0.50 19.20 15.70 0.50 0.50 0.50 1.20 3.00 - outl %eax, %dx -# CHECK-NEXT: 20.70 17.20 2.33 2.33 
0.50 18.20 13.70 0.50 0.50 0.50 1.20 2.33 - outsb (%rsi), %dx -# CHECK-NEXT: 21.00 17.50 2.67 2.67 0.50 19.00 14.50 0.50 0.50 0.50 1.00 2.67 - outsw (%rsi), %dx -# CHECK-NEXT: 22.20 17.20 3.33 3.33 0.50 21.20 15.20 0.50 0.50 0.50 1.20 3.33 - outsl (%rsi), %dx +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notb (%r8) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notb (%r8) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notw %si +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notw (%r9) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notw (%r9) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notl %edx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notq %rcx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notq (%r10) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notq (%r10) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $511, %ax +# 
CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 
0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orq (%rax), %rdi +# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.50 13.50 0.50 0.50 0.50 1.67 1.00 - outb %al, $7 +# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.00 14.00 0.50 0.50 0.50 1.67 1.00 - outb %al, %dx +# CHECK-NEXT: 21.30 15.80 2.33 2.33 0.50 17.30 14.80 0.50 0.50 0.50 2.33 0.80 - outw %ax, $7 +# CHECK-NEXT: 20.70 16.20 2.33 2.33 0.50 17.20 14.70 0.50 0.50 0.50 2.33 1.20 - outw %ax, %dx +# CHECK-NEXT: 22.30 15.80 3.00 3.00 0.50 19.30 15.80 0.50 0.50 0.50 3.00 0.80 - outl %eax, $7 +# CHECK-NEXT: 21.70 16.20 3.00 3.00 0.50 19.20 15.70 0.50 0.50 0.50 3.00 1.20 - outl %eax, %dx +# CHECK-NEXT: 20.70 17.20 2.33 2.33 0.50 18.20 13.70 0.50 0.50 0.50 2.33 1.20 - outsb (%rsi), %dx +# CHECK-NEXT: 21.00 17.50 2.67 2.67 0.50 19.00 14.50 0.50 0.50 0.50 2.67 1.00 - outsw (%rsi), %dx +# CHECK-NEXT: 22.20 17.20 3.33 3.33 0.50 21.20 15.20 0.50 0.50 0.50 3.33 1.20 - outsl (%rsi), %dx # CHECK-NEXT: 0.50 - - - - 1.00 0.50 - - - - - - pause -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclb %dil -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrb %dil -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclb (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrb (%rax) -# 
CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclb $7, %dil -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrb $7, %dil -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclb $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrb $7, (%rax) -# CHECK-NEXT: 2.90 2.40 - - - 0.40 2.90 - - - 0.40 - - rclb %cl, %dil -# CHECK-NEXT: 2.60 3.60 - - - 0.60 2.60 - - - 0.60 - - rcrb %cl, %dil -# CHECK-NEXT: 2.70 2.20 0.33 0.33 0.50 0.20 2.70 0.50 0.50 0.50 0.20 0.33 - rclb %cl, (%rax) -# CHECK-NEXT: 2.40 3.40 0.33 0.33 0.50 0.40 2.40 0.50 0.50 0.50 0.40 0.33 - rcrb %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclw %di -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrw %di -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclw (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrw (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclw $7, %di -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrw $7, %di -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclw $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrw $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rclw %cl, %di -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrw %cl, %di -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rclw %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrw %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcll %edi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrl %edi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcll (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrl (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcll $7, %edi -# 
CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrl $7, %edi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcll $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrl $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcll %cl, %edi -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrl %cl, %edi -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcll %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrl %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclq %rdi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrq %rdi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclq (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrq (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclq $7, %rdi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrq $7, %rdi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclq $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrq $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rclq %cl, %rdi -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrq %cl, %rdi -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rclq %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrq %cl, (%rax) -# CHECK-NEXT: 16.33 13.33 - - - 10.67 13.33 - - - 0.33 - - rdmsr -# CHECK-NEXT: 4.80 3.80 - - - 2.80 4.80 - - - 1.80 - - rdpmc -# CHECK-NEXT: 4.00 4.00 - - - 2.00 4.00 - - - 1.00 - - rdtsc +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclb %dil +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrb %dil +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclb (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 
0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrb (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclb $7, %dil +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrb $7, %dil +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclb $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrb $7, (%rax) +# CHECK-NEXT: 2.90 2.40 - - - 0.40 2.90 - - - - 0.40 - rclb %cl, %dil +# CHECK-NEXT: 2.60 3.60 - - - 0.60 2.60 - - - - 0.60 - rcrb %cl, %dil +# CHECK-NEXT: 2.70 2.20 0.33 0.33 0.50 0.20 2.70 0.50 0.50 0.50 0.33 0.20 - rclb %cl, (%rax) +# CHECK-NEXT: 2.40 3.40 0.33 0.33 0.50 0.40 2.40 0.50 0.50 0.50 0.33 0.40 - rcrb %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclw %di +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrw %di +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclw (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrw (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclw $7, %di +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrw $7, %di +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclw $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrw $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rclw %cl, %di +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrw %cl, %di +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rclw %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrw %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcll %edi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrl %edi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcll (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrl (%rax) +# CHECK-NEXT: 1.20 0.20 
- - - 0.20 1.20 - - - - 0.20 - rcll $7, %edi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrl $7, %edi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcll $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrl $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcll %cl, %edi +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrl %cl, %edi +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcll %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrl %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclq %rdi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrq %rdi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclq (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrq (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclq $7, %rdi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrq $7, %rdi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclq $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrq $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rclq %cl, %rdi +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrq %cl, %rdi +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rclq %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrq %cl, (%rax) +# CHECK-NEXT: 16.33 13.33 - - - 10.67 13.33 - - - - 0.33 - rdmsr +# CHECK-NEXT: 4.80 3.80 - - - 2.80 4.80 - - - - 1.80 - rdpmc +# CHECK-NEXT: 4.00 4.00 - - - 2.00 4.00 - - - - 1.00 - rdtsc # CHECK-NEXT: 7.50 5.33 - - - 4.00 4.17 - - - - - - rdtscp # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 
1.00 0.50 0.50 0.50 - 0.33 - rolb (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb $7, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb $7, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb %cl, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw $7, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw $7, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw $7, (%rax) 
+# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw %cl, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll $7, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl $7, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl %cl, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - 
rorq %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq $7, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq $7, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq %cl, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sahf # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarb %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlb %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrb %dil -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarb (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlb (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrb (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarb 
$7, %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlb $7, %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrb $7, %dil -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarb $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlb $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrb $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrb %cl, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrb %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarw %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlw %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrw %di -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarw (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlw (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrw (%rax) # CHECK-NEXT: 0.50 - - - - - 
0.50 - - - - - - sarw $7, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlw $7, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrw $7, %di -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarw $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlw $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrw $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrw %cl, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrw %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarl %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shll %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrl %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarl (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shll (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrl (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarl (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shll (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrl (%rax) # 
CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarl $7, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shll $7, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrl $7, %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarl $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shll $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrl $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarl $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shll $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrl $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarl %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shll %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrl %cl, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarl %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shll %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shll %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrl %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarq %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlq %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrq %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarq (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlq (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 
0.50 0.33 - - shrq (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarq $7, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlq $7, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrq $7, %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarq $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlq $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrq $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrq %cl, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrq %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $0, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $0, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbb $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbb $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $7, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - 
- - - - - sbbb $7, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbb $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbb $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb %sil, %dil -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbb %sil, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbb %sil, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbb (%rax), %dil +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbb %sil, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbb %sil, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbb (%rax), %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $0, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $0, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $511, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $511, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $511, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock 
sbbw $511, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $7, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw %si, %di -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbw %si, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbw %si, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbw (%rax), %di +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbw %si, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbw %si, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $0, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $0, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $665536, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $665536, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $665536, (%rax) +# CHECK-NEXT: 
0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $7, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl %esi, %edi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbl %esi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbl %esi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbl (%rax), %edi +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbl %esi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbl %esi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $0, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $0, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $665536, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $665536, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 
0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $7, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq %rsi, %rdi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbq %rsi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbq %rsi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbq (%rax), %rdi -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasb %es:(%rdi), %al -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasw %es:(%rdi), %ax -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasl %es:(%rdi), %eax -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasq %es:(%rdi), %rax +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbq %rsi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbq %rsi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbq (%rax), %rdi +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasb %es:(%rdi), %al +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasw %es:(%rdi), %ax +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasl %es:(%rdi), %eax +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasq %es:(%rdi), %rax # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - seto %al # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 
0.50 0.50 0.50 - - - seto (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - setno %al @@ -2714,171 +2714,171 @@ xorq (%rax), %rdi # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - setg (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - setle %al # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - setle (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldw %cl, %si, %di -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdw %cl, %si, %di -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldw %cl, %si, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdw %cl, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldw %cl, %si, %di +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdw %cl, %si, %di +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldw %cl, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdw %cl, %si, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldw $7, %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdw $7, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldw $7, %si, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdw $7, %si, (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldl %cl, %esi, %edi -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdl %cl, %esi, %edi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldl %cl, %esi, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdl %cl, %esi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldw $7, %si, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdw $7, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldl %cl, %esi, %edi +# 
CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdl %cl, %esi, %edi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldl %cl, %esi, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdl %cl, %esi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldl $7, %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdl $7, %esi, %edi -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldl $7, %esi, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdl $7, %esi, (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldq %cl, %rsi, %rdi -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdq %cl, %rsi, %rdi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldq %cl, %rsi, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldl $7, %esi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdl $7, %esi, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldq %cl, %rsi, %rdi +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdq %cl, %rsi, %rdi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdq %cl, %rsi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldq $7, %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdq $7, %rsi, %rdi -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldq $7, %rsi, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdq $7, %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - stc -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - std -# CHECK-NEXT: 0.40 0.40 - - 
0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosb %al, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosw %ax, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosl %eax, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosq %rax, %es:(%rdi) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - 
- 0.20 - - subl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq $665536, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldq $7, %rsi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdq $7, %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - stc +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - std +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosb %al, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosw %ax, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosl %eax, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosq %rax, %es:(%rdi) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 
0.20 - subb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $7, %edi +# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq $665536, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - subq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw 
$511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 
0.20 - - - 0.33 0.20 - subq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 
0.33 - 0.20 0.20 - - - 0.33 0.20 - testq %rsi, (%rax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - ud2 # CHECK-NEXT: 52.00 31.50 - - 0.50 27.00 31.50 0.50 0.50 0.50 - - - wrmsr -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddb %bl, %cl -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddb %bl, (%rcx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddb %bl, (%rcx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddw %bx, %cx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddw %ax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddw %ax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddl %ebx, %ecx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddl %eax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddl %eax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddq %rbx, %rcx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddq %rax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddq %rax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgb %bl, %cl -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgb %bl, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgb %bl, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgw %bx, %ax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgw %bx, %cx -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgw %ax, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgw %ax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgl %ebx, %eax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgl %ebx, 
%ecx -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgl %eax, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgl %eax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgq %rbx, %rax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgq %rbx, %rcx -# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 1.00 0.33 - xchgq %rax, (%rbx) -# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 1.00 0.33 - lock xchgq %rax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - xlatb -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 
0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq %rsi, (%rax) -# CHECK-NEXT: 
0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorq (%rax), %rdi +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddb %bl, %cl +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddb %bl, (%rcx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddb %bl, (%rcx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddw %bx, %cx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddw %ax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddw %ax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddl %ebx, %ecx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddl %eax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddl %eax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddq %rbx, %rcx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddq %rax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddq %rax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgb %bl, %cl +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgb %bl, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgb %bl, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgw %bx, %ax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgw %bx, %cx +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgw %ax, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgw %ax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgl %ebx, %eax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgl %ebx, %ecx +# 
CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgl %eax, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgl %eax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgq %rbx, %rax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgq %rbx, %rcx +# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 0.33 1.00 - xchgq %rax, (%rbx) +# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 0.33 1.00 - lock xchgq %rax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - xlatb +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 
0.20 0.50 0.50 0.50 0.33 0.20 - xorw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 
0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x87.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x87.s index 5947c582df4b37..e50ec3c04ae327 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-x87.s @@ -372,7 +372,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 131.25 62.75 49.67 49.67 46.00 159.25 74.75 28.00 19.00 19.00 1.00 0.67 7.00 +# CHECK-NEXT: 131.25 62.75 49.67 49.67 46.00 159.25 74.75 28.00 19.00 19.00 0.67 1.00 7.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -520,7 +520,7 @@ fyl2xp1 # CHECK-NEXT: 4.00 2.00 - - - 4.00 5.00 - - - - - - fxch %st(1) # CHECK-NEXT: 4.00 2.00 - - - 4.00 5.00 - - - - - - fxch %st(3) # CHECK-NEXT: 17.25 12.25 16.50 16.50 - 12.75 14.75 - - - - - - fxrstor (%eax) -# CHECK-NEXT: 8.00 11.00 0.67 0.67 19.00 6.00 6.00 19.00 19.00 19.00 1.00 0.67 - fxsave (%eax) +# CHECK-NEXT: 8.00 11.00 0.67 0.67 19.00 6.00 6.00 19.00 19.00 19.00 0.67 1.00 - fxsave (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fxtract # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fyl2x # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fyl2xp1 diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-xsave.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-xsave.s index 1d1104d487a2de..0b3fd683d357af 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-xsave.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-xsave.s @@ -43,12 +43,12 @@ xsetbv # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 72.00 71.67 2.33 2.33 0.50 53.83 
67.17 0.50 0.50 0.50 6.33 1.33 - +# CHECK-NEXT: 72.00 71.67 2.33 2.33 0.50 53.83 67.17 0.50 0.50 0.50 1.33 6.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 6.40 6.40 - - - 2.40 6.40 - - - 1.40 - - xgetbv +# CHECK-NEXT: 6.40 6.40 - - - 2.40 6.40 - - - - 1.40 - xgetbv # CHECK-NEXT: 5.25 6.25 0.50 0.50 - 5.25 13.25 - - - - - - xrstor (%rax) # CHECK-NEXT: 5.25 6.25 0.50 0.50 - 5.25 13.25 - - - - - - xrstors (%rax) -# CHECK-NEXT: 41.50 38.50 1.33 1.33 0.50 32.00 22.00 0.50 0.50 0.50 - 1.33 - xsave (%rax) -# CHECK-NEXT: 13.60 14.27 - - - 8.93 12.27 - - - 4.93 - - xsetbv +# CHECK-NEXT: 41.50 38.50 1.33 1.33 0.50 32.00 22.00 0.50 0.50 0.50 1.33 - - xsave (%rax) +# CHECK-NEXT: 13.60 14.27 - - - 8.93 12.27 - - - - 4.93 - xsetbv diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/zero-idioms.s index 90fea632be66b5..66f647b1978f02 100644 --- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/zero-idioms.s @@ -227,14 +227,14 @@ vpxor %ymm3, %ymm3, %ymm5 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 27.00 26.00 - - - 27.00 1.00 - - - 2.00 - - +# CHECK-NEXT: 27.00 26.00 - - - 27.00 1.00 - - - - 2.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - - - - - - - - - 1.00 - - subl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - - 1.00 - subl %eax, %eax # CHECK-NEXT: - - - - - - 1.00 - - - - - - subq %rax, %rax # CHECK-NEXT: - - - - - 1.00 - - - - - - - xorl %eax, %eax -# CHECK-NEXT: - - - - - - - - - - 1.00 - - xorq %rax, %rax +# CHECK-NEXT: - - - - - - - - - - - 1.00 - xorq %rax, %rax # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtb %mm2, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtd %mm2, %mm2 
# CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtw %mm2, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s index df0053a1dcb9b5..25f79397fa071d 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index 1b196b4355a6d4..028625013a85cc 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1115,9 +1115,9 @@ vzeroupper # 
CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * vcomiss (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 2 11 1.00 * vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 9 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 2 9 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 9 1.00 * vcvtsd2si (%rax), %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 424.25 3.25 12.67 +# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 421.25 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1825,9 +1825,9 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcomiss %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcomiss (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - 
vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 @@ -1847,7 +1847,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s index 9fcd03bfb2fd45..07870d92dac555 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-f16c.s @@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 4.00 - 0.67 +# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 4.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 - - 0.33 vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: - - - 1.00 - 
- - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s index e76d90521afa9c..8851be4679a1e9 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s @@ -423,7 +423,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comisd (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2 @@ -433,7 +433,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 9 1.00 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67 +# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -709,7 +709,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - comisd %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - comisd (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 
- - - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2 @@ -719,7 +719,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s index 49db25cb0bdfb1..7f07fd56fe60dc 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %ecx @@ -1152,7 +1152,7 @@ vzeroupper # CHECK-NEXT: 3 5 2.00 vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 1 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %rcx @@ -1734,7 +1734,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 248.50 319.00 39.00 369.50 179.50 179.50 +# 
CHECK-NEXT: - 572.00 248.50 319.00 39.00 371.50 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1845,7 +1845,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2si (%rax), %ecx @@ -1860,7 +1860,7 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s index 8736c1c6234af7..7cd1d3fc35ee67 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s @@ -1263,14 +1263,14 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd 
(%rax){1to8}, %zmm19 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 2 5 1.00 vcvtsd2usi %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2usi %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2usi (%rax), %ecx @@ -2053,7 +2053,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1506.00 198.00 335.00 25.00 523.00 304.50 304.50 +# CHECK-NEXT: - 1506.00 198.00 335.00 25.00 529.00 304.50 304.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2230,14 +2230,14 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 
vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2usi %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2usi %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2usi (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s index 8bf3c21891f7f8..9587c40ede68be 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s @@ -1970,14 +1970,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# 
CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 # CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 # CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 @@ -3269,7 +3269,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1935.00 278.00 579.50 48.00 738.50 495.50 495.50 +# CHECK-NEXT: - 1935.00 278.00 579.50 48.00 744.50 495.50 495.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -3511,14 +3511,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, 
%xmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s index 7dea75f8f8fec0..4abcd6fc516b79 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-f16c.s @@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00 +# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph 
$0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s index df0053a1dcb9b5..25f79397fa071d 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 +687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - 
- 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index 05c476079c0f9d..179393abb08d47 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1115,9 +1115,9 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * vcomiss (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 12 1.00 * vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 2 12 1.00 * vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 3 10 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 2 10 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 9 1.00 * vcvtsd2si (%rax), %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 427.58 3.25 12.67 +# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 424.58 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] 
Instructions: @@ -1825,9 +1825,9 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcomiss %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcomiss (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 @@ -1847,7 +1847,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s index 538ecf99074eda..d1fb824fee23db 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-f16c.s @@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67 +# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 6.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 1.00 - - - 
- 1.00 - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s index 907db6f44a9e29..37a28a66fd3506 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s @@ -209,7 +209,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comiss %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comiss (%rax), %xmm1 # CHECK-NEXT: 1 3 1.00 cvtpi2ps %mm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: 2 9 1.00 * cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2 # CHECK-NEXT: 2 4 1.00 cvtsi2ss %ecx, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s index 3b4aeb37968fd5..c9c3e20eeadedd 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s @@ -423,7 +423,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * comisd (%rax), %xmm1 # CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 3 10 1.00 * cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtdq2ps 
%xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2 @@ -433,7 +433,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 3 9 1.00 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 * cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 112.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67 +# CHECK-NEXT: - 112.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -709,7 +709,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - comisd %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - comisd (%rax), %xmm1 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2 @@ -719,7 +719,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pd 
%xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s index d6d157827b3141..781676d70763c0 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s @@ -1137,7 +1137,7 @@ vzeroupper # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %rcx # CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %ecx @@ -1152,7 +1152,7 @@ vzeroupper # CHECK-NEXT: 3 5 2.00 vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 1 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 1.00 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %rcx @@ -1734,7 +1734,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 248.50 319.00 39.00 369.50 179.50 179.50 +# CHECK-NEXT: - 572.00 248.50 319.00 39.00 371.50 179.50 179.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1845,7 +1845,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtsd2si 
%xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 vcvtsd2si (%rax), %ecx @@ -1860,7 +1860,7 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - 2.00 - - vcvtsi2ss %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssl (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtsi2ssq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: - - 1.00 1.00 - - - - vcvtss2si %xmm0, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s index a2ec86e8724faa..9284810b9e73be 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-f16c.s @@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] 
[2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00 +# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s index e2cfd02bc76c84..ff0f22bec1402c 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s @@ -448,7 +448,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2 @@ -687,7 
+687,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00 +# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -732,7 +732,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s index 678619ff4f5f6e..08a706d4ab97ac 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/independent-load-stores.s @@ -68,20 +68,20 @@ # ALL: Resource pressure per iteration: # ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 2.00 3.34 - +# ALL-NEXT: 2.00 2.00 3.33 3.33 5.00 2.00 2.00 5.00 5.00 5.00 3.34 2.00 - # ALL: Resource pressure by instruction: # ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 1.00 0.34 - addq $44, 64(%r14) -# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - - 0.33 - addq $44, 128(%r14) -# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 - 0.33 - addq $44, 192(%r14) -# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 256(%r14) -# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 - 0.33 - addq 
$44, 320(%r14) -# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 1.00 0.33 - addq $44, 384(%r14) -# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 - 0.34 - addq $44, 448(%r14) -# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - - 0.33 - addq $44, 512(%r14) -# ALL-NEXT: - 1.00 0.34 0.33 - - - - 1.00 1.00 - 0.33 - addq $44, 576(%r14) -# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - - 0.34 - addq $44, 640(%r14) +# ALL-NEXT: - - 0.33 0.33 - - - - 1.00 1.00 0.34 1.00 - addq $44, 64(%r14) +# ALL-NEXT: - - 0.33 0.34 1.00 - 1.00 1.00 - - 0.33 - - addq $44, 128(%r14) +# ALL-NEXT: - - 0.34 0.33 - 1.00 - - 1.00 1.00 0.33 - - addq $44, 192(%r14) +# ALL-NEXT: - 1.00 0.33 0.33 1.00 - - 1.00 - - 0.34 - - addq $44, 256(%r14) +# ALL-NEXT: 1.00 - 0.33 0.34 - - - - 1.00 1.00 0.33 - - addq $44, 320(%r14) +# ALL-NEXT: - - 0.34 0.33 1.00 - - 1.00 - - 0.33 1.00 - addq $44, 384(%r14) +# ALL-NEXT: - - 0.33 0.33 - - 1.00 - 1.00 1.00 0.34 - - addq $44, 448(%r14) +# ALL-NEXT: - - 0.33 0.34 1.00 1.00 - 1.00 - - 0.33 - - addq $44, 512(%r14) +# ALL-NEXT: - 1.00 0.34 0.33 - - - - 1.00 1.00 0.33 - - addq $44, 576(%r14) +# ALL-NEXT: 1.00 - 0.33 0.33 1.00 - - 1.00 - - 0.34 - - addq $44, 640(%r14) # ALL: Timeline view: diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s index b1345cd11bb9c5..3fab0dfba8b97b 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-adx.s @@ -46,15 +46,15 @@ adox (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - - 1.33 - +# CHECK-NEXT: 4.00 - 1.33 1.33 - - 4.00 - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxl %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 
- 0.33 - adcxl (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcxl (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcxq %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcxq (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcxq (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxl %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxl (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adoxl (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adoxq %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adoxq (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adoxq (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s index b43f35e8347f7d..8d2e4f6c78ce14 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-aes.s @@ -58,19 +58,19 @@ aeskeygenassist $22, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 - - - - 2.00 - +# CHECK-NEXT: 17.33 10.33 2.00 2.00 - 9.33 2.00 - - - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdec %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdec (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesdec (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesdeclast %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesdeclast (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesdeclast (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenc %xmm0, %xmm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenc (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesenc (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - aesenclast %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - aesenclast (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - aesenclast (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - aesimc %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - aesimc (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - aesimc (%rax), %xmm2 # CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - aeskeygenassist $22, %xmm0, %xmm2 -# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - aeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - 0.33 - - aeskeygenassist $22, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s index 3eb2864c5376d3..5d2e480a97a8e3 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx1.s @@ -1739,427 +1739,427 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 310.90 275.73 107.00 107.00 20.00 277.73 8.90 18.50 18.50 19.00 0.73 107.00 - +# CHECK-NEXT: 310.90 275.73 107.00 107.00 20.00 277.73 8.90 18.50 18.50 19.00 107.00 0.73 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 
0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddsubps 
(%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vaesimc %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vaesimc (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vaesimc (%rax), %xmm2 # CHECK-NEXT: 5.83 2.33 - - - 4.83 1.00 - - - - - - vaeskeygenassist $22, %xmm0, %xmm2 -# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - - 0.33 - vaeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 5.50 2.00 0.33 0.33 - 4.50 1.00 - - - 0.33 - - vaeskeygenassist $22, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 
0.33 - vandnpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendpd $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendpd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendpd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 
0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendpd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendpd $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendps $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendps $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendps $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vblendps $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vblendps $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvps %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvps %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvps %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vblendvps %ymm3, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf128 (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastsd (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 
- - - - - - - 0.33 - vbroadcastss (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vblendvps %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf128 (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastsd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcmpeqss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcmpeqss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcmpeqss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomisd %xmm0, 
%xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomisd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomiss (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx 
(%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 
- - - - - - - vcvtsi2sd %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2sdl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2sdq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2sdl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2sdq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtsi2ss %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - vcvtsi2ss %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsi2ssl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtsi2ssq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsi2ssl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtsi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvtss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - 
vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2si (%rax), %rcx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvttss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - 
- vdivps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.50 - - - 0.50 - - - - - - - vdppd $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - - 0.33 - vdppd $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - 0.33 - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vextractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - vextractps $1, %xmm0, 
(%rax) # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - vhsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - vhsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - 
- vinsertf128 $1, %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertps $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vinsertps $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vlddqu (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vlddqu (%rax), %ymm2 -# CHECK-NEXT: 1.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - vldmxcsr (%rax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vinsertps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vlddqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vlddqu (%rax), %ymm2 +# CHECK-NEXT: 1.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - vldmxcsr (%rax) # CHECK-NEXT: - - - - 1.50 - - - - 0.50 - - - vmaskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovpd (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovpd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovpd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovpd (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovpd %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovpd %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovps (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmaskmovps (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovps (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 
0.50 - - 0.50 0.50 0.50 - - - vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm1, %ymm2 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), 
%ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovd %eax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovd %xmm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vmovhlps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovlhps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovhpd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vmovhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vmovhpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovhps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vmovhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vmovhps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovlpd %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vmovlpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vmovlpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovlps %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vmovlps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vmovlps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskpd %xmm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskpd %ymm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskps %xmm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovmskps %ymm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntdq %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntdq %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntpd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntpd %ymm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntps %xmm0, (%rax) # 
CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovntps %ymm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovq %rax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovq (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmovq %xmm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovq %xmm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovsd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovss %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %xmm0, %xmm2 # CHECK-NEXT: 
- - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vmovups %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %ymm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vmpsadbw $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - vmpsadbw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - vmpsadbw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vmulps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - 
- - - vpabsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - 
vpaddq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpand (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpand (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpandn %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpandn (%rax), 
%xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpandn (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vpblendvb %xmm3, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpblendw $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendw $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendw $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
- 0.33 - vpcmpeqq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4.17 1.67 - - - 1.67 0.50 - - - - - - vpcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - - 0.33 - vpcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - 0.33 - - vpcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 4.50 2.00 - - - 2.00 0.50 - - - - - - vpcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - - 0.33 - vpcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - 0.33 - - vpcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - 
vpcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpcmpistri $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vperm2f128 $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vperm2f128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vperm2f128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps 
(%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrb $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrd $1, %xmm0, %ecx @@ -2169,268 +2169,268 @@ vzeroupper # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrw $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrw $1, %xmm0, (%rax) # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vphminposuw %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vphminposuw (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vphminposuw (%rax), %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 
0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrb $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrb $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrb $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrd $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrq $1, %rax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrq $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrq $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrw $1, %eax, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd 
%xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxud %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - - 0.33 - vpminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminud %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminud (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovmskb %xmm0, %ecx # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - 
vpmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuldq %xmm0, 
%xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuldq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuludq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuludq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuludq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpor %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpor (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpslld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - - 0.33 - vpslld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpslld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrad $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrad %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrad (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrad (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrld (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlq %xmm0, %xmm1, 
%xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $1, %xmm0, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - 
- - - - - - - - - - vpsubusw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vptest %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vptest (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vptest (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vptest %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vptest (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vptest (%rax), %ymm1 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhqdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhqdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm0, %xmm1, 
%xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklqdq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklqdq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpxor %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpxor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpxor (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrcpss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrcpss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundpd $1, (%rax), %xmm2 
+# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundpd $1, (%rax), %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundps $1, (%rax), %ymm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundsd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrsqrtss (%rax), %xmm1, %xmm2 # 
CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufpd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufpd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufpd $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufpd $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufpd $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufpd $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufps $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufps $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vshufps $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vshufps $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vshufps $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 
- vsqrtsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - vstmxcsr (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestpd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestpd 
(%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestpd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestpd %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestpd (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestpd (%rax), %ymm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestps %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestps (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestps (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vtestps %ymm0, %ymm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vtestps (%rax), %ymm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vtestps (%rax), %ymm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomisd (%rax), %xmm1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - 
- - - - 0.33 - vunpckhps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), 
%ymm1, %ymm2 -# CHECK-NEXT: 2.23 4.07 - - - 1.07 1.90 - - - 0.73 - - vzeroall +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2.23 4.07 - - - 1.07 1.90 - - - - 0.73 - vzeroall # CHECK-NEXT: - - - - - - - - - - - - - vzeroupper diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s index a058279f0644df..7ee90759a60762 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx2.s @@ -779,308 +779,308 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 111.00 130.00 65.33 65.33 2.50 135.00 - 2.50 2.50 2.50 - 65.33 - +# CHECK-NEXT: 111.00 130.00 65.33 65.33 2.50 135.00 - 2.50 2.50 2.50 65.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti128 (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti128 (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm0, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm0, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextracti128 $1, %ymm0, (%rax) -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - - 2.67 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# 
CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - 2.67 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti128 $1, %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti128 $1, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %ymm0 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %ymm0 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vmpsadbw $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - vmpsadbw $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - vmpsadbw $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm2 # CHECK-NEXT: 
0.50 0.50 - - - - - - - - - - - vpabsd %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 
0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpand %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpand (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpand (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpandn %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 
- 0.33 - vpandn (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpandn (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendd $11, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendd $11, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendd $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendd $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - - - 1.00 - - - - - - - vpblendvb %ymm3, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpblendw $11, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendw $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendw $11, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm0, %ymm0 -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm0, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm0, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, 
%ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpeqw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpcmpgtw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpcmpgtw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpcmpgtw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vperm2i128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vperm2i128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - 
- - - - - vpermpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $1, (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - - 2.67 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - - 0.67 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - - 1.33 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $1, (%rax), %ymm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 2.67 2.67 - 1.33 - - - - 2.67 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - 
vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1.33 0.83 0.67 0.67 - 0.83 - - - - 0.67 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1.33 1.33 1.33 1.33 - 1.33 - - - - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - vphsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - vphsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - vphsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - vphsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - vphsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw 
(%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovd (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovd %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovd %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovq (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpmaskmovq (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpmaskmovq (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovq %xmm0, %xmm1, (%rax) # CHECK-NEXT: 1.00 - - - 0.50 - - 0.50 0.50 0.50 - - - vpmaskmovq %ymm0, %ymm1, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm1, %ymm2 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxud %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxud (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminud %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
0.33 - - vpminud (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovmskb %ymm0, %ecx # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 
0.33 - vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1.00 1.00 - 
- - - - - - - - - - vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmuludq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmuludq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmuludq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpor %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $1, (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $1, (%rax), %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $1, %ymm0, %ymm2 -# CHECK-NEXT: - 0.50 
0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $1, (%rax), %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $1, (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsignw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsignw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsignw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpslld $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpslld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpslld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllq $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrad $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrad %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrad (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 
0.50 0.50 - - - - - - - - - - - vpsrld $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrld (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlq $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $1, %ymm0, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 
- - - 0.33 - - - - - - - vpsubb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - 
- - vpunpckhbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhqdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhqdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklqdq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklqdq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm1, %ymm2 # 
CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpxor %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpxor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpxor (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s index 5ad7397a8ddc30..d5f8448abe1bac 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s @@ -2058,7 +2058,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 508.60 13.60 218.67 218.67 48.50 578.60 1.60 48.50 48.50 48.50 1.60 218.67 - +# CHECK-NEXT: 508.60 13.60 218.67 218.67 48.50 578.60 1.60 48.50 48.50 48.50 218.67 1.60 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -2072,943 +2072,943 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - kshiftrw $2, %k1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - kunpckbw %k0, %k1, %k2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 
- 0.50 - - - - 0.33 - - vaddpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vaddps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to16}, 
%zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 
- - - - - 0.33 - valignq $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x4 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x4 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x4 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf64x4 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf64x4 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf64x4 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x4 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x4 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x4 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti64x4 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti64x4 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti64x4 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x4 
(%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x4 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x4 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf64x4 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf64x4 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf64x4 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x4 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x4 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x4 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti64x4 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti64x4 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti64x4 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastsd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastsd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastsd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastsd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastsd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastsd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %zmm19 # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastss (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastss (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastss (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastss (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps 
(%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqsd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqsd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqsd (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqsd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqsd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqsd (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqss %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqss (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqss (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqss %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqss (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqss (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcomiss %xmm16, %xmm17 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcomiss (%rax), %xmm17 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcomiss (%rax), %xmm17 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtdq2pd %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtdq2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 
0.33 - vcvtdq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtdq2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtdq2ps %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtdq2ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtdq2ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2dq %zmm16, %ymm19 -# 
CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2dq %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2dq %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2udq %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2udq %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax), %ymm19 {%k1} +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2udq %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2dq %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2dq %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2dq %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2udq %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vcvttpd2udq (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2udq %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttpd2udq %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2ps %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2ps %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 
0.33 - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtpd2ps %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2dq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2dq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2dq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2dq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to16}, %zmm19 
+# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2dq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2dq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2pd %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 
- 1.00 - - - - - 0.33 - vcvtps2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2usi %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtsd2usi %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2usi (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtsd2usi (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2usi (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtsd2usi (%rax), %rcx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvtss2usi %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvtss2usi %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2usi (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtss2usi (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2usi (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtss2usi (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2udq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2udq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %zmm19 {%k1} 
+# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtps2udq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2udq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2udq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttps2udq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2usi %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttsd2usi %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 
0.33 0.33 - - - - - - - 0.33 - vcvttsd2usi (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttsd2usi (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2usi (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttsd2usi (%rax), %rcx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - vcvttss2usi %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - vcvttss2usi %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2usi (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttss2usi (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2usi (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttss2usi (%rax), %rcx # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtudq2pd %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtudq2pd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtudq2pd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 
- - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtudq2ps %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtusi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtusi2sd %rcx, %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtusi2sdl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtusi2sdq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtusi2sdl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtusi2sdq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtusi2ss %ecx, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - vcvtusi2ss %rcx, %xmm0, %xmm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtusi2ssl (%rax), %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtusi2ssq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtusi2ssl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtusi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax){1to16}, %zmm17, %zmm19 +# 
CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - {evex} vextractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - {evex} vextractps $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, 
%zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %zmm17, %zmm19 
{%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231pd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - 
- - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd132ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd213ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - 
vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vfmadd231ps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - - 5.33 - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vgatherqps (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - 
vfmadd231ps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - 5.33 - - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vgatherqps (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps 
(%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmaxps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - 0.33 - - vminpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vminps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd 
(%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovapd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovapd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovapd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovapd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovapd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovapd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovaps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovaps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovaps (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovaps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovaps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovaps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovddup (%rax), %zmm19 {%k1} +# 
CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovddup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovddup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovddup (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa32 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa32 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa32 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqa32 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqa32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqa32 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqa32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqa32 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa64 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa64 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa64 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqa64 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqa64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqa64 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - 
vmovdqa64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqa64 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu32 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu32 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu32 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu32 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu32 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu32 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu32 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu32 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu64 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu64 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu64 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu64 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu64 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu64 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu64 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovntdqa (%rax), %zmm0 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu64 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 
0.33 0.33 - - - - - - 0.33 - - vmovntdqa (%rax), %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovshdup %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovshdup %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovshdup (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovshdup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovshdup %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovshdup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovshdup (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovsldup %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovsldup %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovsldup (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovsldup %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovsldup (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovsldup (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovupd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovupd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 
0.33 - - vmovupd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovupd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovupd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovupd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovups %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovups %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovups (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovups (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovups %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovups (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovups (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %zmm17, 
%zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vmulps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsd %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 
0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsd (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsd (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsq %zmm16, %zmm19 {%k1} {z} -# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 
0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpbroadcastd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpbroadcastd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpbroadcastd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpbroadcastd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq 
(%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpbroadcastq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpbroadcastq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpbroadcastq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpbroadcastq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 
- vpcmpeqd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3} 
# CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), 
%zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to8}, %zmm1, %k2 {%k3} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - - 5.33 - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} -# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - - 2.67 - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 5.33 5.33 - 2.00 - - - - 5.33 - - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 
2.00 - - - - 2.67 - - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1.00 - 2.67 2.67 - 2.00 - - - - 2.67 - - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %zmm19 # CHECK-NEXT: - 
- - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %ymm16, %zmm19 -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2.00 - - - - - - - - - - - - vpmulld %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 2.00 - - - - - - - - - - - - vpmulld %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 2.00 - 0.33 0.33 - - 
- - - - - 0.33 - vpmulld (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 2.00 - - - - - - - - - - - - vpmulld %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - 
- - 0.33 - - vpermd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to8}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - 
- 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to16}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to8}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, 
(%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %zmm17, %zmm19 {%k1} {z} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to8}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %zmm19 
{%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 0.20 - - vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 
4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 - 0.20 - vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufd $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax), %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax){1to16}, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufd $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufd $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufd $0, 
(%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufd $0, (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - 
vpsubq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhdq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhdq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhdq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhdq (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhqdq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhqdq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhqdq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckldq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 
- vpunpckldq (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckldq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckldq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckldq (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklqdq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 0.20 - - vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} -# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklqdq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.20 0.20 - - 8.00 0.20 0.20 8.00 8.00 8.00 - 0.20 - vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1} +# CHECK-NEXT: 2.20 0.20 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19 {%k1} 
-# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vshuff64x2 $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %zmm17, %zmm19 -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtpd %zmm16, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax), %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax), %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax){1to8}, %zmm19 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtpd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax), 
%zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtpd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtpd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtpd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtps %zmm16, %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax), %zmm19 -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax), %zmm19 +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax){1to16}, %zmm19 # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 2.50 - - - - 0.50 - - - - - - - vsqrtps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsqrtps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsqrtps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm17, %xmm19 +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtsd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtsd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtsd (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtss %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtss (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtss (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 {%k1} -# 
CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 
0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %zmm1, %k2 -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to16}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to16}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to8}, %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 
0.50 - - - - 0.50 - - - - - - - vsubpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vsubps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %zmm17, %zmm19 
{%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vucomiss %xmm16, %xmm17 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vucomiss (%rax), %xmm17 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vucomiss (%rax), %xmm17 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 
- - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - 
- vunpcklps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s index d4ae44a3ca80e1..6fe1e5bb144cea 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalg.s @@ -63,23 +63,23 @@ vpshufbitqmb (%rdi), %zmm17, %k2 {%k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 - 2.67 2.67 - 16.00 - - - - - 2.67 - +# CHECK-NEXT: 4.00 - 2.67 2.67 - 16.00 - - - - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %zmm1, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %zmm1, %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %zmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %zmm1, %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %zmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %zmm1, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %zmm1, %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %zmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - 
- - vpopcntw %zmm1, %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %zmm0 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %zmm16, %zmm17, %k2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %zmm17, %k2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %zmm17, %k2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %zmm16, %zmm17, %k2 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %zmm17, %k2 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %zmm17, %k2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s index 7c2d8fa99835d3..e0fa580d091878 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bitalgvl.s @@ -98,39 +98,39 @@ vpshufbitqmb (%rdi), %ymm17, %k2 {%k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 - 5.33 5.33 - 32.00 - - - - - 5.33 - +# CHECK-NEXT: 8.00 - 5.33 5.33 - 32.00 - - - - 5.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %xmm1, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %xmm1, %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %xmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vpopcntb %xmm1, %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %xmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %ymm1, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %ymm1, %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %ymm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntb %ymm1, %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntb (%rdi), %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntb (%rdi), %ymm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %xmm1, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %xmm1, %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %xmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %xmm1, %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %xmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %ymm1, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %ymm1, %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %ymm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntw %ymm1, %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntw (%rdi), %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntw (%rdi), %ymm0 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %xmm16, %xmm17, %k2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %xmm17, %k2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %xmm17, %k2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %xmm16, %xmm17, %k2 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %xmm17, %k2 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %xmm17, %k2 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %ymm16, %ymm17, %k2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %ymm17, %k2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %ymm17, %k2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpshufbitqmb %ymm16, %ymm17, %k2 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufbitqmb (%rdi), %ymm17, %k2 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufbitqmb (%rdi), %ymm17, %k2 {%k1} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s index 989d72185f8fa0..54b9e367c12709 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bw.s @@ -1129,7 +1129,7 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 269.20 4.20 73.33 73.33 7.50 272.20 0.20 7.50 7.50 7.50 0.20 73.33 - +# CHECK-NEXT: 269.20 4.20 73.33 73.33 
7.50 272.20 0.20 7.50 7.50 7.50 73.33 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -1140,12 +1140,12 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - kandnd %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kandnq %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovd %k0, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - kmovd (%rax), %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - kmovd (%rax), %k2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - kmovd %k0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - kmovd %eax, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovd %k0, %eax # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovq %k0, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - kmovq (%rax), %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - kmovq (%rax), %k2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - kmovq %k0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - kmovq %rax, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovq %k0, %rax @@ -1168,275 +1168,275 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - kxord %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kxorq %k0, %k1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 
- - - - - 0.33 - vdbpsadbw $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu8 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu8 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu8 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu8 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu8 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.20 0.20 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vmovdqu8 %zmm16, (%rax) {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.20 0.20 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vmovdqu8 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu8 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu8 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu16 %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu16 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu16 (%rax), %zmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %zmm16, (%rax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu16 %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu16 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %zmm16, (%rax) {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vmovdqu16 %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vmovdqu16 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - 
- 0.33 - - vmovdqu16 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsb %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsb %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsb %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsw %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsw %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpabsw %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw 
%zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 
- 1.00 - - - - 0.33 - - vpackuswb (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 
- 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - 
- - - - - - - vpaddusw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpaddusw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpaddw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpaddw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpaddw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), 
%zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpavgw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmb %zmm16, %zmm17, %zmm19 {%k1} -# 
CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpblendmw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpblendmw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpblendmw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %zmm19 {%k1} 
{z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vpcmpeqb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - - 0.33 - vpcmpequb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrb $0, %xmm16, %eax # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrb $0, %xmm16, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrw $0, %xmm16, %eax # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrw $0, %xmm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrb $0, %eax, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrb $0, (%rax), %xmm16, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrb $0, (%rax), %xmm16, %xmm19 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrw $0, %eax, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrw $0, (%rax), %xmm16, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrw $0, (%rax), %xmm16, %xmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - 
- vpermw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpermw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpermw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2w %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2w (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2w (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2w %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2w (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2w (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2w %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2w (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2w (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2w %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2w (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2w (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2w %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2w (%rax), %zmm17, %zmm19 {%k1} +# 
CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2w (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2w %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2w (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2w (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddubsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddubsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddubsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddwd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddwd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaddwd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - 
- vpmaxsb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxub %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxub %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %zmm17, 
%zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxub %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxuw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxuw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmaxuw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %zmm17, %zmm19 +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminub %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminub %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminub %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminuw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminuw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpminuw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - - 0.33 - vpminuw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovb2m %zmm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovw2m %zmm0, %k0 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpmovm2b %k0, %zmm0 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpmovm2w %k0, %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovswb %zmm16, %ymm19 # CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovswb %zmm16, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovswb %zmm16, %ymm19 {%k1} @@ -1450,186 +1450,186 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpmovwb %zmm16, %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 2.00 - 0.50 0.50 0.50 - - - vpmovwb %zmm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %ymm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 
- - - - 0.33 - - vpmovzxbw (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhrsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhrsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhrsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhuw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhuw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhuw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 
0.33 - vpmulhw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmulhw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmullw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmullw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmullw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufb (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufb %zmm16, %zmm17, %zmm19 {%k1} -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufhw $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufhw $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufhw $0, (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufhw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufhw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufhw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshufhw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshufhw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshufhw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshuflw $0, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshuflw $0, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshuflw $0, (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshuflw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshuflw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshuflw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpshuflw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpshuflw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpshuflw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpslldq $1, 
%zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpslldq $1, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpslldq $1, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllvw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllvw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllvw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllw $0, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsllw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsllw %xmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsllw %xmm16, %zmm17, %zmm19 
{%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsllw %xmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsravw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsravw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsravw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsraw $0, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsraw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsraw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - 
- - - vpsraw %xmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsraw %xmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsraw %xmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsrldq $1, %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsrldq $1, (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsrldq $1, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlvw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlvw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlvw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlw $0, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - 
- - - - vpsrlw $0, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsrlw $0, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsrlw %xmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsrlw %xmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpsrlw %xmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubb 
(%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubsw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %zmm17, %zmm19 {%k1} 
+# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpsubusw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpsubw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpsubw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpsubw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %zmm0, 
%zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %zmm0, %zmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %zmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %zmm0, %zmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhbw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - 
- - 0.33 - vpunpckhbw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhbw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhbw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhbw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhbw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhbw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhbw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhwd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhwd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhwd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhwd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhwd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpckhwd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpckhwd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpckhwd (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklbw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklbw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklbw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklbw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklbw (%rax), %zmm17, %zmm19 
{%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklbw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklbw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklbw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklwd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklwd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklwd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpunpcklwd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpunpcklwd (%rax), %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s index 963363964ad91c..598a574ae7f507 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512bwvl.s @@ -2026,514 +2026,514 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 258.67 323.17 144.00 144.00 10.00 420.17 - 10.00 10.00 10.00 - 144.00 - +# CHECK-NEXT: 258.67 323.17 144.00 144.00 10.00 420.17 - 10.00 10.00 10.00 144.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %xmm16, %xmm17, %xmm19 -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vdbpsadbw $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vdbpsadbw $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vdbpsadbw $0, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu8 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu8 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu8 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu8 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - 
vmovdqu8 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu8 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu8 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu8 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu8 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu8 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu8 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu8 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu8 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu8 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu8 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu8 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu8 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu8 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu8 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu16 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu16 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu16 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu16 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu16 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu16 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %xmm16, (%rax) 
{%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu16 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu16 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu16 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu16 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu16 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu16 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu16 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu16 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu16 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu16 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu16 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu16 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu16 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsb %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsb (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsb (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpabsw %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpabsw (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vpabsw (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackssdw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackssdw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackssdw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 
- 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpacksswb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpacksswb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpacksswb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw 
(%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackusdw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackusdw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackusdw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vpackuswb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpackuswb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpackuswb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpackuswb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 
0.33 - - vpaddb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 
- - - - - - 0.33 - - vpaddusb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm16, 
%ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpaddusw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpaddusw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpaddusw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddw 
(%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpalignr $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpalignr $1, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb 
(%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - - 0.33 - vpavgw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpavgw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpavgw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpavgw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 
0.33 - - - - 0.33 - - vpblendmb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - 
vpblendmw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpblendmw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpblendmw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpblendmw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %xmm16, 
%ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastb (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastb (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastb %eax, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpbroadcastw (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - 
- - 0.33 - vpbroadcastw (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpbroadcastw (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastw %eax, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - - 0.33 - vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtb (%rax), %ymm1, %k2 {%k3} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequb 
(%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: 0.50 0.50 - - - 1.00 
- - - - - - - vpermw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpermw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vpermw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vpermw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - 
- - - 0.33 - vpermi2w (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2w %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2w (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2w (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %xmm17, 
%xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2w %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2w (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2w (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
0.33 - - vpmaddubsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddubsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddubsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddubsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaddwd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vpmaddwd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaddwd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaddwd (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm17, 
%xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - 
- - - - - - vpmaxub %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxub %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxub (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxub (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), 
%ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmaxuw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmaxuw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmaxuw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - 
- - - - - - - - - - vpminsb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub 
(%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminub %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminub (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminub (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - - vpminuw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpminuw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpminuw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpminuw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovb2m %xmm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovw2m %xmm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovb2m %ymm0, %k0 @@ -2543,17 +2543,17 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2b %k0, %ymm0 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2w %k0, %ymm0 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbw 
%xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbw (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbw (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbw %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbw (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbw (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovswb %xmm16, %xmm19 # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovswb %xmm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovswb %xmm16, %xmm19 {%k1} @@ -2579,370 +2579,370 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpmovwb %ymm16, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - 0.50 1.50 - 0.50 0.50 0.50 - - - vpmovwb %ymm16, (%rax) {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbw (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbw %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 
0.33 - vpmovzxbw (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbw (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbw %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbw (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbw (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm16, 
%ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhrsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhrsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhrsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhuw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhuw (%rax), 
%ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhuw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmulhw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmulhw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmulhw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 
- - - - - - - - - - - vpmullw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmullw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmullw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmullw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpsadbw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpsadbw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpsadbw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 
0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %xmm16, %xmm19 {%k1} 
{z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufhw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufhw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufhw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %ymm19 # CHECK-NEXT: - 
0.50 - - - 0.50 - - - - - - - vpshuflw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshuflw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshuflw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshuflw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpslldq $1, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpslldq $1, (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpslldq $1, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpslldq $1, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpslldq $1, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - 0.33 - - vpsllvw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllvw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllvw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllvw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsllw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw $0, (%rax), 
%ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsllw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsllw %xmm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsllw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsllw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - 
- - - - vpsravw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsravw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsravw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsravw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsraw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw $0, (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsraw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 
- - - - - - - 0.33 - vpsraw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsraw %xmm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsraw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsraw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpsrldq $1, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpsrldq $1, (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpsrldq $1, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpsrldq $1, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpsrldq $1, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - 
- - - vpsrlvw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlvw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlvw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlvw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, (%rax), %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsrlw $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw $0, 
(%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - vpsrlw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vpsrlw %xmm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsrlw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsrlw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 
0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - 0.33 - - vpsubsb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm16, %ymm17, %ymm19 
{%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubsw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubsw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubsw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusb (%rax), %ymm17, %ymm19 {%k1} {z} +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpsubusw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpsubusw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpsubusw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - 
- 0.33 - - - - - - - vpsubw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %ymm1, %k2 +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb 
(%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmb %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmb (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmw %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 
0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhbw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhbw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhbw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm17, %ymm19 # 
CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhwd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhwd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhwd (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklbw %ymm16, 
%ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklbw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklbw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpcklwd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpcklwd (%rax), %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s index 61e9c8b9b38aa0..f4dd6b2fa63b77 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cd.s @@ -110,45 +110,45 @@ vplzcntq (%rax){1to8}, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 243.50 - 8.00 8.00 - 297.50 - - - - - 8.00 - +# CHECK-NEXT: 243.50 - 8.00 8.00 - 297.50 - - - - 8.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpbroadcastmb2q %k0, %zmm16 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpbroadcastmw2d %k0, %zmm16 # CHECK-NEXT: 15.50 - - - - 21.50 - - - - - - - vpconflictd %zmm16, %zmm19 -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax), %zmm19 -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax){1to16}, %zmm19 +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax), %zmm19 +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax){1to16}, %zmm19 # CHECK-NEXT: 15.50 - - - - 21.50 - - - - - - - vpconflictd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 15.50 - - - - 21.50 - - - - - - - vpconflictd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - - 0.33 - vpconflictd (%rax){1to16}, %zmm19 {%k1} {z} +# 
CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 15.00 - 0.33 0.33 - 21.00 - - - - 0.33 - - vpconflictd (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 10.00 - - - - 12.00 - - - - - - - vpconflictq %zmm16, %zmm19 -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax), %zmm19 -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax), %zmm19 +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax){1to8}, %zmm19 # CHECK-NEXT: 10.00 - - - - 12.00 - - - - - - - vpconflictq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 10.00 - - - - 12.00 - - - - - - - vpconflictq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - - 0.33 - vpconflictq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 9.50 - 0.33 0.33 - 11.50 - - - - 0.33 - - vpconflictq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntd %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntd %zmm16, %zmm19 
{%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vplzcntq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - 
vplzcntq (%rax){1to8}, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s index d35c8657cf9fa7..8dac8e96ed6767 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512cdvl.s @@ -190,7 +190,7 @@ vplzcntq (%rax){1to4}, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 148.00 139.50 16.00 16.00 - 238.00 4.50 - - - - 16.00 - +# CHECK-NEXT: 148.00 139.50 16.00 16.00 - 238.00 4.50 - - - 16.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -199,74 +199,74 @@ vplzcntq (%rax){1to4}, %ymm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpbroadcastmw2d %k0, %xmm16 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vpbroadcastmw2d %k0, %ymm16 # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictd %xmm16, %xmm19 -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax), %xmm19 -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax){1to4}, %xmm19 +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax), %xmm19 +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax){1to4}, %xmm19 # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictd %xmm16, %xmm19 {%k1} 
{z} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictd (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictd (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 6.00 5.50 - - - 11.00 0.50 - - - - - - vpconflictd %ymm16, %ymm19 -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax), %ymm19 -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax){1to8}, %ymm19 +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax), %ymm19 +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax){1to8}, %ymm19 # CHECK-NEXT: 6.00 5.50 - - - 11.00 0.50 - - - - - - vpconflictd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 6.00 5.50 - - - 11.00 0.50 - - - - - - vpconflictd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - - 0.33 - vpconflictd (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 5.67 5.17 0.33 0.33 - 10.67 0.50 - - - 0.33 - - vpconflictd (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpconflictq %xmm16, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax), %xmm19 
-# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax), %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpconflictq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpconflictq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpconflictq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpconflictq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictq %ymm16, %ymm19 -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax), %ymm19 -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax), %ymm19 +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax){1to4}, %ymm19 # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax), 
%ymm19 {%k1} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 4.17 4.17 - - - 6.67 - - - - - - - vpconflictq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - - 0.33 - vpconflictq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3.83 3.83 0.33 0.33 - 6.33 - - - - 0.33 - - vpconflictq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 
0.33 - vplzcntd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntd (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntd (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to2}, %xmm19 
{%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vplzcntq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vplzcntq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vplzcntq (%rax){1to4}, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s index cf451f6feff858..504eda42d3010f 100644 --- 
a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dq.s @@ -873,7 +873,7 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 256.00 22.00 68.00 68.00 5.50 165.00 - 5.50 5.50 5.50 - 68.00 - +# CHECK-NEXT: 256.00 22.00 68.00 68.00 5.50 165.00 - 5.50 5.50 5.50 68.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -882,7 +882,7 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - kandb %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kandnb %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovb %k0, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - kmovb (%rax), %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - kmovb (%rax), %k2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - kmovb %k0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - kmovb %eax, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kmovb %k0, %eax @@ -896,173 +896,173 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - kxnorb %k0, %k1, %k2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - kxorb %k0, %k1, %k2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax), %zmm17, %zmm19 {%k1} -# 
CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandnps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandnps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 
0.33 - 0.50 - - - - 0.33 - - vandnps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandnps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 
0.50 - - - - 0.50 - - - - - - - vandps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vandps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vandps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x2 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x2 (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x2 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x2 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x2 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x8 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x8 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf32x8 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf64x2 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 
0.33 - 0.50 - - - - - 0.33 - vbroadcastf64x2 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcastf64x2 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x2 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x8 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf32x8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf64x2 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf64x2 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcastf64x2 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x2 (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x2 (%rax), %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x2 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x2 (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x2 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x8 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x8 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti32x8 (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti64x2 (%rax), %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vbroadcasti64x2 (%rax), %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - 
vbroadcasti64x2 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x2 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x8 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x8 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti32x8 (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti64x2 (%rax), %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti64x2 (%rax), %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vbroadcasti64x2 (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2qq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2qq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2qq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2uqq %zmm16, 
%zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2uqq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtpd2uqq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2qq %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2qq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 
- - - - 0.33 - - vcvtps2qq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2qq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2qq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2qq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2uqq %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2uqq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtps2uqq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtps2uqq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtps2uqq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtqq2pd %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtqq2pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtqq2pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtqq2ps %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtqq2ps %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtqq2ps %zmm16, %ymm19 
{%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2qq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2qq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2qq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2uqq %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 
0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2uqq %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvttpd2uqq %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2qq %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2qq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2qq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 
0.33 - 1.00 - - - - - 0.33 - vcvttps2qq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2qq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2uqq %ymm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2uqq %ymm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvttps2uqq %ymm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttps2uqq (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttps2uqq (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtuqq2pd %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to8}, %zmm19 # 
CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtuqq2pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vcvtuqq2pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtuqq2ps %zmm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtuqq2ps %zmm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - vcvtuqq2ps %zmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to8}, %ymm19 {%k1} {z} 
+# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf32x8 $1, %zmm16, %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextractf32x8 $1, %zmm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf32x8 $1, %zmm16, %ymm19 {%k1} @@ -1084,184 +1084,184 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextracti64x2 $1, %zmm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextracti64x2 $1, %zmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %zmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdz $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to8}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdz $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to8}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %zmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdz $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to8}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to8}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %zmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsz $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to16}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsz $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to16}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vfpclassps $171, %zmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsz $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to16}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to16}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasssd $171, %xmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasssd $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasssd $171, (%rax), %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasssd $171, %xmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasssd $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasssd $171, (%rax), %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassss $171, %xmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassss $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassss $171, (%rax), %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassss $171, %xmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassss $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassss $171, (%rax), %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf32x8 $1, %ymm16, %zmm16, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf32x8 $1, %ymm16, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vinsertf32x8 $1, %ymm16, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf32x8 $1, (%rax), %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %zmm16, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti32x8 $1, %ymm16, %zmm16, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti32x8 $1, (%rax), %zmm16, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti32x8 $1, (%rax), %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti32x8 $1, %ymm16, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti32x8 $1, (%rax), %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti32x8 $1, (%rax), %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti32x8 $1, %ymm16, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti32x8 $1, (%rax), %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti32x8 $1, (%rax), %zmm16, %zmm19 
{%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %zmm16, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti64x2 $1, (%rax), %zmm16, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti64x2 $1, (%rax), %zmm16, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti64x2 $1, (%rax), %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti64x2 $1, (%rax), %zmm16, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vinserti64x2 $1, (%rax), %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vinserti64x2 $1, (%rax), %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 
0.33 - vorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vorps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrd $1, %xmm16, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrd $1, %xmm16, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - vpextrq $1, %xmm16, %rcx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - vpextrq $1, %xmm16, (%rax) # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrd $1, %ecx, 
%xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrd $1, (%rax), %xmm16, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrd $1, (%rax), %xmm16, %xmm19 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - vpinsrq $1, %rcx, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpinsrq $1, (%rax), %xmm16, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpinsrq $1, (%rax), %xmm16, %xmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpmovm2d %k0, %zmm0 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vpmovm2q %k0, %zmm0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovd2m %zmm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovq2m %zmm0, %k0 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpmullq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %zmm17, %zmm19 # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpmullq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 3.00 - - - - - - - - - - - - vpmullq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, 
%zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, {sae}, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, {sae}, %zmm16, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangepd $ab, {sae}, %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - 
vrangeps $ab, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, {sae}, %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, {sae}, %zmm16, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vrangeps $ab, {sae}, %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangesd $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangesd $ab, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangesd $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangesd $ab, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangesd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangesd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, {sae}, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangesd $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - 
- - - - - - - vrangesd $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangess $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangess $ab, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangess $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangess $ab, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangess $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangess $ab, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, {sae}, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangess $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to8}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 
0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to8}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to8}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, {sae}, %zmm16, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, {sae}, %zmm16, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreducepd $ab, {sae}, %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, %zmm16, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to16}, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to16}, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - 
vreduceps $ab, (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, {sae}, %zmm16, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, {sae}, %zmm16, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vreduceps $ab, {sae}, %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducesd $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducesd $ab, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducesd $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducesd $ab, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducesd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducesd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, {sae}, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducesd $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducess $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducess $ab, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducess $ab, (%rax), %xmm17, 
%xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducess $ab, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducess $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducess $ab, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, {sae}, %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducess $ab, {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorpd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorpd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorpd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 
0.33 0.33 - 0.50 - - - - 0.33 - - vxorpd (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorps %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorps %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - vxorps %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vxorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s index 2b87f5ecc3d0fe..1cc0d9a63f2371 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512dqvl.s @@ -1139,322 +1139,322 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 
220.67 216.67 110.33 110.33 2.00 159.67 - 2.00 2.00 2.00 - 110.33 - +# CHECK-NEXT: 220.67 216.67 110.33 110.33 2.00 159.67 - 2.00 2.00 2.00 110.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 
0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandnps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandnps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandnps (%rax){1to8}, 
%ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 
0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vandps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vandps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x2 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x2 (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - 
vbroadcastf32x2 %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf32x2 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf32x2 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastf32x2 %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf32x2 (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf64x2 (%rax), %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf64x2 (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf64x2 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf32x2 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf64x2 (%rax), %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf64x2 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf64x2 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x2 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x2 (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x2 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x2 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x2 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x2 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x2 
(%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x2 (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x2 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x2 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcasti32x2 %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x2 (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti64x2 (%rax), %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti64x2 (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti64x2 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x2 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti64x2 (%rax), %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti64x2 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti64x2 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - 0.33 - - vcvtpd2qq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2qq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2qq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2qq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 
- - - - - - 0.33 - vcvtpd2uqq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtpd2uqq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - - vcvtpd2uqq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtpd2uqq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtpd2uqq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to4}, %ymm19 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2qq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2qq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2qq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %xmm19 
{%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2uqq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2uqq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2uqq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - 
- 0.33 - - vcvtqq2pd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtqq2pd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vcvtqq2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtqq2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtqq2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), 
%xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtqq2ps %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtqq2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtqq2ps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - 
- - - - - 0.33 - vcvttpd2qq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2qq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2qq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2qq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %xmm19 +# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttpd2uqq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vcvttpd2uqq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttpd2uqq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttpd2uqq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %ymm19 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2qq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2qq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2qq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vcvttps2uqq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttps2uqq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2uqq (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2uqq (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %xmm19 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtuqq2pd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax), 
%ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtuqq2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtuqq2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx 
(%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtuqq2ps %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtuqq2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtuqq2ps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf64x2 $1, %ymm16, %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextractf64x2 $1, %ymm16, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextractf64x2 $1, %ymm16, %xmm19 {%k1} @@ -1466,41 +1466,41 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vextracti64x2 $1, %ymm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vextracti64x2 $1, %ymm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %xmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdx $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to2}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdx $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to2}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd 
$171, %xmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdx $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to2}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to2}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %ymm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdy $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to4}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdy $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to4}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclasspd $171, %ymm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspdy $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspd $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspdy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspd $171, (%rax){1to4}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %xmm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsx $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to4}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsx $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to4}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %xmm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsx $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsx $171, 
(%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to4}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %ymm16, %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsy $171, (%rax), %k1 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to8}, %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsy $171, (%rax), %k1 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to8}, %k1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vfpclassps $171, %ymm16, %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclasspsy $171, (%rax), %k1 {%k2} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vfpclassps $171, (%rax){1to8}, %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclasspsy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vfpclassps $171, (%rax){1to8}, %k1 {%k2} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %ymm16, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinsertf64x2 $1, %xmm16, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinsertf64x2 $1, (%rax), %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %ymm16, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 
0.33 - vinserti64x2 $1, (%rax), %ymm16, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti64x2 $1, (%rax), %ymm16, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti64x2 $1, (%rax), %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti64x2 $1, (%rax), %ymm16, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vinserti64x2 $1, %xmm16, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vinserti64x2 $1, (%rax), %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vinserti64x2 $1, (%rax), %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2d %k0, %xmm0 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2q %k0, %xmm0 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpmovm2d %k0, %ymm0 @@ -1510,158 +1510,158 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovd2m %ymm0, %k0 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmovq2m %ymm0, %k0 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 
0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - 
- - 0.33 - - - - - - - vorps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm17, %ymm19 {%k1} 
-# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vorps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %ymm17, %ymm19 # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), 
%ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1.50 1.50 - - - - - - - - - - - vpmullq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - - 0.33 - vpmullq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.50 1.50 0.33 0.33 - - - - - - 0.33 - - vpmullq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - - 0.33 - vrangepd $ab, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangepd $ab, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangepd $ab, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %xmm17, 
%xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vrangeps $ab, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax), %ymm17, %ymm19 {%k1} {z} 
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vrangeps $ab, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 
0.33 - - vreducepd $ab, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreducepd $ab, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreducepd $ab, (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreducepd $ab, (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 
- - - - - - - - vreduceps $ab, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vreduceps $ab, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vreduceps $ab, (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vreduceps $ab, (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm17, %xmm19 -# 
CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %xmm17, %xmm19 {%k1} {z} +# 
CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vxorps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vxorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s index a4b5f1733e176c..b10283b08984a6 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfni.s @@ -79,31 +79,31 @@ vgf2p8mulb (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] 
[5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 24.00 - 5.00 5.00 - - - - - - - 5.00 - +# CHECK-NEXT: 24.00 - 5.00 5.00 - - - - - - 5.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineinvqb $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %zmm17, 
%zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8affineqb $0, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vgf2p8mulb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %zmm17, 
%zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s index db03371ee0ff68..a1388eeac6d8af 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512gfnivl.s @@ -130,55 +130,55 @@ vgf2p8mulb (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 24.00 24.00 10.00 10.00 - - - - - - - 10.00 - +# CHECK-NEXT: 24.00 24.00 10.00 10.00 - - - - - - 10.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, 
%xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm17, 
%ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - 
- - - - - - - - - - vgf2p8mulb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s index 440a81ffa016fa..a45e853eadb37b 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifma.s @@ -66,25 +66,25 @@ vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 18.00 - 4.00 4.00 - - - - - - - 4.00 - +# CHECK-NEXT: 18.00 - 4.00 4.00 - - - - - - 4.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 
# CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52huq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpmadd52luq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - 
- - - - - - 0.33 - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to8}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s index f21492bfb979ee..85d2d0fa0a36eb 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512ifmavl.s @@ -104,43 +104,43 @@ vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 18.00 18.00 8.00 8.00 - - - - - - - 8.00 - +# CHECK-NEXT: 18.00 18.00 8.00 8.00 - - - - - - 8.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52huq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52huq (%rdi), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vpmadd52huq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - 
- - - vpmadd52luq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpmadd52luq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpmadd52luq (%rdi){1to4}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s index 41a22ff1c3e2ce..e894f2baa25e06 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaes.s @@ -48,15 +48,15 @@ vaesenclast (%rax), %zmm17, %zmm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 - 1.33 1.33 - - - - - - - 1.33 - +# CHECK-NEXT: 8.00 - 1.33 1.33 - - - - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - vaesdec %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vaesdeclast %zmm16, 
%zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vaesenc %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vaesenclast %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s index 78f692ba616692..7bea5b88dc3ad2 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vaesvl.s @@ -68,23 +68,23 @@ vaesenclast (%rax), %ymm17, %ymm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - - 2.67 - +# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast 
%xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %ymm17, %ymm19 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s index 83f4d3f4032241..180c9e938b16d4 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi.s @@ -86,34 +86,34 @@ vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {k1}{z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] 
[9] [10] [11] [12] -# CHECK-NEXT: 6.00 - 5.00 5.00 - 45.00 - - - - - 5.00 - +# CHECK-NEXT: 6.00 - 5.00 5.00 - 45.00 - - - - 5.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2b %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2b (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2b (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2b %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2b (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2b (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermi2b %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermi2b (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermi2b (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2b %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2b (%rax), %zmm17, %zmm19 
+# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2b (%rax), %zmm17, %zmm19 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2b %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2b (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2b (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - vpermt2b %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - vpermt2b (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - vpermt2b (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %zmm17, %zmm19 {%k1} {z} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s index e4b023feacf9f8..ed8a4170d09384 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2.s @@ -276,14 +276,14 @@ vpshrdw $1, (%rax), %zmm17, %zmm19 {k1}{z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 117.40 0.40 21.33 21.33 3.00 56.40 1.40 3.00 3.00 3.00 0.40 21.33 - +# CHECK-NEXT: 117.40 0.40 21.33 21.33 3.00 56.40 1.40 3.00 3.00 3.00 21.33 0.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressb %zmm16, %zmm19 -# CHECK-NEXT: 1.20 0.20 - - 1.00 2.20 0.20 1.00 1.00 1.00 0.20 - - vpcompressb %zmm16, (%rax) +# CHECK-NEXT: 1.20 0.20 - - 1.00 2.20 0.20 1.00 1.00 1.00 - 0.20 - vpcompressb %zmm16, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressb %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1.20 0.20 - - 1.00 2.20 0.20 1.00 1.00 1.00 0.20 - - vpcompressb %zmm16, (%rax) {%k1} +# CHECK-NEXT: 1.20 0.20 - - 1.00 2.20 0.20 1.00 1.00 1.00 - 0.20 - vpcompressb %zmm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressb %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressw %zmm16, %zmm19 # CHECK-NEXT: 1.50 - - - 0.50 2.00 0.50 0.50 0.50 0.50 - - - vpcompressw %zmm16, (%rax) @@ -291,108 +291,108 @@ vpshrdw $1, (%rax), %zmm17, %zmm19 {k1}{z} # CHECK-NEXT: 1.50 - - - 0.50 2.00 0.50 0.50 0.50 0.50 - - - vpcompressw %zmm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressw %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %zmm16, %zmm19 
-# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %zmm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %zmm16, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %zmm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %zmm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldd $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldd $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldd $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldd $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - 
vpshldd $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldd $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldd $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldq $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldq $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldq $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldq $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldq $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldq $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldq $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 
0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - 
- - - - - 0.33 - - vpshldvq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldvw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshldw $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshldw $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshldw $1, (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshldw $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldw $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldw $1, (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - 
vpshldw $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshldw $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshldw $1, (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdd $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdd $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdd $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdd $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdd $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdd $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdd $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdq $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - 
- vpshrdq $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdq $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdq $1, (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdq $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdq $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdq $1, (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdq $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdq $1, (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to16}, %zmm17, 
%zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvq %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvq %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvq %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvw %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), 
%zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvw %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdvw %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpshrdw $1, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpshrdw $1, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpshrdw $1, (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdw $1, %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdw $1, (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdw $1, (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.50 - - - - 0.50 - - - - - - - vpshrdw $1, %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - vpshrdw $1, (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - vpshrdw $1, (%rax), %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s index 6aed2f069b7d41..3db09bc332d8f4 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmi2vl.s @@ -524,7 +524,7 @@ vpshrdw $1, (%rax), %ymm17, %ymm19 {k1}{z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# 
CHECK-NEXT: 129.33 117.33 42.67 42.67 4.00 101.33 4.00 4.00 4.00 4.00 - 42.67 - +# CHECK-NEXT: 129.33 117.33 42.67 42.67 4.00 101.33 4.00 4.00 4.00 4.00 42.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -549,214 +549,214 @@ vpshrdw $1, (%rax), %ymm17, %ymm19 {k1}{z} # CHECK-NEXT: 1.50 - - - 0.50 2.00 0.50 0.50 0.50 0.50 - - - vpcompressw %ymm16, (%rax) {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpcompressw %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %xmm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %ymm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandb (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandb (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandb %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %xmm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %xmm19 {%k1} 
+# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %xmm16, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %ymm19 # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - vpexpandw (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - vpexpandw (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 2.00 - - - - - - - vpexpandw %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldd $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldd $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldd $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 
0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldd $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldd $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldd $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldq $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vpshldq $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldq $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldq $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldq $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldq $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax), %ymm17, 
%ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldq $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vpshldvd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - 
- - - - - 0.33 - vpshldvq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldvw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldvw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldvw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldw $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vpshldw $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldw $1, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldw $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldw $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldw $1, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldw $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldw $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldw $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshldw $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshldw $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshldw $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldw $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldw $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldw $1, (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshldw $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshldw $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshldw $1, (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdd $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - 0.33 - - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdd $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdd $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdd $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdd $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd 
$1, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdd $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdq $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdq $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdq $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq 
$1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdq $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdq $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdq $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - 
- - - - - - vpshrdvd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd 
(%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to4}, %ymm17, 
%ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - 
- - - - - vpshrdvw %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdvw %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdvw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdvw (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdw $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdw $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdw $1, (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdw $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdw $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdw $1, (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdw $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdw $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdw $1, (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpshrdw $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpshrdw $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpshrdw $1, (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdw $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 
0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdw $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdw $1, (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.83 0.83 - - - 0.33 - - - - - - - vpshrdw $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - - 0.33 - vpshrdw $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.83 0.83 0.33 0.33 - 0.33 - - - - 0.33 - - vpshrdw $1, (%rax), %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s index 5be411d906dc6d..96c1a4e93c2767 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vbmivl.s @@ -144,61 +144,61 @@ vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {k1}{z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 8.00 8.00 10.00 10.00 - 86.00 - - - - - 10.00 - +# CHECK-NEXT: 8.00 8.00 10.00 10.00 - 86.00 - - - - 10.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - 
vpermb (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermb (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %ymm16, %ymm17, %ymm19 {%k1} -# 
CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermi2b %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermi2b (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermi2b (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - - 0.33 - vpermt2b (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 2.33 - - - - - - - vpermt2b %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - 
- - - - 0.33 - vpermt2b (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 2.33 - - - - 0.33 - - vpermt2b (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmultishiftqb %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmultishiftqb (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s index c1d6d5776e5875..bc0b5b697f3b7a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s @@ -3274,1531 +3274,1531 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 404.53 412.53 329.00 329.00 46.00 797.53 3.20 46.00 46.00 46.00 3.20 329.00 - +# CHECK-NEXT: 404.53 412.53 329.00 329.00 46.00 797.53 3.20 46.00 46.00 46.00 329.00 3.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 
0.50 - - - - - 0.33 - vaddpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 
0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to4}, 
%xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vaddps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), 
%xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignd $1, (%rax){1to8}, %ymm17, 
%ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - valignq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastf32x4 (%rax), %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf32x4 (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastf32x4 (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcasti32x4 (%rax), %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x4 (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcasti32x4 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - valignq $1, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastf32x4 (%rax), %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf32x4 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastf32x4 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcasti32x4 (%rax), %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 
0.33 - - vbroadcasti32x4 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcasti32x4 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastsd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastsd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastsd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastsd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastsd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastsd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastsd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastss (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastss (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastss (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastss (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vbroadcastss (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vbroadcastss (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastss (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastss (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vbroadcastss %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vbroadcastss (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vbroadcastss (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqpd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqpd (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - 
vcmpeqpd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqpd (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vcmpeqps %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vcmpeqps (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vcmpeqps (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 
- - - 1.00 - - - - - - - vcvtdq2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %ymm16, %xmm19 {%k1} 
{z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2dq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dqx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to4}, %xmm19 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 
0.50 - - - 1.00 - - - - - - - vcvtpd2ps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2psx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqx (%rax), %xmm19 -# CHECK-NEXT: 
0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtpd2udq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udqx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vcvtps2dq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 
0.33 - vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2pd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - 
- - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq 
%ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2dq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dqx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - 
- - - vcvttps2dq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %ymm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqy (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %ymm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqy (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %ymm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqy (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqx (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to2}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqx (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvttpd2udq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udqx (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %xmm16, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %ymm16, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), 
%xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - 
- - 0.33 - - vdivpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), 
%ymm17, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - 
vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm17, %ymm19 {%k1} {z} +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to4}, 
%ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vfmadd231pd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, 
%ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - 
- - - - 0.33 - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), 
%ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - - 2.67 - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherqps (%rax,%ymm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vgatherdps (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vgatherqps (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - 2.67 - - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vgatherqps (%rax,%ymm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vgatherdps (%rax,%xmm1,2), %xmm2 
{%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vgatherqps (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to4}, 
%ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - 
vmaxps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmaxps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmaxps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmaxps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 
0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 
0.50 0.50 - - - - - - - - - - - vminpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm17, 
%ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vminps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vminps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovapd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovapd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovapd (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovapd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovapd (%rax), 
%xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovapd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovapd %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovapd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovapd (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovapd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovapd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovapd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovapd %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovapd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovapd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovapd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovaps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovaps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovaps (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovaps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovaps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovaps (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovaps %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovaps 
(%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovaps (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovaps %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovaps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovaps (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovaps %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovaps %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovaps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovaps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovddup (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovddup (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovddup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovddup (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa32 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa32 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa32 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa32 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - 
- - - 0.33 - - vmovdqa32 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa32 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa32 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa32 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa32 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa32 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa32 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa32 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa32 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa32 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa32 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa64 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa64 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa64 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa64 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa64 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - 
vmovdqa64 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa64 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqa64 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqa64 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqa64 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa64 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa64 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqa64 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqa64 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqa64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqa64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu32 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu32 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu32 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu32 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu32 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu32 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu32 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu32 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - 
- vmovdqu32 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu32 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu32 (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu32 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu32 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu32 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu32 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu32 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu32 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu32 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu32 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu64 %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu64 (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu64 (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu64 %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu64 (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu64 (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu64 %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu64 (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu64 (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovdqu64 %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovdqu64 (%rax), %ymm19 +# 
CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovdqu64 (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu64 %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu64 (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu64 (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovdqu64 %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovdqu64 %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovdqu64 (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovdqu64 (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovddup (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovddup (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovddup (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovddup (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vmovddup %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovddup (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - {evex} vmovntdqa (%rax), %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - {evex} vmovntdqa (%rax), %ymm0 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovddup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - {evex} vmovntdqa (%rax), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - {evex} vmovntdqa (%rax), %ymm0 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - 
- - 0.33 - - vmovshdup (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovshdup (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovshdup (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovshdup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovshdup (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovshdup (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovshdup (%rax), %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovshdup (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovshdup (%rax), %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovshdup %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovshdup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovshdup (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovsldup (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovsldup (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovsldup (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 
- 0.33 - - - - 0.33 - - vmovsldup (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovsldup (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovsldup (%rax), %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovsldup (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovsldup (%rax), %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vmovsldup %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovsldup (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovsldup (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovupd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovupd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovupd (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovupd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovupd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovupd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovupd %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovupd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovupd (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 
- - - 0.33 - - - - - - - vmovupd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovupd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovupd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovupd %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovupd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovupd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovupd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovups %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %xmm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %xmm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovups %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovups (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovups (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %xmm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovups %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovups (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovups (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - - - - - - - - - vmovups %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vmovups (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vmovups (%rax), %ymm19 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %ymm16, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovups %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovups (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovups (%rax), %ymm19 {%k1} 
# CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - vmovups %ymm16, (%rax) {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vmovups %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vmovups (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vmovups (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to4}, %xmm17, %xmm19 {%k1} # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vmulps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vmulps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vmulps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - 
vpaddd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to8}, %ymm17, 
%ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), 
%xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpaddq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 
0.33 - - vpbroadcastd (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastq (%rax), %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - vpbroadcastq (%rax), %ymm19 +# 
CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - vpbroadcastq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpbroadcastq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpbroadcastq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpbroadcastq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd 
(%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - 
- - - 1.00 - - - - - - - vpcmpeqq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %xmm1, %k2 +# CHECK-NEXT: 
- - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to2}, 
%xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpgtq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpgtq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpgtq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 
0.33 - vpcmpeqq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpeqq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to8}, %ymm1, %k2 # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpcmpequq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpcmpequq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - 
- - - 0.33 - - vpcmpequq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpcmpequq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to2}, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to4}, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd $0, (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax), 
%ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd $0, (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm16, %ymm17, %ymm19 
{%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %xmm16, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to4}, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to4}, %xmm19 {%k1} {z} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to8}, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps $0, (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps $0, (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm16, 
%xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermilps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermilps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - 
- - - 0.33 - vpermilps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermilps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to4}, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd $0, (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd $0, (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - 
- - - - - - vpermpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - 
vpermps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %ymm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to4}, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq $0, (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq $0, (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %ymm16, %ymm17, 
%ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpermq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - - 2.67 - vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - - 1.33 - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} -# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - - 0.67 - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 2.67 2.67 - 1.00 - - - - 2.67 - - vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vpgatherqd 
(%rax,%ymm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 1.00 1.33 1.33 - 1.00 - - - - 1.33 - - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 1.00 0.50 0.67 0.67 - 0.50 - - - - 0.67 - - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - 
- - - - 0.33 - vpmovsxbq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxbq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxbq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxbq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxbq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxbq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxbq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxdq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxdq (%rax), 
%xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxdq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxdq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxdq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxdq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - 
- 0.33 - - vpmovsxwd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovsxwq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovsxwq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovsxwq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovsxwq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovsxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovsxwq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm19 # 
CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxbq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxbq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxbq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - 
- - 1.00 - - - - - - - vpmovzxbq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxbq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxbq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxbq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxdq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxdq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxdq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxdq %xmm16, %ymm19 {%k1} {z} -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxdq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxdq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwd (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwd %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwd (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 
0.33 - vpmovzxwq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpmovzxwq %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpmovzxwq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpmovzxwq (%rax), %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpmovzxwq %xmm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpmovzxwq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpmovzxwq (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to4}, %xmm17, 
%xmm19 {%k1} # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - vpmulld %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterqd %xmm1, 
(%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - vpmulld (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vpscatterdd %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vpscatterdq %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vpscatterqd %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vpscatterqq %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vpscatterdd %ymm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vpscatterdq %ymm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vpscatterqd %xmm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vpscatterqq %ymm1, (%rdx,%ymm0,4) {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %xmm16, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to4}, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 
0.33 - - vpshufd $0, (%rax){1to4}, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %xmm16, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %ymm16, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to8}, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to8}, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %ymm16, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpshufd $0, %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax), %ymm19 {%k1} {z} -# 
CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpshufd $0, (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpshufd $0, (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 
0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - 
- - - 0.33 - - vpsubq (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - vpsubq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 
1.00 - - - - - - - vptestmd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestmq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestmq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestmq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to4}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to4}, 
%xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to4}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to8}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmd %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmd (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %xmm0, %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %xmm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to2}, %xmm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %xmm0, %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %xmm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to2}, %xmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %ymm0, %ymm1, %k2 -# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %ymm1, %k2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to4}, %ymm1, %k2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vptestnmq %ymm0, %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax), %ymm1, %k2 {%k3} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vptestnmq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vptestnmq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# 
CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckhdq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckhdq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm17, %xmm19 +# 
CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to8}, %ymm17, 
%ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vpunpckldq %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 0.20 - - vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 0.20 - - vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterdpd %ymm1, (%rdx,%xmm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1} -# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 0.20 - - vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vpunpckldq (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vscatterdps %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vscatterdpd %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vscatterqps %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 1.00 0.20 0.20 1.00 1.00 1.00 - 0.20 - vscatterqpd %xmm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 4.00 0.20 0.20 4.00 4.00 4.00 - 0.20 - vscatterdps %ymm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vscatterdpd 
%ymm1, (%rdx,%xmm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vscatterqps %xmm1, (%rdx,%ymm0,4) {%k1} +# CHECK-NEXT: 1.70 0.70 - - 2.00 0.20 0.20 2.00 2.00 2.00 - 0.20 - vscatterqpd %ymm1, (%rdx,%ymm0,4) {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff32x4 $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 
- - - - 0.33 - - vshuff64x2 $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshuff64x2 $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshuff64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %ymm17, %ymm19 {%k1} +# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi32x4 $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi32x4 $0, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vshufi64x2 $0, %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vshufi64x2 $0, (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 
- - vshufi64x2 $0, (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm16, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to2}, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to2}, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to2}, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %ymm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to4}, %ymm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm19 {%k1} +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to4}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtpd %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtpd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtpd (%rax){1to4}, %ymm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm16, %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to4}, %xmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm16, %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to4}, %xmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %xmm16, %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %xmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to4}, %xmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm16, %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 
- - - - - - 0.33 - - vsqrtps (%rax), %ymm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to8}, %ymm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm16, %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to8}, %ymm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vsqrtps %ymm16, %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax), %ymm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vsqrtps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vsqrtps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 
0.33 - vsubpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 
0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to8}, %ymm17, %ymm19 {%k1} # 
CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - vsubps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - vsubps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - vsubps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd 
(%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - 
vunpckhps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpckhps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpckhps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpckhps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # 
CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to2}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 
0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklpd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklpd (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), 
%xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vunpcklps %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vunpcklps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vunpcklps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s index 2bfc313877d1e3..f24d20b19c69a4 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnni.s @@ -104,43 +104,43 @@ vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] 
[8] [9] [10] [11] [12] -# CHECK-NEXT: 36.00 - 8.00 8.00 - - - - - - - 8.00 - +# CHECK-NEXT: 36.00 - 8.00 8.00 - - - - - - 8.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusds %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 
- - - - - 0.33 - - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusds %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpbusds %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssd %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssd %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssd %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 
- 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssds %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %zmm17, %zmm19 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %zmm17, %zmm19 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssds %zmm16, %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %zmm17, %zmm19 {%k1} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %zmm17, %zmm19 {%k1} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1.00 - - - - - - - - - - - - vpdpwssds %zmm16, %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %zmm17, %zmm19 {%k1} {z} -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s index 958193d0b4497a..2b661601690a93 100644 --- 
a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vnnivl.s @@ -180,79 +180,79 @@ vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 36.00 36.00 16.00 16.00 - - - - - - - 16.00 - +# CHECK-NEXT: 36.00 36.00 16.00 16.00 - - - - - - 16.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - - vpdpbusd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), 
%xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds 
(%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm17, %ymm19 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm17, %xmm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm17, %xmm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 {%k1} # CHECK-NEXT: 0.50 
0.50 - - - - - - - - - - - vpdpwssds %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm17, %xmm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to4}, %xmm17, %xmm19 {%k1} {z} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm17, %ymm19 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm17, %ymm19 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm16, %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm17, %ymm19 {%k1} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm17, %ymm19 {%k1} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {%k1} # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm16, %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm17, %ymm19 {%k1} {z} -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s index 0c28cf84003875..35c5137380fb75 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdq.s @@ -33,9 +33,9 @@ vpclmulqdq $11, (%rax), %zmm17, %zmm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %zmm16, %zmm17, %zmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %zmm17, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s index 700cb1a03db500..1b5597aeaa57f0 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpclmulqdqvl.s @@ -38,11 +38,11 @@ vpclmulqdq $11, (%rax), %ymm17, %ymm19 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.67 0.67 - 4.00 - - - - - 0.67 - +# CHECK-NEXT: - - 0.67 0.67 - 4.00 - - - - 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %xmm17, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - 
- - - 1.00 - - - - - - - vpclmulqdq $11, %ymm16, %ymm17, %ymm19 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %ymm17, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %ymm17, %ymm19 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s index 174540dc972bdd..4587dbd97e93f9 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdq.s @@ -70,25 +70,25 @@ vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 4.00 4.00 - 18.00 - - - - - 4.00 - +# CHECK-NEXT: - - 4.00 4.00 - 18.00 - - - - 4.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %zmm1, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to16}, %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to16}, %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %zmm1, %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to16}, %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to16}, %zmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %zmm1, %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 
1.00 - - - - - 0.33 - vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %zmm1, %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %zmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to8}, %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %zmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to8}, %zmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %zmm1, %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %zmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to8}, %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %zmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to8}, %zmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %zmm1, %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %zmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %zmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s index 9b5ebc7e0e9ea8..d425c64196fef9 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vpopcntdqvl.s @@ -112,43 +112,43 @@ vpopcntq (%rdi){1to4}, %ymm0 {%k1} {z} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] 
[12] -# CHECK-NEXT: - - 8.00 8.00 - 36.00 - - - - - 8.00 - +# CHECK-NEXT: - - 8.00 8.00 - 36.00 - - - - 8.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %xmm1, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to4}, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to4}, %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %xmm1, %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to4}, %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to4}, %xmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %xmm1, %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to4}, %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to4}, %xmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %ymm1, %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to8}, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to8}, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %ymm1, %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %ymm0 {%k1} -# 
CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to8}, %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to8}, %ymm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntd %ymm1, %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi), %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntd (%rdi){1to8}, %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi), %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntd (%rdi){1to8}, %ymm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %xmm1, %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %xmm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to2}, %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %xmm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to2}, %xmm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %xmm1, %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %xmm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to2}, %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %xmm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to2}, %xmm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %xmm1, %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %xmm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to2}, %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %xmm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to2}, %xmm0 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %ymm1, %ymm0 -# CHECK-NEXT: - - 0.33 
0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %ymm0 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to4}, %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %ymm0 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to4}, %ymm0 # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %ymm1, %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %ymm0 {%k1} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to4}, %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %ymm0 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to4}, %ymm0 {%k1} # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpopcntq %ymm1, %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi), %ymm0 {%k1} {z} -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpopcntq (%rdi){1to4}, %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi), %ymm0 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpopcntq (%rdi){1to4}, %ymm0 {%k1} {z} diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s index 9e0ca1373214e2..b8fec0d6d8341b 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxgfni.s @@ -58,19 +58,19 @@ vgf2p8mulb (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 6.00 6.00 2.00 2.00 - - - - - - - 2.00 - +# CHECK-NEXT: 6.00 6.00 2.00 2.00 - - - - - - 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 
0.33 - vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineinvqb $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineinvqb $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8affineqb $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8affineqb $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vgf2p8mulb %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vgf2p8mulb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vgf2p8mulb (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s index 58ab6f9e8f2989..2b64fedbd1f4e2 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s @@ -68,23 +68,23 @@ vpdpwssds (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 
8.00 8.00 2.67 2.67 - - - - - - - 2.67 - +# CHECK-NEXT: 8.00 8.00 2.67 2.67 - - - - - - 2.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpbusds (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - 
- - - - vpdpwssds %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vpdpwssds (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s index 253cbbcafee6de..9f6cf8cefd8acd 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi1.s @@ -93,33 +93,33 @@ tzcnt (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.00 15.33 4.33 4.33 - 5.33 2.00 - - - 5.33 4.33 - +# CHECK-NEXT: 2.00 15.33 4.33 4.33 - 5.33 2.00 - - - 4.33 5.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - andnl %eax, %ebx, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - andnl %eax, %ebx, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - andnl (%rax), %ebx, %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - andnq %rax, %rbx, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - andnq %rax, %rbx, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - andnq (%rax), %rbx, %rcx # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bextrl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - bextrl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - bextrl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bextrq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - bextrq %rax, (%rbx), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsil %eax, %ecx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - bextrq %rax, (%rbx), %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - 
blsil %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsil (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsiq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsiq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsiq (%rax), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsmskl %eax, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsmskl %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsmskl (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsmskq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsmskq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsmskq (%rax), %rcx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsrl %eax, %ecx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsrl %eax, %ecx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsrl (%rax), %ecx -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - blsrq %rax, %rcx +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - blsrq %rax, %rcx # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - blsrq (%rax), %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntw %ax, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - tzcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - tzcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - tzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s index ed6ab74a9d4e87..0ba2cd6203d2db 100644 --- 
a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-bmi2.s @@ -108,39 +108,39 @@ shrx %rax, (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 9.40 16.40 5.33 5.33 - 2.40 9.40 - - - 0.40 5.33 - +# CHECK-NEXT: 9.40 16.40 5.33 5.33 - 2.40 9.40 - - - 5.33 0.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - bzhil %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bzhil %eax, (%rbx), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bzhil %eax, (%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - bzhiq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bzhiq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - mulxl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - mulxq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - mulxq (%rax), %rbx, %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pdepl %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pdepl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pdepl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pdepq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pdepq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pdepq (%rax), %rbx, 
%rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pextl %eax, %ebx, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pextl (%rax), %ebx, %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pextl (%rax), %ebx, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - pextq %rax, %rbx, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - pextq (%rax), %rbx, %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - pextq (%rax), %rbx, %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - rorxl $1, %eax, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - rorxl $1, (%rax), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - rorxl $1, (%rax), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - rorxq $1, %rax, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - rorxq $1, (%rax), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - rorxq $1, (%rax), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sarxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sarxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sarxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sarxq %rax, (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shlxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shlxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shlxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shlxq %rax, (%rbx), %rcx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrxl %eax, %ebx, %ecx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shrxl %eax, (%rbx), %ecx +# CHECK-NEXT: 0.50 - 0.33 
0.33 - - 0.50 - - - 0.33 - - shrxl %eax, (%rbx), %ecx # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrxq %rax, %rbx, %rcx -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - shrxq %rax, (%rbx), %rcx +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - shrxq %rax, (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s index ba11eea9ce6389..e7a06f80511581 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clflushopt.s @@ -31,8 +31,8 @@ clflushopt (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - clflushopt (%rax) +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - clflushopt (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s index 06192b2184ee8f..f5f7ce7d8f52a6 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-clwb.s @@ -31,8 +31,8 @@ clwb (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 0.20 - - clwb 
(%rax) +# CHECK-NEXT: 0.20 0.20 - - 0.50 0.20 0.20 0.50 0.50 0.50 - 0.20 - clwb (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s index f7b17a75b5566f..a305a4badaca59 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmov.s @@ -226,7 +226,7 @@ cmovgq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 48.00 - 16.00 16.00 - - 48.00 - - - - 16.00 - +# CHECK-NEXT: 48.00 - 16.00 16.00 - - 48.00 - - - 16.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -246,22 +246,22 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgew %si, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovlew %si, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgw %si, %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovow (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnow (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpw (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlw 
(%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlew (%rax), %di -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovow (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnow (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlw (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlew (%rax), %di +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovol %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovnol %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovbl %esi, %edi @@ -278,22 +278,22 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgel %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovlel %esi, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgl %esi, %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovol (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 
- 0.33 - cmovnol (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovael (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmoval (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpl (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovll (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlel (%rax), %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovol (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnol (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovael (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmoval (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - 
cmovnpl (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovll (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlel (%rax), %edi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovoq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovnoq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovbq %rsi, %rdi @@ -310,19 +310,19 @@ cmovgq (%rax), %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgeq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovleq %rsi, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cmovgq %rsi, %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovoq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnoq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmoveq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovneq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovbeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovaq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovsq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnsq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovpq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovnpq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovlq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgeq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovleq (%rax), %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - cmovgq (%rax), %rdi +# CHECK-NEXT: 0.50 
- 0.33 0.33 - - 0.50 - - - 0.33 - - cmovoq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnoq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmoveq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovneq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovbeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovaq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovsq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnsq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovpq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovnpq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovlq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgeq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovleq (%rax), %rdi +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - cmovgq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s index 07711da6b4886d..2af2da9a6fa027 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-cmpxchg.s @@ -37,11 +37,11 @@ lock cmpxchg16b (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 21.40 10.40 1.33 1.33 2.00 10.40 17.40 2.00 2.00 2.00 4.40 1.33 - +# CHECK-NEXT: 21.40 10.40 1.33 1.33 2.00 10.40 17.40 2.00 2.00 2.00 1.33 4.40 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 4.30 2.80 
0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.80 0.33 - cmpxchg8b (%rax) -# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 1.40 0.33 - cmpxchg16b (%rax) -# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.80 0.33 - lock cmpxchg8b (%rax) -# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 1.40 0.33 - lock cmpxchg16b (%rax) +# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.33 0.80 - cmpxchg8b (%rax) +# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 0.33 1.40 - cmpxchg16b (%rax) +# CHECK-NEXT: 4.30 2.80 0.33 0.33 0.50 0.80 4.30 0.50 0.50 0.50 0.33 0.80 - lock cmpxchg8b (%rax) +# CHECK-NEXT: 6.40 2.40 0.33 0.33 0.50 4.40 4.40 0.50 0.50 0.50 0.33 1.40 - lock cmpxchg16b (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s index 2c05d5ae3b8883..eaf6190e113918 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-f16c.s @@ -48,14 +48,14 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 4.00 0.67 0.67 1.00 4.00 - 1.00 1.00 1.00 - 0.67 - +# CHECK-NEXT: 4.00 4.00 0.67 0.67 1.00 4.00 - 1.00 1.00 1.00 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtph2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vcvtph2ps (%rax), %ymm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - 
- - - vcvtps2ph $0, %xmm0, %xmm2 # CHECK-NEXT: 0.50 0.50 - - 0.50 - - 0.50 0.50 0.50 - - - vcvtps2ph $0, %xmm0, (%rax) # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - vcvtps2ph $0, %ymm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s index a01ac71efec200..97d2e70fcfd43b 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-fma.s @@ -508,199 +508,199 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 96.00 96.00 32.00 32.00 - - - - - - - 32.00 - +# CHECK-NEXT: 96.00 96.00 32.00 32.00 - - - - - - 32.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - 
vfmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ps (%rax), %ymm1, 
%ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub213ps %ymm0, 
%ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmaddsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231pd (%rax), 
%xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 
0.33 0.33 - - - - - - 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - 
- - 0.33 - vfmsubadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfmsubadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231pd (%rax), %ymm1, %ymm2 +# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - 
- - - - - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - 
- - 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vfnmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s 
index 097af7bccaf741..84e28195593628 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-gfni.s @@ -43,13 +43,13 @@ gf2p8mulb (%rax), %xmm1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 3.00 3.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: 3.00 3.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8affineinvqb $0, %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8affineinvqb $0, (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8affineinvqb $0, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8affineqb $0, %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8affineqb $0, (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8affineqb $0, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - gf2p8mulb %xmm0, %xmm1 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - gf2p8mulb (%rax), %xmm1 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - gf2p8mulb (%rax), %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s index 633c5994ae5307..e05a540eccb43e 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lea.s @@ -301,142 +301,142 @@ lea 1024(%rax, %rbx, 2), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 9.00 144.00 - - - 9.00 9.00 - - - 9.00 - - +# CHECK-NEXT: 9.00 144.00 - - - 9.00 9.00 - - - - 9.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] 
[9] [10] [11] [12] Instructions: -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 0, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 0, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 0, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 0, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%ebx,2), %cx +# CHECK-NEXT: 
0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw (%rax,%rbx,2), %cx +# 
CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw (%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal (%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq (%rax,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 
- - - - 0.20 - leaw -16(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 
0.20 - - leaw -16(%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw -16(%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw -16(%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal -16(%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq -16(%rax,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024, %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024, %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024, %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024, %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - 
- - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(,%rbx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx # CHECK-NEXT: - 
1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%eax,%ebx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%eax,%ebx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%eax,%ebx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%eax,%ebx,2), %rcx -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - leaw 1024(%rax,%rbx,2), %cx +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - leaw 1024(%rax,%rbx,2), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leal 1024(%rax,%rbx,2), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - leaq 1024(%rax,%rbx,2), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s index e6a480ca72de67..cb256ab1f5df8f 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-lzcnt.s @@ -43,13 +43,13 @@ lzcntq (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntw %cx, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 
- lzcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - lzcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - lzcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - lzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s index 40f046388cbf1c..9fe4880b7c6133 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-mmx.s @@ -287,112 +287,112 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 75.50 - 15.33 15.33 1.00 40.00 0.50 1.00 1.00 1.00 - 15.33 - +# CHECK-NEXT: 75.50 - 15.33 15.33 1.00 40.00 0.50 1.00 1.00 1.00 15.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 5.50 - - - - 4.00 0.50 - - - - - - emms # CHECK-NEXT: - - - - - 1.00 - - - - - - - movd %eax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movd %mm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movd %mm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - movq %rax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movq %mm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movq %mm0, (%rax) # CHECK-NEXT: - - - - - 2.00 - - - - - - - packsswb %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packsswb (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packsswb (%rax), %mm2 # CHECK-NEXT: - - - - - 2.00 - - - - - - 
- packssdw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packssdw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packssdw (%rax), %mm2 # CHECK-NEXT: - - - - - 2.00 - - - - - - - packuswb %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - packuswb (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - packuswb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddb %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddb (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddsb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddusb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddusb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddusb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - paddusw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - paddusw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - paddusw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pand %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pand (%rax), %mm2 +# 
CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pand (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pandn %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pandn (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pandn (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpeqw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpeqw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpeqw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpgtw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpgtw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaddwd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaddwd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaddwd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - 
- - pmullw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmullw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmullw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - por %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - por (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - por (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pslld $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pslld %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pslld (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pslld (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllq $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psllq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psllq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psllw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psllw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psllw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrad $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrad %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrad (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrad (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psraw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psraw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psraw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psraw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrld $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrld %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrld (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrld (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlq $1, 
%mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrlq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrlq (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlw $1, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psrlw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psrlw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psrlw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubb %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubb (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubb (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubd (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubsb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubusb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubusb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubusb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psubusw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psubusw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psubusw (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - psubw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhbw 
%mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhdq %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhdq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhdq (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckhwd %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckhwd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckhwd (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpcklbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpcklbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpcklbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpckldq %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpckldq (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpckldq (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - punpcklwd %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - punpcklwd (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - punpcklwd (%rax), %mm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - pxor %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - pxor (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - pxor (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s index c9a6eda7df95e6..87fc2921ccc87c 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-movbe.s @@ -43,13 +43,13 @@ movbe (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.20 4.20 1.00 1.00 1.50 0.20 2.20 1.50 1.50 1.50 0.20 1.00 - 
+# CHECK-NEXT: 2.20 4.20 1.00 1.00 1.50 0.20 2.20 1.50 1.50 1.50 1.00 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - movbew %cx, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - movbew (%rax), %cx +# CHECK-NEXT: 0.70 0.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - movbew (%rax), %cx # CHECK-NEXT: - 1.00 - - 0.50 - - 0.50 0.50 0.50 - - - movbel %ecx, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - movbel (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - movbel (%rax), %ecx # CHECK-NEXT: 0.50 1.00 - - 0.50 - 0.50 0.50 0.50 0.50 - - - movbeq %rcx, (%rax) -# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - - 0.33 - movbeq (%rax), %rcx +# CHECK-NEXT: 0.50 1.00 0.33 0.33 - - 0.50 - - - 0.33 - - movbeq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s index 5ce1dc32042b29..f16f2516d4c253 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-pclmul.s @@ -33,9 +33,9 @@ pclmulqdq $11, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - pclmulqdq $11, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pclmulqdq $11, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pclmulqdq $11, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s 
index cf827bd06cc59a..d82c3da8b06c07 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-popcnt.s @@ -43,13 +43,13 @@ popcntq (%rax), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - - 1.00 - +# CHECK-NEXT: - 6.00 1.00 1.00 - - - - - - 1.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntw %cx, %cx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntw (%rax), %cx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntw (%rax), %cx # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntl %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntl (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntl (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - popcntq %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - popcntq (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - popcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s index 590ed99d2bbeaf..292f545e60d4d7 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-prefetchw.s @@ -33,9 +33,9 @@ prefetchw (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.67 0.67 - - - - - - - 0.67 - +# CHECK-NEXT: - - 0.67 0.67 - - - - - - 0.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetch (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 
- - - - 0.33 - prefetchw (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetch (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetchw (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s index ba910849940557..87bee6e8109a7b 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdrand.s @@ -35,10 +35,10 @@ rdrand %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 17.30 27.30 1.00 1.00 - 14.30 11.30 - - - 1.80 1.00 - +# CHECK-NEXT: 17.30 27.30 1.00 1.00 - 14.30 11.30 - - - 1.00 1.80 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandw %ax -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandl %eax -# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.60 0.33 - rdrandq %rax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandw %ax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandl %eax +# CHECK-NEXT: 5.77 9.10 0.33 0.33 - 4.77 3.77 - - - 0.33 0.60 - rdrandq %rax diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s index 9b0904200c9cf0..155486864472ef 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-rdseed.s @@ -35,10 +35,10 @@ rdseed %rax # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 19.50 24.00 1.00 1.00 - 18.00 10.50 - - - - 1.00 - +# CHECK-NEXT: 19.50 24.00 1.00 1.00 - 18.00 10.50 - - - 1.00 - - # CHECK: Resource pressure by 
instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedw %ax -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedl %eax -# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - - 0.33 - rdseedq %rax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedw %ax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedl %eax +# CHECK-NEXT: 6.50 8.00 0.33 0.33 - 6.00 3.50 - - - 0.33 - - rdseedq %rax diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s index 348eb91753d923..4295a872c25bf3 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse1.s @@ -336,131 +336,131 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 74.00 24.50 19.33 19.33 5.00 30.50 1.00 5.00 5.00 5.00 - 19.33 - +# CHECK-NEXT: 74.00 24.50 19.33 19.33 5.00 30.50 1.00 5.00 5.00 5.00 19.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addss %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addss (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addss (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andnps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andnps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andnps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - 
- - - - - - andps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - comiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - comiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - comiss (%rax), %xmm1 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtpi2ps %mm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - cvtps2pi %xmm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - cvtps2pi (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - cvtps2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2ss %ecx, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 2.00 - - - - - - - cvtsi2ss %rcx, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2ssl (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtsi2ssq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - cvtss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 
0.33 0.33 - - - - - - - 0.33 - cvtss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2si (%rax), %rcx # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - cvttps2pi %xmm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - cvttps2pi (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - cvttps2pi (%rax), %mm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - 1.00 - - - - - - - cvttss2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttss2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttss2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttss2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttss2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - divps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - divss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divss (%rax), %xmm2 -# CHECK-NEXT: 1.83 0.33 0.33 0.33 - 0.33 0.50 - - - - 0.33 - ldmxcsr (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divss (%rax), %xmm2 +# CHECK-NEXT: 1.83 0.33 0.33 0.33 - 0.33 0.50 - - - 0.33 - - ldmxcsr (%rax) # CHECK-NEXT: 2.00 - - - 0.50 - - 0.50 0.50 0.50 - - - maskmovq %mm0, %mm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxss (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - 
- - - - - - minps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movaps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movaps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movaps (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movhlps %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movlhps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movhps %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - movhps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - movhps (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movlps %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - movlps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - movlps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movmskps %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntq %mm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movss %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movss %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movss (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movss (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movups %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movups (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - 
movups (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulss (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - orps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - orps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - orps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pavgb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pavgb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pavgb (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pavgw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pavgw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pavgw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - pextrw $1, %mm0, %ecx # CHECK-NEXT: - - - - - 2.00 - - - - - - - pinsrw $1, %eax, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pinsrw $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pinsrw $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaxsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaxsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaxsw (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaxub %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaxub (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmaxub (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pminsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pminsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pminsw (%rax), %mm2 # 
CHECK-NEXT: 1.00 - - - - - - - - - - - - pminub %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pminub (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pminub (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmovmskb %mm0, %ecx # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhuw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhuw (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht0 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht1 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetcht2 (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - prefetchnta (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhuw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht0 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht1 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetcht2 (%rax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - prefetchnta (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - psadbw %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - psadbw (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - psadbw (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - pshufw $1, %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pshufw $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pshufw $1, (%rax), %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rcpps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rcpps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rcpps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rcpss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rcpss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rcpss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rsqrtps (%rax), 
%xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rsqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - rsqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - rsqrtss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - rsqrtss (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - sfence # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - shufps $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - shufps $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - shufps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtps %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtps (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtps (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtss %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtss (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtss (%rax), %xmm2 # CHECK-NEXT: 1.50 - - - 0.50 - 0.50 0.50 0.50 0.50 - - - stmxcsr (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subss %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subss (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subss (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - ucomiss %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - ucomiss (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - ucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpckhps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpckhps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpckhps (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpcklps 
%xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpcklps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpcklps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - xorps %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - xorps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - xorps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s index 10e53a8df89f72..2b37853229def7 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse2.s @@ -692,96 +692,96 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 107.70 99.20 39.00 39.00 9.00 71.70 1.20 7.50 7.50 8.00 0.20 39.00 - +# CHECK-NEXT: 107.70 99.20 39.00 39.00 9.00 71.70 1.20 7.50 7.50 8.00 39.00 0.20 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andnpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andnpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andnpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - andpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - andpd (%rax), %xmm2 -# CHECK-NEXT: 0.70 0.20 - - 0.50 
0.20 0.70 0.50 0.50 0.50 0.20 - - clflush (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - andpd (%rax), %xmm2 +# CHECK-NEXT: 0.70 0.20 - - 0.50 0.20 0.70 0.50 0.50 0.50 - 0.20 - clflush (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cmpeqsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cmpeqsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cmpeqsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - comisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - comisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - comisd (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtdq2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvtdq2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2pi %xmm0, %mm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2pi (%rax), %mm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtpd2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - 
- cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvtps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtps2pd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsd2si (%rax), %rcx # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsd2ss %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtsi2sd %rcx, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2sdl (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtsi2sdq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtsi2sdq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvtss2sd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvtss2sd (%rax), %xmm2 
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvtss2sd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvttpd2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - 1.00 - - - - - - - cvttpd2pi %xmm0, %mm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - - 0.33 - cvttpd2pi (%rax), %mm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - 1.00 - - - - 0.33 - - cvttpd2pi (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - cvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttsd2si %xmm0, %ecx # CHECK-NEXT: 1.50 0.50 - - - - - - - - - - - cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttsd2si (%rax), %ecx -# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - - 0.33 - cvttsd2si (%rax), %rcx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttsd2si (%rax), %ecx +# CHECK-NEXT: 1.50 0.50 0.33 0.33 - - - - - - 0.33 - - cvttsd2si (%rax), %rcx # CHECK-NEXT: 1.00 - - - - - - - - - - - - divpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - divsd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - divsd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - divsd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - lfence # CHECK-NEXT: - - - - 1.50 - - - - 0.50 - - - maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxpd (%rax), %xmm2 # 
CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - maxsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - maxsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - maxsd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - mfence # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - minsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - minsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - minsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movapd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movapd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movapd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movd %eax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movd %xmm0, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - movdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movdqa %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movdqa (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movdqu %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movdqu (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movdqu (%rax), %xmm2 # CHECK-NEXT: 0.83 0.33 - - - 0.83 - - - - - - - movdq2q %xmm0, %mm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movhpd %xmm0, 
(%rax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - movhpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - movhpd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movlpd %xmm0, (%rax) -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - movlpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - movlpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movmskpd %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntil %eax, (%rax) # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntiq %rax, (%rax) @@ -789,177 +789,177 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movntpd %xmm0, (%rax) # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movq %xmm0, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - movq %rax, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movq (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - movq %xmm0, %rcx # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movq %xmm0, (%rax) # CHECK-NEXT: 1.33 0.33 - - - 0.33 - - - - - - - movq2dq %mm0, %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - movsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movsd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - movupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 - - 0.50 0.50 0.50 - - - movupd %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movupd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movupd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulpd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulpd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - mulsd 
%xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - mulsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - mulsd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - orpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - orpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - orpd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packssdw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packssdw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packssdw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packsswb %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packsswb (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packsswb (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packuswb %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packuswb (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packuswb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddb %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddb (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - paddq %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - paddq (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - paddq (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddq %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddq (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddsb %xmm0, %xmm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddusb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddusb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddusb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - paddusw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - paddusw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - paddusw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - paddw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - paddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - paddw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pand %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pand (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pand (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pandn %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pandn (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pandn (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pavgb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pavgb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pavgb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pavgw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pavgw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pavgw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqb %xmm0, %xmm2 -# 
CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpgtw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpgtw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpgtw (%rax), %xmm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrw $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrw $1, %eax, %xmm0 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrw $1, (%rax), %xmm0 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrw $1, (%rax), %xmm0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaddwd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaddwd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaddwd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsw 
(%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxub %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxub (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxub (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminub %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminub (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminub (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmovmskb %xmm0, %ecx # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhuw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmullw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmullw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmullw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmuludq %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmuludq (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmuludq (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmuludq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmuludq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmuludq (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - por %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - por (%rax), %xmm2 +# CHECK-NEXT: 0.33 
0.33 0.33 0.33 - 0.33 - - - - 0.33 - - por (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - psadbw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - psadbw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - psadbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufd $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufhw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufhw $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshuflw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshuflw $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pslld $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - pslld %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pslld (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pslld (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pslldq $1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psllq $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psllq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psllq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psllq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psllw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psllw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psllw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psllw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrad $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrad 
%xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrad (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrad (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psraw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psraw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psraw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psraw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrld $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrld %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrld (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrld (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - psrldq $1, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrlq $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrlq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrlq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrlq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psrlw $1, %xmm2 # CHECK-NEXT: 0.50 1.00 - - - 0.50 - - - - - - - psrlw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psrlw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psrlw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubb %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubb (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubb (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubd (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - psubq %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - psubq (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 0.50 - - - - 0.33 - 
- psubq (%rax), %mm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubq %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubq (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubsw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubusb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubusb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubusb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psubusw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psubusw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psubusw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - psubw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - psubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - psubw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhqdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhqdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 
- 0.50 - - - - 0.33 - - punpckhqdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckhwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckhwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckhwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpckldq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpckldq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpckldq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklqdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklqdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklqdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - punpcklwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - punpcklwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - punpcklwd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pxor %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pxor (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pxor (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - shufpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - shufpd $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - shufpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtpd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - sqrtsd %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - sqrtsd (%rax), %xmm2 
+# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - sqrtsd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - subsd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - subsd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - subsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - ucomisd %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - ucomisd (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - ucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpckhpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpckhpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpckhpd (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - unpcklpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - unpcklpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - unpcklpd (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - xorpd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - xorpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - xorpd (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s index daa8edd85b37c4..445015d9c430a9 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse3.s @@ -82,28 +82,28 @@ mwait # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 2.00 9.00 3.33 3.33 - 27.00 4.00 - - - - 3.33 - +# CHECK-NEXT: 2.00 9.00 3.33 3.33 - 27.00 4.00 - - - 3.33 - - # CHECK: Resource pressure by 
instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsubpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsubpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsubpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - addsubps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - addsubps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - addsubps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - haddpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - haddpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - haddpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - haddps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - haddps (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - haddps (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - hsubpd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - hsubpd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - hsubpd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 2.50 - - - - - - - hsubps %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - - 0.33 - hsubps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - lddqu (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 2.50 - - - - 0.33 - - hsubps (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - lddqu (%rax), %xmm2 # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - monitor # CHECK-NEXT: - - - - - 1.00 - - - - - - - movddup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movddup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movddup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - movshdup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movshdup (%rax), %xmm2 +# CHECK-NEXT: - - 
0.33 0.33 - - - - - - 0.33 - - movshdup (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - movsldup %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsldup (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsldup (%rax), %xmm2 # CHECK-NEXT: 1.75 1.75 - - - 2.75 3.75 - - - - - - mwait diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s index 02e212431cc0de..1dc78904ff9631 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse41.s @@ -269,37 +269,37 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 36.33 50.83 14.67 14.67 2.50 41.83 1.00 2.50 2.50 2.50 - 14.67 - +# CHECK-NEXT: 36.33 50.83 14.67 14.67 2.50 41.83 1.00 2.50 2.50 2.50 14.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendpd $11, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendpd $11, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendpd $11, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendps $11, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendps $11, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendps $11, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendvpd %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendvpd %xmm0, (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - blendvps %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - blendvps %xmm0, 
(%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.50 - - - 0.50 - - - - - - - dppd $22, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - - 0.33 - dppd $22, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.50 0.33 0.33 - 0.50 - - - - 0.33 - - dppd $22, (%rax), %xmm2 # CHECK-NEXT: 1.50 2.00 - - - 2.00 0.50 - - - - - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - - 0.33 - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 1.50 2.00 0.33 0.33 - 2.00 0.50 - - - 0.33 - - dpps $22, (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.50 1.00 - 0.50 0.50 0.50 - - - extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - insertps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - insertps $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movntdqa (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - insertps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movntdqa (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - mpsadbw $1, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - - 0.33 - mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 1.50 - - - - 0.33 - - mpsadbw $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - packusdw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - packusdw (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - packusdw (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 - - - 0.33 - - - - - - - pblendvb %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - - 0.33 - pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 - - - - 0.33 - - pblendvb %xmm0, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pblendw $11, %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pblendw $11, (%rax), %xmm2 +# CHECK-NEXT: - 
0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pblendw $11, (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pcmpeqq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pcmpeqq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pcmpeqq (%rax), %xmm2 # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrb $1, %xmm0, %ecx # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 0.50 - - - 0.50 - - - - - - - pextrd $1, %xmm0, %ecx @@ -308,64 +308,64 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrq $1, %xmm0, (%rax) # CHECK-NEXT: - 0.50 - - 0.50 0.50 - 0.50 0.50 0.50 - - - pextrw $1, %xmm0, (%rax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - phminposuw %xmm0, %xmm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - phminposuw (%rax), %xmm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - phminposuw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrb $1, %eax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrb $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrb $1, (%rax), %xmm1 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrd $1, %eax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrd $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrd $1, (%rax), %xmm1 # CHECK-NEXT: - 0.50 - - - 1.50 - - - - - - - pinsrq $1, %rax, %xmm1 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pinsrq $1, (%rax), %xmm1 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pinsrq $1, (%rax), %xmm1 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxsd 
(%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxud %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxud (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxud (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaxuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaxuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaxuw (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsb (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminsd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminud %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminud (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminud (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pminuw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pminuw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pminuw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxbw 
(%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovsxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovsxwq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxbw %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxbw (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxdq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxdq (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxwd %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxwd (%rax), %xmm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pmovzxwq %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 
0.33 - 0.50 - - - - - 0.33 - pmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pmovzxwq (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmuldq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmuldq (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmuldq (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - pmulld %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - pmulld (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - pmulld (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - ptest %xmm0, %xmm1 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - ptest (%rax), %xmm1 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - ptest (%rax), %xmm1 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundpd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundps $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundsd $1, (%rax), %xmm2 # CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - - 0.33 - roundss $1, (%rax), %xmm2 +# CHECK-NEXT: 1.00 1.00 0.33 0.33 - - - - - - 0.33 - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s index f08243fdea7240..899538f44f53d8 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s +++ 
b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-sse42.s @@ -78,27 +78,27 @@ pcmpgtq (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 28.67 16.67 3.33 3.33 - 8.67 2.00 - - - - 3.33 - +# CHECK-NEXT: 28.67 16.67 3.33 3.33 - 8.67 2.00 - - - 3.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32b %al, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32b (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32b (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32l %eax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32l (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32l (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32w %ax, %ecx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32w (%rax), %ecx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32w (%rax), %ecx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32b %al, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32b (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32b (%rax), %rcx # CHECK-NEXT: - 1.00 - - - - - - - - - - - crc32q %rax, %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - crc32q (%rax), %rcx +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - crc32q (%rax), %rcx # CHECK-NEXT: 4.17 1.67 - - - 1.67 0.50 - - - - - - pcmpestri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - - 0.33 - pcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.83 1.33 0.33 0.33 - 1.33 0.50 - - - 0.33 - - pcmpestri $1, (%rax), %xmm2 # CHECK-NEXT: 4.50 2.00 - - - 2.00 0.50 - - - - - - pcmpestrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - - 0.33 - pcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4.17 1.67 0.33 0.33 - 1.67 0.50 - - - 0.33 - - 
pcmpestrm $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - pcmpistri $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpistri $1, (%rax), %xmm2 # CHECK-NEXT: 3.00 - - - - - - - - - - - - pcmpistrm $1, %xmm0, %xmm2 -# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - - 0.33 - pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 3.00 - 0.33 0.33 - - - - - - 0.33 - - pcmpistrm $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - pcmpgtq %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - pcmpgtq (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - pcmpgtq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s index 565eaaab6c513f..4578e37127595a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-ssse3.s @@ -188,71 +188,71 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 38.67 25.67 10.67 10.67 - 49.67 - - - - - 10.67 - +# CHECK-NEXT: 38.67 25.67 10.67 10.67 - 49.67 - - - - 10.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsd (%rax), %mm2 +# 
CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pabsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pabsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pabsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pabsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pabsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pabsw (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - palignr $1, %mm0, %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - palignr $1, (%rax), %mm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - palignr $1, (%rax), %mm2 # CHECK-NEXT: - - - - - 1.00 - - - - - - - palignr $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - palignr $1, (%rax), %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - palignr $1, (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phaddd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phaddd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phaddd (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phaddd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phaddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phaddd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 2.00 - - - - - - - phaddsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - - 0.33 - phaddsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - 0.33 - - phaddsw (%rax), %mm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - phaddsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 
0.33 - 1.00 - - - - 0.33 - - phaddsw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phaddw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phaddw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phaddw (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phaddw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phaddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phaddw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phsubd %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phsubd (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phsubd (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phsubd %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phsubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phsubd (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 2.00 - - - - - - - phsubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - - 0.33 - phsubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 2.00 - - - - 0.33 - - phsubsw (%rax), %mm2 # CHECK-NEXT: 0.50 1.50 - - - 1.00 - - - - - - - phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - - 0.33 - phsubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 1.50 0.33 0.33 - 1.00 - - - - 0.33 - - phsubsw (%rax), %xmm2 # CHECK-NEXT: 0.50 - - - - 2.50 - - - - - - - phsubw %mm0, %mm2 -# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - - 0.33 - phsubw (%rax), %mm2 +# CHECK-NEXT: 0.50 - 0.33 0.33 - 2.50 - - - - 0.33 - - phsubw (%rax), %mm2 # CHECK-NEXT: 0.33 1.33 - - - 1.33 - - - - - - - phsubw %xmm0, %xmm2 -# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - - 0.33 - phsubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 1.33 0.33 0.33 - 1.33 - - - - 0.33 - - phsubw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmaddubsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmaddubsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 
0.33 0.33 - - - - - - 0.33 - - pmaddubsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmaddubsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmaddubsw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pmulhrsw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - pmulhrsw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - pmulhrsw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - pmulhrsw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - pmulhrsw (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - pshufb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - pshufb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - pshufb (%rax), %mm2 # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - pshufb %xmm0, %xmm2 -# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - - 0.33 - pshufb (%rax), %xmm2 +# CHECK-NEXT: - 0.50 0.33 0.33 - 0.50 - - - - 0.33 - - pshufb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignb %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignb (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignb (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignb %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignb (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignb (%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignd %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignd (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignd (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignd %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignd (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignd 
(%rax), %xmm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - psignw %mm0, %mm2 -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - psignw (%rax), %mm2 +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - psignw (%rax), %mm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - psignw %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - psignw (%rax), %xmm2 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - psignw (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s index 3cdda14f9d45f9..919cbe60a88db1 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vaes.s @@ -48,15 +48,15 @@ vaesenclast (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 4.00 4.00 1.33 1.33 - - - - - - - 1.33 - +# CHECK-NEXT: 4.00 4.00 1.33 1.33 - - - - - - 1.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdec %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdec (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdec (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesdeclast %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesdeclast (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesdeclast (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenc %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - - 0.33 - vaesenc (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenc (%rax), %ymm1, %ymm3 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vaesenclast %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 0.50 
0.50 0.33 0.33 - - - - - - - 0.33 - vaesenclast (%rax), %ymm1, %ymm3 +# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vaesenclast (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s index 1f4745a2c7b8f1..8375b08f95a6b3 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-vpclmulqdq.s @@ -33,9 +33,9 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: - - - - - 1.00 - - - - - - - vpclmulqdq $11, %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - vpclmulqdq $11, (%rax), %ymm1, %ymm3 +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - vpclmulqdq $11, (%rax), %ymm1, %ymm3 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s index 195908c756b843..834dfb8525bdad 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_32.s @@ -64,7 +64,7 @@ salc # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 3.60 3.60 0.33 0.33 - 3.60 3.60 - - - 0.60 0.33 - +# CHECK-NEXT: 3.60 3.60 0.33 0.33 - 3.60 3.60 - - - 0.33 0.60 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -79,5 +79,5 @@ salc # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - daa # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - 
das # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - into -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - leave +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - leave # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - salc diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s index 1720b2d60ba903..d41abf60de1835 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x86_64.s @@ -1965,419 +1965,419 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 949.92 794.58 213.00 213.00 202.50 599.75 793.42 203.00 203.00 202.50 191.33 213.00 - +# CHECK-NEXT: 949.92 794.58 213.00 213.00 202.50 599.75 793.42 203.00 203.00 202.50 213.00 191.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $0, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $0, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcb $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcb $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $7, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb $7, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcb $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 
0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcb $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcb %sil, %dil -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcb %sil, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcb %sil, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcb (%rax), %dil +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcb %sil, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcb %sil, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcb (%rax), %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $0, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $0, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $511, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $511, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $511, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $511, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw $7, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcw $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 
0.20 - adcw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcw $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcw %si, %di -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcw %si, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcw %si, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcw (%rax), %di +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcw %si, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcw %si, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $0, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $0, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $665536, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $665536, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl $7, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcl $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcl $7, (%rax) 
+# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcl $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcl %esi, %edi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcl %esi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcl %esi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcl (%rax), %edi +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcl %esi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcl %esi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $0, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $0, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $665536, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $665536, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq $7, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - adcq $7, (%rax) -# CHECK-NEXT: 0.70 
0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock adcq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - adcq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock adcq $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - adcq %rsi, %rdi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - adcq %rsi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock adcq %rsi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - adcq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 
0.50 0.50 0.50 0.20 0.33 - addw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq $665536, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - adcq %rsi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock adcq %rsi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - adcq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 
0.50 0.50 0.50 0.33 0.20 - addb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 
0.50 0.50 0.50 0.33 0.20 - lock addl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq $665536, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - addq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - addq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - addq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock addq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - addq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 
0.33 - andb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $665536, 
%rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - andq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - andq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock andq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - andq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - addq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - addq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock addq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - addq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 
0.20 - lock andb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - 
andl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - andq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - andq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock andq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - andq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrw %si, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfw (%rax), %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrw (%rax), %di +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfw (%rax), %di +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrw (%rax), %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrl %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfl (%rax), %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsrl (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfl (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrl (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsfq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bsrq %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - bsfq (%rax), %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - 
- - - - - - 0.33 - bsrq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsfq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - bsrq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - bswapl %eax # CHECK-NEXT: 0.50 1.00 - - - - 0.50 - - - - - - bswapq %rax # CHECK-NEXT: - 1.00 - - - - - - - - - - - btw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrw %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsw %si, %di -# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 1.47 0.33 - btw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btcw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btrw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btsw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btcw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btrw %si, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btsw %si, (%rax) +# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 0.33 1.47 - btw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btcw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btrw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btsw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btcw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btrw %si, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btsw %si, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - 
btrw $7, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsw $7, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrw $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrw $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsw $7, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrl %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsl %esi, %edi -# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 1.47 0.33 - btl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btcl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btrl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - btsl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btcl %esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btrl 
%esi, (%rax) -# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 1.27 0.33 - lock btsl %esi, (%rax) +# CHECK-NEXT: 1.80 2.47 0.33 0.33 - 1.47 1.80 - - - 0.33 1.47 - btl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btcl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btrl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - btsl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btcl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btrl %esi, (%rax) +# CHECK-NEXT: 1.60 2.27 0.33 0.33 0.50 1.27 1.60 0.50 0.50 0.50 0.33 1.27 - lock btsl %esi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrl $7, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsl $7, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrl $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 
0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrl $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsl $7, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrq %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsq %rsi, %rdi -# CHECK-NEXT: 2.00 2.00 0.33 0.33 - 1.00 2.00 - - - 1.00 0.33 - btq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btcq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btrq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - btsq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btcq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btrq %rsi, (%rax) -# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.80 0.33 - lock btsq %rsi, (%rax) +# CHECK-NEXT: 2.00 2.00 0.33 0.33 - 1.00 2.00 - - - 0.33 1.00 - btq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btcq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btrq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - btsq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btcq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btrq %rsi, (%rax) +# CHECK-NEXT: 1.80 1.80 0.33 0.33 0.50 0.80 1.80 0.50 0.50 0.50 0.33 0.80 - lock btsq %rsi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - btq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btcq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btrq $7, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - btsq $7, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - btq 
$7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btcq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btrq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - btsq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btcq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btrq $7, (%rax) -# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 - 0.33 - lock btsq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - btq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btcq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btrq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - btsq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btcq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btrq $7, (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 0.50 - - 0.50 0.50 0.50 0.33 - - lock btsq $7, (%rax) # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cbtw # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cwtl # CHECK-NEXT: - 0.50 - - - 0.50 - - - - - - - cltq -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - cwtd +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - cwtd # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cltd # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - cqto # CHECK-NEXT: - - - - - - - - - - - - - clc -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - cld -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmc -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb %sil, (%rax) -# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - cmpq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - cmpq (%rax), %rdi -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 
0.67 - cmpsb %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsw %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsl %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 1.00 0.67 - cmpsq %es:(%rdi), (%rsi) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgb %cl, %bl -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgw %cx, %bx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgl %ecx, %ebx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - 0.60 - - cmpxchgq %rcx, %rbx -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - cmpxchgq %rcx, (%rbx) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - lock cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - cld +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmc +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpb (%rax), %dil +# 
CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - cmpq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - cmpq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsb %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 
0.67 1.00 - cmpsw %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsl %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.00 1.00 0.67 0.67 - 1.00 1.00 - - - 0.67 1.00 - cmpsq %es:(%rdi), (%rsi) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgb %cl, %bl +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgw %cx, %bx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgl %ecx, %ebx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 1.60 0.60 - - - 0.60 1.60 - - - - 0.60 - cmpxchgq %rcx, %rbx +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - lock cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 7.50 6.50 - - 0.50 5.00 5.00 0.50 0.50 0.50 - - - cpuid -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decb (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decb (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decw %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decw (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decw (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - decl %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 
0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decb (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decb (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decw %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decw (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decw (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - decl %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decl (%rax) # CHECK-NEXT: - - - - - - - - - - - - - decq %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - decq (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock decq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - decq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock decq (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - divb %dil # CHECK-NEXT: - 3.00 - - - - - - - - - - - divb (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - divw %si -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - divw (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - divl %edx -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - divl (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - divw %si +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - divw (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - divl %edx +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - divl (%rax) # CHECK-NEXT: - 
3.00 - - - - - - - - - - - divq %rcx -# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - - 0.33 - divq (%rax) -# CHECK-NEXT: 12.50 2.00 4.67 4.67 2.00 9.00 10.50 2.50 2.50 2.00 - 4.67 - enter $7, $4095 +# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - 0.33 - - divq (%rax) +# CHECK-NEXT: 12.50 2.00 4.67 4.67 2.00 9.00 10.50 2.50 2.50 2.00 4.67 - - enter $7, $4095 # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivb %dil # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivb (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - idivw %si -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - idivw (%rax) -# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - 0.20 - - idivl %edx -# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - idivl (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - idivw %si +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - idivw (%rax) +# CHECK-NEXT: 0.20 3.20 - - - 0.20 0.20 - - - - 0.20 - idivl %edx +# CHECK-NEXT: 0.20 3.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - idivl (%rax) # CHECK-NEXT: - 3.00 - - - - - - - - - - - idivq %rcx -# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - - 0.33 - idivq (%rax) +# CHECK-NEXT: - 3.00 0.33 0.33 - - - - - - 0.33 - - idivq (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulb %dil -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulb (%rax) -# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - 0.40 - - imulw %di -# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.40 0.33 - imulw (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulb (%rax) +# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - - 0.40 - imulw %di +# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.33 0.40 - imulw (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulw %si, %di -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulw (%rax), %di -# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - imulw $511, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - imulw $511, (%rax), %di -# 
CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - 0.20 - - imulw $7, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - imulw $7, (%rax), %di -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - imull %edi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - imull (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulw (%rax), %di +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - imulw $511, %si, %di +# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - imulw $511, (%rax), %di +# CHECK-NEXT: 0.20 1.20 - - - 0.20 0.20 - - - - 0.20 - imulw $7, %si, %di +# CHECK-NEXT: 0.20 1.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - imulw $7, (%rax), %di +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - imull %edi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - imull (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull $665536, %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull $665536, (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull $665536, (%rax), %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imull $7, %esi, %edi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imull $7, (%rax), %edi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imull $7, (%rax), %edi # CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - imulq %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - imulq (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - imulq (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq $665536, %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 
- - - - - - - 0.33 - imulq $665536, (%rax), %rdi +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq $665536, (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - imulq $7, %rsi, %rdi -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - imulq $7, (%rax), %rdi -# CHECK-NEXT: 20.70 20.87 2.67 2.67 - 21.87 13.70 - - - 1.87 2.67 - inb $7, %al -# CHECK-NEXT: 20.70 20.87 2.33 2.33 - 21.87 13.70 - - - 1.87 2.33 - inb %dx, %al -# CHECK-NEXT: 21.00 20.67 2.33 2.33 - 22.67 14.00 - - - 1.67 2.33 - inw $7, %ax -# CHECK-NEXT: 21.30 21.30 2.33 2.33 - 21.80 13.80 - - - 1.80 2.33 - inw %dx, %ax -# CHECK-NEXT: 22.20 22.87 3.33 3.33 - 21.87 15.20 - - - 1.87 3.33 - inl $7, %eax -# CHECK-NEXT: 22.80 23.47 3.67 3.67 - 23.47 15.80 - - - 2.47 3.67 - inl %dx, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incb (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incb (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incw %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incw (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incw (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - incl %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incl (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - imulq $7, (%rax), %rdi +# CHECK-NEXT: 20.70 20.87 2.67 2.67 - 21.87 13.70 - - - 2.67 1.87 - inb $7, %al +# CHECK-NEXT: 20.70 20.87 2.33 2.33 - 21.87 13.70 - - - 2.33 1.87 - inb %dx, %al +# CHECK-NEXT: 21.00 20.67 2.33 2.33 - 22.67 14.00 - - - 2.33 1.67 - inw $7, %ax +# CHECK-NEXT: 21.30 21.30 2.33 2.33 - 21.80 13.80 - - - 2.33 1.80 - inw %dx, %ax +# CHECK-NEXT: 22.20 22.87 3.33 3.33 - 21.87 15.20 - - - 3.33 1.87 - inl $7, %eax +# 
CHECK-NEXT: 22.80 23.47 3.67 3.67 - 23.47 15.80 - - - 3.67 2.47 - inl %dx, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incb (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incb (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incw %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incw (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incw (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - incl %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incl (%rax) # CHECK-NEXT: - - - - - - - - - - - - - incq %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - incq (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock incq (%rax) -# CHECK-NEXT: 20.20 18.20 2.67 2.67 0.50 20.20 13.20 0.50 0.50 0.50 1.20 2.67 - insb %dx, %es:(%rdi) -# CHECK-NEXT: 20.97 18.47 3.00 3.00 0.50 20.80 13.63 0.50 0.50 0.50 1.13 3.00 - insw %dx, %es:(%rdi) -# CHECK-NEXT: 22.17 18.33 3.67 3.67 0.50 22.67 14.83 0.50 0.50 0.50 1.00 3.67 - insl %dx, %es:(%rdi) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - incq (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock incq (%rax) +# CHECK-NEXT: 20.20 18.20 2.67 2.67 0.50 20.20 13.20 0.50 0.50 0.50 2.67 1.20 - insb %dx, %es:(%rdi) +# CHECK-NEXT: 20.97 18.47 3.00 3.00 0.50 20.80 13.63 0.50 0.50 0.50 3.00 1.13 - insw %dx, %es:(%rdi) +# CHECK-NEXT: 22.17 18.33 3.67 3.67 0.50 22.67 14.83 0.50 0.50 0.50 3.67 1.00 - insl %dx, %es:(%rdi) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - int $7 -# CHECK-NEXT: 9.80 7.47 - - 2.50 8.47 4.80 2.50 2.50 2.50 1.47 - - invlpg (%rax) +# 
CHECK-NEXT: 9.80 7.47 - - 2.50 8.47 4.80 2.50 2.50 2.50 - 1.47 - invlpg (%rax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - invlpga # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - lahf -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - leave -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - lodsb (%rsi), %al -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - lodsw (%rsi), %ax -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - lodsl (%rsi), %eax -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - lodsq (%rsi), %rax -# CHECK-NEXT: 2.40 1.40 - - - 0.40 2.40 - - - 0.40 - - loop 0 -# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - 0.80 - - loope 0 -# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - 0.80 - - loopne 0 -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsb (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsw (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsl (%rsi), %es:(%rdi) -# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.80 0.33 - movsq (%rsi), %es:(%rdi) -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbw %al, %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbw %al, %di +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - leave +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - lodsb (%rsi), %al +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - lodsw (%rsi), %ax +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - lodsl (%rsi), %eax +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - lodsq (%rsi), %rax +# CHECK-NEXT: 2.40 1.40 - - - 0.40 2.40 - - - - 0.40 - loop 0 +# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - - 0.80 - loope 0 +# CHECK-NEXT: 3.80 1.80 - - - 0.80 3.80 - - - - 0.80 - loopne 0 +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 
0.80 - movsb (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsw (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsl (%rsi), %es:(%rdi) +# CHECK-NEXT: 0.80 0.80 0.33 0.33 0.50 0.80 0.80 0.50 0.50 0.50 0.33 0.80 - movsq (%rsi), %es:(%rdi) +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbw %al, %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbw %al, %di # CHECK-NEXT: - 0.33 0.33 0.33 - 0.33 - - - - 0.33 0.33 - movsbw (%rax), %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - movzbw (%rax), %di -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbl %al, %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbl %al, %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsbl (%rax), %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzbl (%rax), %edi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movsbq %al, %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzbq %al, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movsbq (%rax), %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzbq (%rax), %rdi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movswl %ax, %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzwl %ax, %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movswl (%rax), %edi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzwl (%rax), %edi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movswq %ax, %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - movzwq %ax, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movswq (%rax), %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movzwq (%rax), %rdi -# CHECK-NEXT: - 0.33 - - - 0.33 - - - - 0.33 - - movslq %eax, %rdi -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - movslq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - movzbw (%rax), %di +# CHECK-NEXT: - 
0.33 - - - 0.33 - - - - - 0.33 - movsbl %al, %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbl %al, %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsbl (%rax), %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzbl (%rax), %edi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movsbq %al, %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzbq %al, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movsbq (%rax), %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzbq (%rax), %rdi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movswl %ax, %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzwl %ax, %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movswl (%rax), %edi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzwl (%rax), %edi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movswq %ax, %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - movzwq %ax, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movswq (%rax), %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movzwq (%rax), %rdi +# CHECK-NEXT: - 0.33 - - - 0.33 - - - - - 0.33 - movslq %eax, %rdi +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - movslq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - mulb %dil -# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - - 0.33 - mulb (%rax) -# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - 0.40 - - mulw %si -# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.40 0.33 - mulw (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - mull %edx -# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.20 0.33 - mull (%rax) +# CHECK-NEXT: - 1.00 0.33 0.33 - - - - - - 0.33 - - mulb (%rax) +# CHECK-NEXT: 0.90 1.40 - - - 0.40 0.90 - - - - 0.40 - mulw %si +# CHECK-NEXT: 0.90 1.40 0.33 0.33 - 0.40 0.90 - - - 0.33 0.40 - mulw (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - mull %edx +# CHECK-NEXT: 0.70 1.20 0.33 0.33 - 0.20 0.70 - - - 0.33 0.20 - mull (%rax) # 
CHECK-NEXT: - 1.00 - - - 1.00 - - - - - - - mulq %rcx -# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - - 0.33 - mulq (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negb (%r8) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negb (%r8) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negw %si -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negw (%r9) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negw (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negl %edx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negl (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - negq %rcx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - negq (%r10) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock negq (%r10) +# CHECK-NEXT: - 1.00 0.33 0.33 - 1.00 - - - - 0.33 - - mulq (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negb (%r8) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negb (%r8) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negw %si +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negw (%r9) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negw (%r9) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negl %edx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - negq %rcx +# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - negq (%r10) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock negq (%r10) # CHECK-NEXT: - - - - - - - - - - - - - nop # CHECK-NEXT: - - - - - - - - - - - - - nopw %di # CHECK-NEXT: - - - - - - - - - - - - - nopw (%rcx) @@ -2385,303 +2385,303 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - - - - - - - - - nopl (%r8) # CHECK-NEXT: - - - - - - - - - - - - - nopq %rdx # CHECK-NEXT: - - - - - - - - - - - - - nopq (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notb %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notb (%r8) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notb (%r8) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notw %si -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notw (%r9) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notw (%r9) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notl %edx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notl (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notl (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - notq %rcx -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - notq (%r10) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock notq (%r10) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orb %sil, 
(%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orl %esi, (%rax) -# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - orq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - orq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock orq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - orq (%rax), %rdi -# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.50 13.50 0.50 0.50 0.50 1.00 1.67 - outb %al, $7 -# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.00 14.00 0.50 0.50 0.50 1.00 1.67 - outb %al, %dx -# CHECK-NEXT: 21.30 15.80 2.33 2.33 0.50 17.30 14.80 0.50 0.50 0.50 0.80 2.33 - outw %ax, $7 -# CHECK-NEXT: 20.70 16.20 2.33 2.33 0.50 17.20 14.70 0.50 0.50 0.50 1.20 2.33 - outw %ax, %dx -# CHECK-NEXT: 22.30 15.80 3.00 3.00 0.50 19.30 15.80 0.50 0.50 0.50 0.80 3.00 - outl %eax, $7 -# CHECK-NEXT: 21.70 16.20 3.00 3.00 0.50 19.20 15.70 0.50 0.50 0.50 1.20 3.00 - outl %eax, %dx -# CHECK-NEXT: 20.70 17.20 2.33 2.33 0.50 18.20 13.70 0.50 0.50 0.50 1.20 2.33 - outsb (%rsi), %dx -# CHECK-NEXT: 21.00 17.50 2.67 2.67 0.50 19.00 14.50 0.50 0.50 0.50 1.00 2.67 - outsw (%rsi), %dx -# CHECK-NEXT: 22.20 17.20 3.33 3.33 0.50 21.20 15.20 0.50 0.50 0.50 1.20 3.33 - outsl (%rsi), %dx +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notb %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 
0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notb (%r8) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notb (%r8) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notw %si +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notw (%r9) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notw (%r9) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notl %edx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notl (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notl (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - notq %rcx +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - notq (%r10) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock notq (%r10) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 
0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 
0.20 0.50 0.50 0.50 0.33 0.20 - lock orq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - orq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - orq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock orq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - orq (%rax), %rdi +# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.50 13.50 0.50 0.50 0.50 1.67 1.00 - outb %al, $7 +# CHECK-NEXT: 19.00 16.00 1.67 1.67 0.50 16.00 14.00 0.50 0.50 0.50 1.67 1.00 - outb %al, %dx +# CHECK-NEXT: 21.30 15.80 2.33 2.33 0.50 17.30 14.80 0.50 0.50 0.50 2.33 0.80 - outw %ax, $7 +# CHECK-NEXT: 20.70 16.20 2.33 2.33 0.50 17.20 14.70 0.50 0.50 0.50 2.33 1.20 - outw %ax, %dx +# CHECK-NEXT: 22.30 15.80 3.00 3.00 0.50 19.30 15.80 0.50 0.50 0.50 3.00 0.80 - outl %eax, $7 +# CHECK-NEXT: 21.70 16.20 3.00 3.00 0.50 19.20 15.70 0.50 0.50 0.50 3.00 1.20 - outl %eax, %dx +# CHECK-NEXT: 20.70 17.20 2.33 2.33 0.50 18.20 13.70 0.50 0.50 0.50 2.33 1.20 - outsb (%rsi), %dx +# CHECK-NEXT: 21.00 17.50 2.67 2.67 0.50 19.00 14.50 0.50 0.50 0.50 2.67 1.00 - outsw (%rsi), %dx +# CHECK-NEXT: 22.20 17.20 3.33 3.33 0.50 21.20 15.20 0.50 0.50 0.50 3.33 1.20 - outsl (%rsi), %dx # CHECK-NEXT: 0.50 - - - - 1.00 0.50 - - - - - - pause -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclb %dil -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrb %dil -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclb (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrb (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclb $7, %dil -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrb $7, %dil -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclb $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrb $7, (%rax) -# CHECK-NEXT: 2.90 2.40 - - - 0.40 2.90 - - 
- 0.40 - - rclb %cl, %dil -# CHECK-NEXT: 2.60 3.60 - - - 0.60 2.60 - - - 0.60 - - rcrb %cl, %dil -# CHECK-NEXT: 2.70 2.20 0.33 0.33 0.50 0.20 2.70 0.50 0.50 0.50 0.20 0.33 - rclb %cl, (%rax) -# CHECK-NEXT: 2.40 3.40 0.33 0.33 0.50 0.40 2.40 0.50 0.50 0.50 0.40 0.33 - rcrb %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclw %di -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrw %di -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclw (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrw (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclw $7, %di -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrw $7, %di -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclw $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrw $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rclw %cl, %di -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrw %cl, %di -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rclw %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrw %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcll %edi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrl %edi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcll (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrl (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcll $7, %edi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrl $7, %edi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcll $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrl $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcll %cl, %edi -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 
0.40 - - rcrl %cl, %edi -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcll %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrl %cl, (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclq %rdi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrq %rdi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclq (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrq (%rax) -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rclq $7, %rdi -# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - 0.20 - - rcrq $7, %rdi -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rclq $7, (%rax) -# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.20 0.33 - rcrq $7, (%rax) -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rclq %cl, %rdi -# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - 0.40 - - rcrq %cl, %rdi -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rclq %cl, (%rax) -# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.40 0.33 - rcrq %cl, (%rax) -# CHECK-NEXT: 16.33 13.33 - - - 10.67 13.33 - - - 0.33 - - rdmsr -# CHECK-NEXT: 4.80 3.80 - - - 2.80 4.80 - - - 1.80 - - rdpmc -# CHECK-NEXT: 4.00 4.00 - - - 2.00 4.00 - - - 1.00 - - rdtsc +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclb %dil +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrb %dil +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclb (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrb (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclb $7, %dil +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrb $7, %dil +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclb $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrb $7, 
(%rax) +# CHECK-NEXT: 2.90 2.40 - - - 0.40 2.90 - - - - 0.40 - rclb %cl, %dil +# CHECK-NEXT: 2.60 3.60 - - - 0.60 2.60 - - - - 0.60 - rcrb %cl, %dil +# CHECK-NEXT: 2.70 2.20 0.33 0.33 0.50 0.20 2.70 0.50 0.50 0.50 0.33 0.20 - rclb %cl, (%rax) +# CHECK-NEXT: 2.40 3.40 0.33 0.33 0.50 0.40 2.40 0.50 0.50 0.50 0.33 0.40 - rcrb %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclw %di +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrw %di +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclw (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrw (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclw $7, %di +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrw $7, %di +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclw $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrw $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rclw %cl, %di +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrw %cl, %di +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rclw %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrw %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcll %edi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrl %edi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcll (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrl (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcll $7, %edi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrl $7, %edi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcll $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrl $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcll %cl, 
%edi +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrl %cl, %edi +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcll %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrl %cl, (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclq %rdi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrq %rdi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclq (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrq (%rax) +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rclq $7, %rdi +# CHECK-NEXT: 1.20 0.20 - - - 0.20 1.20 - - - - 0.20 - rcrq $7, %rdi +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rclq $7, (%rax) +# CHECK-NEXT: 1.20 0.20 0.33 0.33 0.50 0.20 1.20 0.50 0.50 0.50 0.33 0.20 - rcrq $7, (%rax) +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rclq %cl, %rdi +# CHECK-NEXT: 1.90 2.40 - - - 0.40 1.90 - - - - 0.40 - rcrq %cl, %rdi +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rclq %cl, (%rax) +# CHECK-NEXT: 1.90 2.40 0.33 0.33 0.50 0.40 1.90 0.50 0.50 0.50 0.33 0.40 - rcrq %cl, (%rax) +# CHECK-NEXT: 16.33 13.33 - - - 10.67 13.33 - - - - 0.33 - rdmsr +# CHECK-NEXT: 4.80 3.80 - - - 2.80 4.80 - - - - 1.80 - rdpmc +# CHECK-NEXT: 4.00 4.00 - - - 2.00 4.00 - - - - 1.00 - rdtsc # CHECK-NEXT: 7.50 5.33 - - - 4.00 4.17 - - - - - - rdtscp # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb $7, %dil # CHECK-NEXT: 1.00 - - - - - 
1.00 - - - - - - rorb $7, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorb %cl, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorb %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw $7, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw $7, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorw %cl, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorw %cl, 
(%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorw %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll $7, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl $7, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - roll %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorl %cl, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - roll %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - roll %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorl %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - 
- - - rolq $7, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq $7, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq $7, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq $7, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rolq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - rorq %cl, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rolq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - rorq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rolq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - rorq %cl, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sahf # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarb %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlb %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrb %dil -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarb (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlb (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlb (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrb (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarb $7, %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlb $7, %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrb $7, %dil -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarb $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlb $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrb 
$7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlb $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrb $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlb %cl, %dil # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrb %cl, %dil -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlb %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlb %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrb %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarw %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlw %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrw %di -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarw (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlw (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlw (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrw (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarw $7, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlw $7, %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrw $7, %di -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarw $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlw $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 
0.50 - 0.33 - shrw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlw $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrw $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlw %cl, %di # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrw %cl, %di -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlw %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlw %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrw %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarl %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shll %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrl %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarl (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shll (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrl (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarl (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shll (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrl (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarl $7, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shll $7, %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrl $7, %edi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarl $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shll $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 
0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrl $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarl $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shll $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrl $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarl %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shll %cl, %edi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrl %cl, %edi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarl %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shll %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarl %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shll %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrl %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarq %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlq %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrq %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarq (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlq (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlq (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrq (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sarq $7, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shlq $7, %rdi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - shrq $7, %rdi -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - sarq $7, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shlq $7, (%rax) -# 
CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 - 0.33 - shrq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - sarq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shlq $7, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 0.50 - 0.50 0.50 0.50 0.50 0.33 - - shrq $7, (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - sarq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shlq %cl, %rdi # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - shrq %cl, %rdi -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - sarq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shlq %cl, (%rax) -# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 - 0.33 - shrq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - sarq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shlq %cl, (%rax) +# CHECK-NEXT: 1.00 - 0.33 0.33 0.50 - 1.00 0.50 0.50 0.50 0.33 - - shrq %cl, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $0, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $0, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbb $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbb $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbb $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $7, %al # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb $7, %dil -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbb $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbb $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 
0.20 - lock sbbb $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbb %sil, %dil -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbb %sil, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbb %sil, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbb (%rax), %dil +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbb %sil, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbb %sil, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbb (%rax), %dil # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $0, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $0, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $511, %ax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $511, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $511, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $511, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $511, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw $7, %di -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbw $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbw $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbw $7, (%rax) +# CHECK-NEXT: 
0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbw $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbw %si, %di -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbw %si, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbw %si, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbw (%rax), %di +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbw %si, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbw %si, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbw (%rax), %di # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $0, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $0, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $665536, %eax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $665536, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl $7, %edi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbl $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 
0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbl $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbl $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbl %esi, %edi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbl %esi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbl %esi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbl (%rax), %edi +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbl %esi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbl %esi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbl (%rax), %edi # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $0, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $0, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $0, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $0, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $0, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $665536, %rax # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $665536, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $665536, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - lock sbbq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $665536, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $665536, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq $7, %rdi -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - sbbq $7, (%rax) -# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 
0.50 0.20 0.33 - lock sbbq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - sbbq $7, (%rax) +# CHECK-NEXT: 0.70 0.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - lock sbbq $7, (%rax) # CHECK-NEXT: 0.50 - - - - - 0.50 - - - - - - sbbq %rsi, %rdi -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - sbbq %rsi, (%rax) -# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.40 0.33 - lock sbbq %rsi, (%rax) -# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - - 0.33 - sbbq (%rax), %rdi -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasb %es:(%rdi), %al -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasw %es:(%rdi), %ax -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasl %es:(%rdi), %eax -# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.60 0.33 - scasq %es:(%rdi), %rax +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - sbbq %rsi, (%rax) +# CHECK-NEXT: 0.90 0.40 0.33 0.33 0.50 0.40 0.90 0.50 0.50 0.50 0.33 0.40 - lock sbbq %rsi, (%rax) +# CHECK-NEXT: 0.50 - 0.33 0.33 - - 0.50 - - - 0.33 - - sbbq (%rax), %rdi +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasb %es:(%rdi), %al +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasw %es:(%rdi), %ax +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasl %es:(%rdi), %eax +# CHECK-NEXT: 0.60 0.60 0.33 0.33 - 0.60 0.60 - - - 0.33 0.60 - scasq %es:(%rdi), %rax # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - seto %al # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - seto (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - setno %al @@ -2714,171 +2714,171 @@ xorq (%rax), %rdi # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - setg (%rax) # CHECK-NEXT: 1.00 - - - - - 1.00 - - - - - - setle %al # CHECK-NEXT: 1.00 - - - 0.50 - 1.00 0.50 0.50 0.50 - - - setle (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 
0.20 - - shldw %cl, %si, %di -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdw %cl, %si, %di -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldw %cl, %si, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdw %cl, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldw %cl, %si, %di +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdw %cl, %si, %di +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldw %cl, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdw %cl, %si, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldw $7, %si, %di # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdw $7, %si, %di -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldw $7, %si, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdw $7, %si, (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldl %cl, %esi, %edi -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdl %cl, %esi, %edi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldl %cl, %esi, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdl %cl, %esi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldw $7, %si, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdw $7, %si, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldl %cl, %esi, %edi +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdl %cl, %esi, %edi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldl %cl, %esi, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdl %cl, %esi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldl $7, %esi, %edi # CHECK-NEXT: - 1.00 - - - - - - - - - - - 
shrdl $7, %esi, %edi -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldl $7, %esi, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdl $7, %esi, (%rax) -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shldq %cl, %rsi, %rdi -# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - 0.20 - - shrdq %cl, %rsi, %rdi -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shldq %cl, %rsi, (%rax) -# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.20 0.33 - shrdq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldl $7, %esi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdl $7, %esi, (%rax) +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shldq %cl, %rsi, %rdi +# CHECK-NEXT: 0.70 1.20 - - - 0.20 0.70 - - - - 0.20 - shrdq %cl, %rsi, %rdi +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shldq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.70 1.20 0.33 0.33 0.50 0.20 0.70 0.50 0.50 0.50 0.33 0.20 - shrdq %cl, %rsi, (%rax) # CHECK-NEXT: - 1.00 - - - - - - - - - - - shldq $7, %rsi, %rdi # CHECK-NEXT: - 1.00 - - - - - - - - - - - shrdq $7, %rsi, %rdi -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shldq $7, %rsi, (%rax) -# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - shrdq $7, %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - stc -# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - 0.20 - - std -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosb %al, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosw %ax, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosl %eax, %es:(%rdi) -# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 0.40 - - stosq %rax, %es:(%rdi) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 
- - subb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl $7, %edi -# 
CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq $665536, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shldq $7, %rsi, (%rax) +# CHECK-NEXT: 0.20 1.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - shrdq $7, %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - stc +# CHECK-NEXT: 0.70 0.20 - - - 0.20 0.70 - - - - 0.20 - std +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosb %al, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosw %ax, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosl %eax, %es:(%rdi) +# CHECK-NEXT: 0.40 0.40 - - 0.50 0.40 0.40 0.50 0.50 0.50 - 0.40 - stosq %rax, %es:(%rdi) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subb %sil, %dil +# CHECK-NEXT: 0.20 0.20 
0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 
0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq $665536, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - subq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - subq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - subq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock subq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - subq (%rax), %rdi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - 
testw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $665536, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - testq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - testq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - subq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - subq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock subq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - subq (%rax), %rdi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 
- - - 0.33 0.20 - testb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $665536, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - testq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - testq %rsi, (%rax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - ud2 # CHECK-NEXT: 52.00 31.50 - - 0.50 27.00 31.50 0.50 0.50 0.50 - - - wrmsr -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddb %bl, %cl -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddb %bl, (%rcx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 
0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddb %bl, (%rcx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddw %bx, %cx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddw %ax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddw %ax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddl %ebx, %ecx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddl %eax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddl %eax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xaddq %rbx, %rcx -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - xaddq %rax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.40 0.33 - lock xaddq %rax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgb %bl, %cl -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgb %bl, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgb %bl, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgw %bx, %ax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgw %bx, %cx -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgw %ax, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgw %ax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgl %ebx, %eax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgl %ebx, %ecx -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - xchgl %eax, (%rbx) -# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.80 0.33 - lock xchgl %eax, (%rbx) -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgq %rbx, %rax -# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - 0.60 - - xchgq %rbx, %rcx -# CHECK-NEXT: 1.50 1.00 0.33 0.33 
0.50 1.00 1.50 0.50 0.50 0.50 1.00 0.33 - xchgq %rax, (%rbx) -# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 1.00 0.33 - lock xchgq %rax, (%rbx) -# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.40 0.33 - xlatb -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb $7, %al -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb $7, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorb $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorb %sil, %dil -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorb %sil, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorb (%rax), %dil -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $511, %ax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $511, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw $511, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw $7, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorw %si, %di -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorw %si, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorw (%rax), %di -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $665536, %eax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $665536, 
%edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl $7, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorl %esi, %edi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorl %esi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorl (%rax), %edi -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $665536, %rax -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $665536, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq $665536, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq $7, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq $7, (%rax) -# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - 0.20 - - xorq %rsi, %rdi -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - xorq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.20 0.33 - lock xorq %rsi, (%rax) -# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.20 0.33 - xorq (%rax), %rdi +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddb %bl, %cl +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddb %bl, (%rcx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 
0.50 0.50 0.33 0.40 - lock xaddb %bl, (%rcx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddw %bx, %cx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddw %ax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddw %ax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddl %ebx, %ecx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddl %eax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddl %eax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xaddq %rbx, %rcx +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - xaddq %rax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 0.50 0.40 0.40 0.50 0.50 0.50 0.33 0.40 - lock xaddq %rax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgb %bl, %cl +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgb %bl, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgb %bl, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgw %bx, %ax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgw %bx, %cx +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgw %ax, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgw %ax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgl %ebx, %eax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgl %ebx, %ecx +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - xchgl %eax, (%rbx) +# CHECK-NEXT: 1.30 0.80 0.33 0.33 0.50 0.80 1.30 0.50 0.50 0.50 0.33 0.80 - lock xchgl %eax, (%rbx) +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgq %rbx, %rax +# CHECK-NEXT: 0.60 0.60 - - - 0.60 0.60 - - - - 0.60 - xchgq %rbx, %rcx +# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 
0.50 0.50 0.33 1.00 - xchgq %rax, (%rbx) +# CHECK-NEXT: 1.50 1.00 0.33 0.33 0.50 1.00 1.50 0.50 0.50 0.50 0.33 1.00 - lock xchgq %rax, (%rbx) +# CHECK-NEXT: 0.40 0.40 0.33 0.33 - 0.40 0.40 - - - 0.33 0.40 - xlatb +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb $7, %al +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb $7, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorb $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorb %sil, %dil +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorb %sil, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorb (%rax), %dil +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $511, %ax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $511, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw $511, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw $7, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorw %si, %di +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorw %si, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorw (%rax), %di +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $665536, %eax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $665536, %edi +# CHECK-NEXT: 
0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl $7, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorl %esi, %edi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorl %esi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorl (%rax), %edi +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $665536, %rax +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $665536, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq $665536, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq $7, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq $7, (%rax) +# CHECK-NEXT: 0.20 0.20 - - - 0.20 0.20 - - - - 0.20 - xorq %rsi, %rdi +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - xorq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 0.50 0.20 0.20 0.50 0.50 0.50 0.33 0.20 - lock xorq %rsi, (%rax) +# CHECK-NEXT: 0.20 0.20 0.33 0.33 - 0.20 0.20 - - - 0.33 0.20 - xorq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s index e54f93ef8f5350..042740c91d8083 100644 --- 
a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-x87.s @@ -372,7 +372,7 @@ fyl2xp1 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 105.00 36.50 70.33 70.33 78.50 133.00 48.50 19.00 19.00 32.50 1.00 27.33 7.00 +# CHECK-NEXT: 105.00 36.50 70.33 70.33 78.50 133.00 48.50 19.00 19.00 32.50 27.33 1.00 7.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -380,12 +380,12 @@ fyl2xp1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - fabs # CHECK-NEXT: - - - - - 1.00 - - - - - - - fadd %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fadd %st(2), %st -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fadds (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - faddl (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fadds (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - faddl (%ecx) # CHECK-NEXT: - - - - - 1.00 - - - - - - - faddp %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - faddp %st, %st(2) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fiadds (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fiaddl (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fiadds (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fiaddl (%ecx) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fbld (%ecx) # CHECK-NEXT: - - - - 1.50 - - - - 0.50 - - - fbstp (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fchs @@ -400,12 +400,12 @@ fyl2xp1 # CHECK-NEXT: - 1.00 - - - - - - - - - - - fcmovu %st(1), %st # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcom %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcom %st(3) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fcoms (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fcoml (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fcoms 
(%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fcoml (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcomp %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcomp %st(3) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fcomps (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fcompl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fcomps (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fcompl (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fcompp # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcomi %st(3), %st # CHECK-NEXT: - - - - - 1.00 - - - - - - - fcompi %st(3), %st @@ -413,28 +413,28 @@ fyl2xp1 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - fdecstp # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdiv %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdiv %st(2), %st -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fdivs (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fdivl (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fdivs (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fdivl (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivp %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivp %st, %st(2) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fidivs (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fidivl (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fidivs (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fidivl (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivr %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivr %st(2), %st -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fdivrs (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fdivrl (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fdivrs (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fdivrl (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fdivrp %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - 
fdivrp %st, %st(2) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fidivrs (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fidivrl (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fidivrs (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fidivrl (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - ffree %st(0) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - ficoms (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - ficoml (%eax) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - ficomps (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - ficompl (%eax) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - filds (%edx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fildl (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fildll (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - ficoms (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - ficoml (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - ficomps (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - ficompl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - filds (%edx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fildl (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fildll (%eax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - fincstp # CHECK-NEXT: 3.00 1.50 - - - 9.00 1.50 - - - - - - fninit # CHECK-NEXT: - - - - 1.50 1.00 - - - 0.50 - - - fists (%edx) @@ -446,11 +446,11 @@ fyl2xp1 # CHECK-NEXT: - - - - 1.50 1.00 - - - 0.50 - - - fisttpl (%ecx) # CHECK-NEXT: - - - - 1.50 1.00 - - - 0.50 - - - fisttpll (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fld %st(0) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - flds (%edx) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - fldl (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - - - - - - - 0.33 - fldt (%eax) -# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - - 0.33 - fldcw (%eax) -# CHECK-NEXT: 9.50 - 22.17 22.17 - 2.50 
5.00 - - - - 2.67 - fldenv (%eax) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - flds (%edx) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - fldl (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - - - - - - 0.33 - - fldt (%eax) +# CHECK-NEXT: 1.50 - 0.33 0.33 - 0.50 - - - - 0.33 - - fldcw (%eax) +# CHECK-NEXT: 9.50 - 22.17 22.17 - 2.50 5.00 - - - 2.67 - - fldenv (%eax) # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - fld1 # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - fldl2e # CHECK-NEXT: 1.00 - - - - 1.00 - - - - - - - fldl2t @@ -460,12 +460,12 @@ fyl2xp1 # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - fldz # CHECK-NEXT: 1.00 - - - - - - - - - - - - fmul %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fmul %st(2), %st -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fmuls (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - - 0.33 - fmull (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fmuls (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - - - - - - 0.33 - - fmull (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fmulp %st, %st(1) # CHECK-NEXT: 1.00 - - - - - - - - - - - - fmulp %st, %st(2) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fimuls (%ecx) -# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - - 0.33 - fimull (%eax) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fimuls (%ecx) +# CHECK-NEXT: 1.00 - 0.33 0.33 - 1.00 - - - - 0.33 - - fimull (%eax) # CHECK-NEXT: 0.50 - - - - 0.50 - - - - - - - fnop # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fpatan # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fprem @@ -493,20 +493,20 @@ fyl2xp1 # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fnsave (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsub %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsub %st(2), %st -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fsubs (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fsubl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fsubs (%ecx) +# CHECK-NEXT: - - 0.33 0.33 
- 1.00 - - - - 0.33 - - fsubl (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubp %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubp %st, %st(2) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fisubs (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fisubl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fisubs (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fisubl (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubr %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubr %st(2), %st -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fsubrs (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - - 0.33 - fsubrl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fsubrs (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 1.00 - - - - 0.33 - - fsubrl (%eax) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubrp %st, %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fsubrp %st, %st(2) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fisubrs (%ecx) -# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - - 0.33 - fisubrl (%eax) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fisubrs (%ecx) +# CHECK-NEXT: - - 0.33 0.33 - 2.00 - - - - 0.33 - - fisubrl (%eax) # CHECK-NEXT: 1.00 - - - - - - - - - - - - ftst # CHECK-NEXT: - - - - - 1.00 - - - - - - - fucom %st(1) # CHECK-NEXT: - - - - - 1.00 - - - - - - - fucom %st(3) @@ -519,8 +519,8 @@ fyl2xp1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - fxam # CHECK-NEXT: 4.00 2.00 - - - 4.00 5.00 - - - - - - fxch %st(1) # CHECK-NEXT: 4.00 2.00 - - - 4.00 5.00 - - - - - - fxch %st(3) -# CHECK-NEXT: 5.50 0.50 34.50 34.50 - 1.00 3.00 - - - - 11.00 - fxrstor (%eax) -# CHECK-NEXT: 8.00 11.00 0.67 0.67 19.00 6.00 6.00 19.00 19.00 19.00 1.00 0.67 - fxsave (%eax) +# CHECK-NEXT: 5.50 0.50 34.50 34.50 - 1.00 3.00 - - - 11.00 - - fxrstor (%eax) +# CHECK-NEXT: 8.00 11.00 0.67 0.67 19.00 6.00 6.00 19.00 19.00 19.00 0.67 1.00 - fxsave (%eax) # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fxtract # 
CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fyl2x # CHECK-NEXT: 0.25 0.25 - - - 0.25 0.25 - - - - - - fyl2xp1 diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s index 824e8d37286847..7ba16d35730e06 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-xsave.s @@ -43,12 +43,12 @@ xsetbv # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 61.50 61.17 23.00 23.00 0.50 43.33 56.67 0.50 0.50 0.50 6.33 2.00 - +# CHECK-NEXT: 61.50 61.17 23.00 23.00 0.50 43.33 56.67 0.50 0.50 0.50 2.00 6.33 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: 6.40 6.40 - - - 2.40 6.40 - - - 1.40 - - xgetbv -# CHECK-NEXT: - 1.00 10.83 10.83 - - 8.00 - - - - 0.33 - xrstor (%rax) -# CHECK-NEXT: - 1.00 10.83 10.83 - - 8.00 - - - - 0.33 - xrstors (%rax) -# CHECK-NEXT: 41.50 38.50 1.33 1.33 0.50 32.00 22.00 0.50 0.50 0.50 - 1.33 - xsave (%rax) -# CHECK-NEXT: 13.60 14.27 - - - 8.93 12.27 - - - 4.93 - - xsetbv +# CHECK-NEXT: 6.40 6.40 - - - 2.40 6.40 - - - - 1.40 - xgetbv +# CHECK-NEXT: - 1.00 10.83 10.83 - - 8.00 - - - 0.33 - - xrstor (%rax) +# CHECK-NEXT: - 1.00 10.83 10.83 - - 8.00 - - - 0.33 - - xrstors (%rax) +# CHECK-NEXT: 41.50 38.50 1.33 1.33 0.50 32.00 22.00 0.50 0.50 0.50 1.33 - - xsave (%rax) +# CHECK-NEXT: 13.60 14.27 - - - 8.93 12.27 - - - - 4.93 - xsetbv diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s index fd5c0d29d12d43..e11c548f5a7b6a 100644 --- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/zero-idioms.s @@ -345,14 +345,14 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK: Resource pressure per iteration: # 
CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 46.00 44.00 - - - 46.00 1.00 - - - 2.00 - - +# CHECK-NEXT: 46.00 44.00 - - - 46.00 1.00 - - - - 2.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: -# CHECK-NEXT: - - - - - - - - - - 1.00 - - subl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - - 1.00 - subl %eax, %eax # CHECK-NEXT: - - - - - - 1.00 - - - - - - subq %rax, %rax # CHECK-NEXT: - - - - - 1.00 - - - - - - - xorl %eax, %eax -# CHECK-NEXT: - - - - - - - - - - 1.00 - - xorq %rax, %rax +# CHECK-NEXT: - - - - - - - - - - - 1.00 - xorq %rax, %rax # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtb %mm2, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtd %mm2, %mm2 # CHECK-NEXT: 1.00 - - - - - - - - - - - - pcmpgtw %mm2, %mm2 diff --git a/llvm/test/tools/llvm-profdata/memprof-merge-versions.test b/llvm/test/tools/llvm-profdata/memprof-merge-versions.test index 722ef43484e7e9..0a658720162593 100644 --- a/llvm/test/tools/llvm-profdata/memprof-merge-versions.test +++ b/llvm/test/tools/llvm-profdata/memprof-merge-versions.test @@ -7,9 +7,6 @@ RUN: echo "1" >> %t.proftext RUN: echo "1" >> %t.proftext To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang -RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=1 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v1 -RUN: llvm-profdata show %t.prof.v1 | FileCheck %s - RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2 RUN: llvm-profdata show %t.prof.v2 | FileCheck %s diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-skip-bb-entries.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-skip-bb-entries.test new file mode 100644 index 00000000000000..44d718fa366cc9 --- /dev/null +++ 
b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-skip-bb-entries.test @@ -0,0 +1,89 @@ +## This test checks how llvm-readobj prints skipped BB entries (-basic-block-address-map-skip-emit-bb-entries). + +## Check 64-bit: +# RUN: yaml2obj %s -DBITS=64 -o %t1.x64.o +# RUN: llvm-readobj %t1.x64.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DFILE=%t1.x64.o + +## Check 32-bit: +# RUN: yaml2obj %s -DBITS=32 -o %t1.x32.o +# RUN: llvm-readobj %t1.x32.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DFILE=%t1.x32.o + +# CHECK: BBAddrMap [ +# CHECK-NEXT: Function { +# CHECK-NEXT: At: 0x11111 +# CHECK-NEXT: Name: foo +# CHECK-NEXT: BB Ranges [ +# CHECK-NEXT: { +# CHECK-NEXT: Base Address: 0x11111 +# CHECK-NEXT: BB Entries [ +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: PGO analyses { +# CHECK-NEXT: FuncEntryCount: 100 +# CHECK-NEXT: PGO BB entries [ +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT:] +# CHECK-NEXT:BBAddrMap [ +# CHECK-NEXT: Function { +# CHECK-NEXT: At: 0x33333 +# CHECK-NEXT: Name: bar +# CHECK-NEXT: BB Ranges [ +# CHECK-NEXT: { +# CHECK-NEXT: Base Address: 0x33333 +# CHECK-NEXT: BB Entries [ +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: PGO analyses { +# CHECK-NEXT: FuncEntryCount: 89 +# CHECK-NEXT: PGO BB entries [ +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: } +# CHECK-NEXT:] + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS]] + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [SHF_ALLOC] + - Name: .text.bar + Type: SHT_PROGBITS + Flags: [SHF_ALLOC] + - Name: .llvm_bb_addr_map + Type: SHT_LLVM_BB_ADDR_MAP + ShSize: [[SIZE=]] + Link: .text + Entries: + - Version: 2 + Feature: 0x17 + BBRanges: + - BaseAddress: 0x11111 + PGOAnalyses: + - FuncEntryCount: 100 + - Name: '.llvm_bb_addr_map2' + Type: SHT_LLVM_BB_ADDR_MAP + Link: .text.bar + Entries: + - Version: 2 + Feature: 0x17 + BBRanges: + - BaseAddress: 0x33333 + PGOAnalyses: + - FuncEntryCount: 
89 +Symbols: + - Name: foo + Section: .text + Type: STT_FUNC + Value: 0x11111 + - Name: bar + Section: .text.bar + Type: STT_FUNC + Value: 0x33333 diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml index 4dfaf60be3c0ed..a4cb572e6d9932 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -66,7 +66,7 @@ Sections: ## Check that yaml2obj generates a warning when we use unsupported feature. # RUN: yaml2obj --docnum=2 %s 2>&1 | FileCheck %s --check-prefix=INVALID-FEATURE -# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xff +# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xf0 --- !ELF FileHeader: @@ -79,5 +79,4 @@ Sections: Entries: - Version: 2 ## Specify unsupported feature - Feature: 0xFF - + Feature: 0xF0 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 7641a80129de35..2acf1cc34b2d8e 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -333,8 +333,7 @@ cl::opt MemProfVersionRequested( "memprof-version", cl::Hidden, cl::sub(MergeSubcommand), cl::desc("Specify the version of the memprof format to use"), cl::init(memprof::Version3), - cl::values(clEnumValN(memprof::Version1, "1", "version 1"), - clEnumValN(memprof::Version2, "2", "version 2"), + cl::values(clEnumValN(memprof::Version2, "2", "version 2"), clEnumValN(memprof::Version3, "3", "version 3"))); cl::opt MemProfFullSchema( diff --git a/llvm/unittests/IR/ModuleTest.cpp b/llvm/unittests/IR/ModuleTest.cpp index c18301d5e6d758..36c356730d27a7 100644 --- a/llvm/unittests/IR/ModuleTest.cpp +++ b/llvm/unittests/IR/ModuleTest.cpp @@ -14,6 +14,7 @@ #include "llvm/Pass.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SourceMgr.h" +#include 
"llvm/Support/raw_ostream.h" #include "gtest/gtest.h" #include @@ -326,4 +327,80 @@ TEST(ModuleTest, GlobalList) { EXPECT_EQ(M->global_size(), 1u); } +TEST(ModuleTest, MoveAssign) { + // This tests that we can move-assign modules, we parse two modules and + // move assign the second one to the first one, and check that the print + // is equal to what we loaded. + LLVMContext C; + SMDiagnostic Err; + LLVMContext Context; + std::unique_ptr M1 = parseAssemblyString(R"( +; ModuleID = '' +source_filename = "" + +@GV1 = external global i32 + +@GA1 = alias void (), ptr @Foo1 + +define void @Foo1() { + ret void +} + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} +!foo1 = !{!3} +!bar1 = !{!4} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang1", isOptimized: true, flags: "-O2", runtimeVersion: 0, splitDebugFilename: "abc.debug", emissionKind: LineTablesOnly) +!2 = !DIFile(filename: "path/to/file1", directory: "/path/to/dir1") +!3 = !DILocation(line: 12, column: 34, scope: !4) +!4 = distinct !DISubprogram(name: "foo1", scope: null, spFlags: DISPFlagDefinition, unit: !1) +)", + Err, Context); + ASSERT_TRUE(M1.get()); + + StringLiteral M2Str = R"( +; ModuleID = '' +source_filename = "" + +@GV2 = external global i32 + +@GA2 = alias void (), ptr @Foo2 + +define void @Foo2() { + ret void +} + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} +!foo2 = !{!3} +!bar2 = !{!4} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang2", isOptimized: true, flags: "-O2", runtimeVersion: 0, splitDebugFilename: "abc.debug", emissionKind: LineTablesOnly) +!2 = !DIFile(filename: "path/to/file2", directory: "/path/to/dir2") +!3 = !DILocation(line: 1234, column: 56, scope: !4) +!4 = distinct !DISubprogram(name: "foo2", scope: null, spFlags: DISPFlagDefinition, unit: !1) +)"; + { + std::unique_ptr M2 = parseAssemblyString(M2Str, Err, Context); + 
ASSERT_TRUE(M2.get()); + auto *GV1 = M1->getNamedValue("GV1"); + ASSERT_TRUE(GV1); + auto *GV2 = M2->getNamedValue("GV2"); + ASSERT_TRUE(GV2); + ASSERT_EQ(GV2->getParent(), &*M2); + *M1 = std::move(*M2); + ASSERT_EQ(GV2->getParent(), &*M1); + } + + std::string M1Print; + { + llvm::raw_string_ostream Os(M1Print); + Os << "\n" << *M1; + } + ASSERT_EQ(M2Str, M1Print); +} + } // end namespace diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index c13dc0e3fab898..2a0921690914b4 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -1148,11 +1148,11 @@ TEST(ELFObjectFileTest, ReadPGOAnalysisMap) { BBAddrMap E1 = { {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}}}}}}; - PGOAnalysisMap P1 = {892, {}, {true, false, false, false}}; + PGOAnalysisMap P1 = {892, {}, {true, false, false, false, false}}; BBAddrMap E2 = { {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}}}}}}; PGOAnalysisMap P2 = { - {}, {{BlockFrequency(343), {}}}, {false, true, false, false}}; + {}, {{BlockFrequency(343), {}}}, {false, true, false, false, false}}; BBAddrMap E3 = {{{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}}, {1, 0x3, 0x3, {false, false, true, false, false}}, @@ -1163,7 +1163,7 @@ TEST(ELFObjectFileTest, ReadPGOAnalysisMap) { {2, BranchProbability::getRaw(0xeeee'eeee)}}}, {{}, {{2, BranchProbability::getRaw(0xffff'ffff)}}}, {{}, {}}}, - {false, false, true, false}}; + {false, false, true, false, false}}; BBAddrMap E4 = {{{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}}, {1, 0x4, 0x4, {false, false, false, false, false}}, @@ -1180,10 +1180,10 @@ TEST(ELFObjectFileTest, ReadPGOAnalysisMap) { {3, BranchProbability::getRaw(0xeeee'eeee)}}}, {BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}}, {BlockFrequency(1000), {}}}, - {true, true, true, false}}; + {true, true, true, false, false}}; BBAddrMap E5 = { {{0x55555, {{2, 0x0, 
0x2, {false, false, true, false, false}}}}}}; - PGOAnalysisMap P5 = {{}, {}, {false, false, false, false}}; + PGOAnalysisMap P5 = {{}, {}, {false, false, false, false, false}}; BBAddrMap E6 = { {{0x66666, {{0, 0x0, 0x6, {false, true, true, false, false}}, @@ -1195,7 +1195,7 @@ TEST(ELFObjectFileTest, ReadPGOAnalysisMap) { {2, BranchProbability::getRaw(0xcccc'cccc)}}}, {{}, {{2, BranchProbability::getRaw(0x8888'8888)}}}, {{}, {}}}, - {false, false, true, true}}; + {false, false, true, true, false}}; std::vector Section0BBAddrMaps = {E4, E5, E6}; std::vector Section1BBAddrMaps = {E3}; diff --git a/llvm/unittests/Object/ELFTypesTest.cpp b/llvm/unittests/Object/ELFTypesTest.cpp index f04d45cf0983c7..13130dde80ef10 100644 --- a/llvm/unittests/Object/ELFTypesTest.cpp +++ b/llvm/unittests/Object/ELFTypesTest.cpp @@ -102,15 +102,15 @@ static_assert( TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { const std::array Decoded = { - {{false, false, false, false}, - {true, false, false, false}, - {false, true, false, false}, - {false, false, true, false}, - {false, false, false, true}, - {true, true, false, false}, - {false, true, true, false}, - {false, true, true, true}, - {true, true, true, true}}}; + {{false, false, false, false, false}, + {true, false, false, false, false}, + {false, true, false, false, false}, + {false, false, true, false, false}, + {false, false, false, true, false}, + {true, true, false, false, false}, + {false, true, true, false, false}, + {false, true, true, true, false}, + {true, true, true, true, false}}}; const std::array Encoded = { {0b0000, 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b0110, 0b1110, 0b1111}}; for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) @@ -125,9 +125,9 @@ TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { TEST(ELFTypesTest, BBAddrMapFeaturesInvalidEncodingTest) { const std::array Errors = { - "invalid encoding for BBAddrMap::Features: 0x10", - "invalid encoding for BBAddrMap::Features: 0xff"}; - const 
std::array Values = {{0b10000, 0b1111'1111}}; + "invalid encoding for BBAddrMap::Features: 0x20", + "invalid encoding for BBAddrMap::Features: 0xf0"}; + const std::array Values = {{0b10'0000, 0b1111'0000}}; for (const auto &[Val, Error] : llvm::zip(Values, Errors)) { EXPECT_THAT_ERROR(BBAddrMap::Features::decode(Val).takeError(), FailedWithMessage(Error)); diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 8bd39fd71266af..f366b228e63512 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -394,21 +394,6 @@ MemInfoBlock makePartialMIB() { return MIB; } -IndexedMemProfRecord makeRecord( - std::initializer_list> - AllocFrames, - std::initializer_list> - CallSiteFrames, - const MemInfoBlock &Block = makeFullMIB()) { - llvm::memprof::IndexedMemProfRecord MR; - for (const auto &Frames : AllocFrames) - MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames), - Block); - for (const auto &Frames : CallSiteFrames) - MR.CallSites.push_back(Frames); - return MR; -} - IndexedMemProfRecord makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames, std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames, @@ -456,48 +441,6 @@ MATCHER_P(EqualsRecord, Want, "") { return true; } -TEST_F(InstrProfTest, test_memprof_v0) { - ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf), - Succeeded()); - - const IndexedMemProfRecord IndexedMR = makeRecord( - /*AllocFrames=*/ - { - {0, 1}, - {2, 3}, - }, - /*CallSiteFrames=*/{ - {4, 5}, - }); - - memprof::IndexedMemProfData MemProfData; - MemProfData.Frames = getFrameMapping(); - MemProfData.Records.try_emplace(0x9999, IndexedMR); - Writer.addMemProfData(MemProfData, Err); - - auto Profile = Writer.writeBuffer(); - readProfile(std::move(Profile)); - - auto RecordOr = Reader->getMemProfRecord(0x9999); - ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded()); - const 
memprof::MemProfRecord &Record = RecordOr.get(); - - std::optional LastUnmappedFrameId; - auto IdToFrameCallback = [&](const memprof::FrameId Id) { - auto Iter = MemProfData.Frames.find(Id); - if (Iter == MemProfData.Frames.end()) { - LastUnmappedFrameId = Id; - return memprof::Frame(0, 0, 0, false); - } - return Iter->second; - }; - - const memprof::MemProfRecord WantRecord(IndexedMR, IdToFrameCallback); - ASSERT_EQ(LastUnmappedFrameId, std::nullopt) - << "could not map frame id: " << *LastUnmappedFrameId; - EXPECT_THAT(WantRecord, EqualsRecord(Record)); -} - TEST_F(InstrProfTest, test_memprof_v2_full_schema) { const MemInfoBlock MIB = makeFullMIB(); diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 79b644dc5a528d..7b9910e295df9d 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -268,39 +268,6 @@ TEST(MemProf, PortableWrapper) { EXPECT_EQ(3UL, ReadBlock.getAllocCpuId()); } -TEST(MemProf, RecordSerializationRoundTripVersion1) { - const auto Schema = llvm::memprof::getFullSchema(); - - MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, - /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, - /*dealloc_cpu=*/4, /*Histogram=*/0, /*HistogramSize=*/0); - - llvm::SmallVector> AllocCallStacks = { - {0x123, 0x345}, {0x123, 0x567}}; - - llvm::SmallVector> CallSites = {{0x333, 0x777}}; - - IndexedMemProfRecord Record; - for (const auto &ACS : AllocCallStacks) { - // Use the same info block for both allocation sites. 
- Record.AllocSites.emplace_back(ACS, llvm::memprof::hashCallStack(ACS), - Info); - } - Record.CallSites.assign(CallSites); - for (const auto &CS : CallSites) - Record.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS)); - - std::string Buffer; - llvm::raw_string_ostream OS(Buffer); - Record.serialize(Schema, OS, llvm::memprof::Version1); - - const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize( - Schema, reinterpret_cast(Buffer.data()), - llvm::memprof::Version1); - - EXPECT_EQ(Record, GotRecord); -} - TEST(MemProf, RecordSerializationRoundTripVerion2) { const auto Schema = llvm::memprof::getFullSchema(); @@ -661,7 +628,7 @@ TEST(MemProf, RadixTreeBuilderEmpty) { FrameHistogram = llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty()); const auto Mappings = Builder.takeCallStackPos(); @@ -679,7 +646,7 @@ TEST(MemProf, RadixTreeBuilderOne) { FrameHistogram = llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ 3U, // Size of CS1, @@ -706,7 +673,7 @@ TEST(MemProf, RadixTreeBuilderTwo) { FrameHistogram = llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ @@ -744,7 +711,7 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) { 
FrameHistogram = llvm::memprof::computeFrameHistogram(MemProfCallStackData); llvm::memprof::CallStackRadixTreeBuilder Builder; - Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes, + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, FrameHistogram); EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({ diff --git a/llvm/unittests/Target/AArch64/AArch64RegisterInfoTest.cpp b/llvm/unittests/Target/AArch64/AArch64RegisterInfoTest.cpp deleted file mode 100644 index 00e221eb434673..00000000000000 --- a/llvm/unittests/Target/AArch64/AArch64RegisterInfoTest.cpp +++ /dev/null @@ -1,152 +0,0 @@ -#include "AArch64RegisterInfo.h" -#include "AArch64InstrInfo.h" -#include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" - -#include "gtest/gtest.h" - -#define GET_COMPUTE_FEATURES -#include "AArch64GenInstrInfo.inc" - -using namespace llvm; - -namespace { - -std::unique_ptr createTargetMachine(const std::string &CPU) { - auto TT(Triple::normalize("aarch64--")); - - LLVMInitializeAArch64TargetInfo(); - LLVMInitializeAArch64Target(); - LLVMInitializeAArch64TargetMC(); - - std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); - - return std::unique_ptr( - TheTarget->createTargetMachine(TT, CPU, "", TargetOptions(), std::nullopt, - std::nullopt, CodeGenOptLevel::Default)); -} - -std::unique_ptr createInstrInfo(TargetMachine *TM) { - AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), - std::string(TM->getTargetCPU()), - std::string(TM->getTargetFeatureString()), *TM, true); - return std::make_unique(ST); -} - -TEST(AArch64LaneBitmasks, SubRegs) { - std::unique_ptr TM = createTargetMachine(""); - ASSERT_TRUE(TM); - - std::unique_ptr II = 
createInstrInfo(TM.get()); - ASSERT_TRUE(II); - - const AArch64RegisterInfo &TRI = II->getRegisterInfo(); - - // Test that the lane masks for the subregisters 'bsub, hsub, ssub, etc' - // are composed correctly. - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::bsub) | - TRI.getSubRegIndexLaneMask(AArch64::bsub_hi), - TRI.getSubRegIndexLaneMask(AArch64::hsub)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::hsub) | - TRI.getSubRegIndexLaneMask(AArch64::hsub_hi), - TRI.getSubRegIndexLaneMask(AArch64::ssub)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::ssub) | - TRI.getSubRegIndexLaneMask(AArch64::ssub_hi), - TRI.getSubRegIndexLaneMask(AArch64::dsub)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::dsub) | - TRI.getSubRegIndexLaneMask(AArch64::dsub_hi), - TRI.getSubRegIndexLaneMask(AArch64::zsub)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::zsub) | - TRI.getSubRegIndexLaneMask(AArch64::zsub_hi), - TRI.getSubRegIndexLaneMask(AArch64::zsub0)); - - // Test that the lane masks for tuples are composed correctly. 
- EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::dsub1_then_bsub) | - TRI.getSubRegIndexLaneMask(AArch64::dsub1_then_bsub_hi), - TRI.getSubRegIndexLaneMask(AArch64::dsub1_then_hsub)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::dsub1_then_hsub) | - TRI.getSubRegIndexLaneMask(AArch64::dsub1_then_hsub_hi), - TRI.getSubRegIndexLaneMask(AArch64::dsub1_then_ssub)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::dsub1_then_ssub) | - TRI.getSubRegIndexLaneMask(AArch64::dsub1_then_ssub_hi), - TRI.getSubRegIndexLaneMask(AArch64::dsub1)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::dsub1) | - TRI.getSubRegIndexLaneMask(AArch64::qsub1_then_dsub_hi), - TRI.getSubRegIndexLaneMask(AArch64::qsub1)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::sub_32) | - TRI.getSubRegIndexLaneMask(AArch64::sub_32_hi), - TRI.getSubRegIndexLaneMask(AArch64::sube64)); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::subo64_then_sub_32) | - TRI.getSubRegIndexLaneMask(AArch64::subo64_then_sub_32_hi), - TRI.getSubRegIndexLaneMask(AArch64::subo64)); - - // Test that there is no overlap between different (sub)registers - // in a tuple. 
- EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::dsub0) & - TRI.getSubRegIndexLaneMask(AArch64::dsub1) & - TRI.getSubRegIndexLaneMask(AArch64::dsub2) & - TRI.getSubRegIndexLaneMask(AArch64::dsub3), - LaneBitmask::getNone()); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::qsub0) & - TRI.getSubRegIndexLaneMask(AArch64::qsub1) & - TRI.getSubRegIndexLaneMask(AArch64::qsub2) & - TRI.getSubRegIndexLaneMask(AArch64::qsub3), - LaneBitmask::getNone()); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::zsub0) & - TRI.getSubRegIndexLaneMask(AArch64::zsub1) & - TRI.getSubRegIndexLaneMask(AArch64::zsub2) & - TRI.getSubRegIndexLaneMask(AArch64::zsub3), - LaneBitmask::getNone()); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::sube32) & - TRI.getSubRegIndexLaneMask(AArch64::subo32), - LaneBitmask::getNone()); - - EXPECT_EQ(TRI.getSubRegIndexLaneMask(AArch64::sube64) & - TRI.getSubRegIndexLaneMask(AArch64::subo64), - LaneBitmask::getNone()); - - // Test that getting a subregister results in the expected subregister. 
- EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::bsub), AArch64::B0); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::hsub), AArch64::H0); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::ssub), AArch64::S0); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::dsub), AArch64::D0); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::zsub), AArch64::Q0); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::zsub0), AArch64::Z0); - - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::dsub1_then_bsub), - AArch64::B8); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::dsub1_then_hsub), - AArch64::H8); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::dsub1_then_ssub), - AArch64::S8); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::dsub1), AArch64::D8); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::qsub1), AArch64::Q8); - EXPECT_EQ(TRI.getSubReg(AArch64::Z0_Z8, AArch64::zsub1), AArch64::Z8); - - EXPECT_EQ(TRI.getSubReg(AArch64::X0_X1, AArch64::sube64), AArch64::X0); - EXPECT_EQ(TRI.getSubReg(AArch64::X0_X1, AArch64::subo64), AArch64::X1); - EXPECT_EQ(TRI.getSubReg(AArch64::X0_X1, AArch64::sub_32), AArch64::W0); - EXPECT_EQ(TRI.getSubReg(AArch64::X0_X1, AArch64::subo64_then_sub_32), - AArch64::W1); -} - -} // namespace diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt index 449888838acdc6..f53668373efee9 100644 --- a/llvm/unittests/Target/AArch64/CMakeLists.txt +++ b/llvm/unittests/Target/AArch64/CMakeLists.txt @@ -28,7 +28,6 @@ add_llvm_target_unittest(AArch64Tests InstSizes.cpp MatrixRegisterAliasing.cpp SMEAttributesTest.cpp - AArch64RegisterInfoTest.cpp AArch64SVESchedPseudoTest.cpp Immediates.cpp ) diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 3e0be7027eeff9..c2aa571b547c64 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -2491,7 +2491,6 @@ static void 
emitValidateOperandClass(AsmMatcherInfo &Info, raw_ostream &OS) { if (!CI.isUserClass()) continue; - OS << " // '" << CI.ClassName << "' class\n"; OS << " case " << CI.Name << ": {\n"; OS << " DiagnosticPredicate DP(Operand." << CI.PredicateMethod << "());\n"; diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index a6f87119aca5ba..be2a2b3884c73b 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -953,7 +953,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) { OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[] = { // Descriptors\n"; - OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0, 0, 0 },\n"; + OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0, 0 },\n"; // Emit the register descriptors now. i = 0; @@ -968,8 +968,8 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS) { << DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i]) << ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", " << (Offset << RegUnitBits | FirstRU) << ", " - << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << ", " << Reg.Constant << ", " - << Reg.Artificial << " },\n"; + << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << ", " << Reg.Constant + << " },\n"; ++i; } OS << "};\n\n"; // End of register descriptors... diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py index f05d8b89e73b93..7d4fb7d8e15045 100644 --- a/llvm/utils/UpdateTestChecks/asm.py +++ b/llvm/utils/UpdateTestChecks/asm.py @@ -222,6 +222,11 @@ class string: flags=(re.M | re.S), ) +ASM_FUNCTION_XTENSA_RE = re.compile( + r"^(?P<func>[^:]+): +# @(?P=func)\n(?P<body>.*?)\n\.Lfunc_end\d+:\n", + flags=(re.M | re.S), +) + ASM_FUNCTION_CSKY_RE = re.compile( r"^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n(?:\s*\.?Lfunc_begin[^:\n]*:\n)?[^:]*?" 
r"(?P<body>^##?[ \t]+[^:]+:.*?)\s*" @@ -492,6 +497,17 @@ def scrub_asm_ve(asm, args): return asm +def scrub_asm_xtensa(asm, args): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + asm = common.SCRUB_WHITESPACE_RE.sub(r" ", asm) + # Expand the tabs used for indentation. + asm = string.expandtabs(asm, 2) + # Strip trailing whitespace. + asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r"", asm) + return asm + + def scrub_asm_csky(asm, args): # Scrub runs of whitespace out of the assembly, but leave the leading # whitespace in place. @@ -576,6 +592,7 @@ def get_run_handler(triple): "wasm32": (scrub_asm_wasm, ASM_FUNCTION_WASM_RE), "wasm64": (scrub_asm_wasm, ASM_FUNCTION_WASM_RE), "ve": (scrub_asm_ve, ASM_FUNCTION_VE_RE), + "xtensa": (scrub_asm_xtensa, ASM_FUNCTION_XTENSA_RE), "csky": (scrub_asm_csky, ASM_FUNCTION_CSKY_RE), "nvptx": (scrub_asm_nvptx, ASM_FUNCTION_NVPTX_RE), "loongarch32": (scrub_asm_loongarch, ASM_FUNCTION_LOONGARCH_RE), diff --git a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn index 51dc24481a513b..945d31afca10f0 100644 --- a/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn @@ -15,6 +15,7 @@ unittest("CoreTests") { "BinaryContext.cpp", "DynoStats.cpp", "MCPlusBuilder.cpp", + "MemoryMaps.cpp", ] defines = [] diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn index 03c38757307d0c..c05500185fb613 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn @@ -68,6 +68,7 @@ static_library("LLVMHexagonCodeGen") { "HexagonISelLowering.cpp", "HexagonISelLoweringHVX.cpp", "HexagonInstrInfo.cpp", + "HexagonLoadStoreWidening.cpp", "HexagonLoopAlign.cpp", "HexagonLoopIdiomRecognition.cpp", "HexagonMCInstLower.cpp", @@ -83,7 +84,6 @@ 
static_library("LLVMHexagonCodeGen") { "HexagonSelectionDAGInfo.cpp", "HexagonSplitConst32AndConst64.cpp", "HexagonSplitDouble.cpp", - "HexagonStoreWidening.cpp", "HexagonSubtarget.cpp", "HexagonTargetMachine.cpp", "HexagonTargetObjectFile.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Target/AArch64/BUILD.gn index c37928632d0203..cddec7aee57454 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Target/AArch64/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Target/AArch64/BUILD.gn @@ -17,7 +17,6 @@ unittest("AArch64Tests") { include_dirs = [ "//llvm/lib/Target/AArch64" ] sources = [ "AArch64InstPrinterTest.cpp", - "AArch64RegisterInfoTest.cpp", "AArch64SVESchedPseudoTest.cpp", "AddressingModes.cpp", "DecomposeStackOffsetTest.cpp", diff --git a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake index c07c55b1e17ad5..d3a98aaf6ffd17 100644 --- a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake +++ b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake @@ -25,7 +25,7 @@ macro(mlir_configure_python_dev_packages) message(STATUS "Found python libraries: ${Python3_LIBRARIES}") message(STATUS "Found numpy v${Python3_NumPy_VERSION}: ${Python3_NumPy_INCLUDE_DIRS}") mlir_detect_pybind11_install() - find_package(pybind11 2.9 CONFIG REQUIRED) + find_package(pybind11 2.10 CONFIG REQUIRED) message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIR}") message(STATUS "Python prefix = '${PYTHON_MODULE_PREFIX}', " "suffix = '${PYTHON_MODULE_SUFFIX}', " diff --git a/mlir/docs/PatternRewriter.md b/mlir/docs/PatternRewriter.md index c61ceaf81681e2..2f1483db8190a7 100644 --- a/mlir/docs/PatternRewriter.md +++ b/mlir/docs/PatternRewriter.md @@ -73,7 +73,7 @@ public: // otherwise. // ... 
} - void rewrite(Operation *op, PatternRewriter &rewriter) { + void rewrite(Operation *op, PatternRewriter &rewriter) const override { // The `rewrite` method performs mutations on the IR rooted at `op` using // the provided rewriter. All mutations must go through the provided // rewriter. @@ -81,7 +81,7 @@ public: /// In this section, the `match` and `rewrite` implementation is specified /// using a single hook. - LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) { + LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { // The `matchAndRewrite` method performs both the matching and the mutation. // Note that the match must reach a successful point before IR mutation may // take place. diff --git a/mlir/include/mlir-c/Pass.h b/mlir/include/mlir-c/Pass.h index 2218ec0f47d199..6019071cfdaa29 100644 --- a/mlir/include/mlir-c/Pass.h +++ b/mlir/include/mlir-c/Pass.h @@ -75,10 +75,13 @@ MLIR_CAPI_EXPORTED MlirLogicalResult mlirPassManagerRunOnOp(MlirPassManager passManager, MlirOperation op); /// Enable IR printing. +/// The treePrintingPath argument is an optional path to a directory +/// where the dumps will be produced. If it isn't provided then dumps +/// are produced to stderr. MLIR_CAPI_EXPORTED void mlirPassManagerEnableIRPrinting( MlirPassManager passManager, bool printBeforeAll, bool printAfterAll, bool printModuleScope, bool printAfterOnlyOnChange, - bool printAfterOnlyOnFailure); + bool printAfterOnlyOnFailure, MlirStringRef treePrintingPath); /// Enable / disable verify-each. 
MLIR_CAPI_EXPORTED void diff --git a/mlir/include/mlir/Conversion/CMakeLists.txt b/mlir/include/mlir/Conversion/CMakeLists.txt index d212bf3e395e71..9f76ab659215ea 100644 --- a/mlir/include/mlir/Conversion/CMakeLists.txt +++ b/mlir/include/mlir/Conversion/CMakeLists.txt @@ -6,3 +6,5 @@ mlir_tablegen(Passes.capi.cpp.inc -gen-pass-capi-impl --prefix Conversion) add_public_tablegen_target(MLIRConversionPassIncGen) add_mlir_doc(Passes ConversionPasses ./ -gen-pass-doc) + +add_subdirectory(ConvertToLLVM) diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt b/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt new file mode 100644 index 00000000000000..54d7a03fc22dff --- /dev/null +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS ToLLVMInterface.td) +mlir_tablegen(ToLLVMAttrInterface.h.inc -gen-attr-interface-decls) +mlir_tablegen(ToLLVMAttrInterface.cpp.inc -gen-attr-interface-defs) +mlir_tablegen(ToLLVMOpInterface.h.inc -gen-op-interface-decls) +mlir_tablegen(ToLLVMOpInterface.cpp.inc -gen-op-interface-defs) +add_public_tablegen_target(MLIRConvertToLLVMInterfaceIncGen) +add_dependencies(mlir-generic-headers MLIRConvertToLLVMInterfaceIncGen) diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h index 00aeed9bf29dc2..6fd043646acd31 100644 --- a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h @@ -11,6 +11,7 @@ #include "mlir/IR/DialectInterface.h" #include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" namespace mlir { class ConversionTarget; @@ -18,6 +19,7 @@ class LLVMTypeConverter; class MLIRContext; class Operation; class RewritePatternSet; +class AnalysisManager; /// Base class for dialect interfaces providing translation to LLVM IR. 
/// Dialects that can be translated should provide an implementation of this @@ -50,6 +52,18 @@ void populateConversionTargetFromOperation(Operation *op, LLVMTypeConverter &typeConverter, RewritePatternSet &patterns); +/// Helper function for populating LLVM conversion patterns. If `op` implements +/// the `ConvertToLLVMOpInterface` interface, then the LLVM conversion pattern +/// attributes provided by the interface will be used to configure the +/// conversion target, type converter, and the pattern set. +void populateOpConvertToLLVMConversionPatterns(Operation *op, + ConversionTarget &target, + LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns); } // namespace mlir +#include "mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.h.inc" + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.h.inc" + #endif // MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_H diff --git a/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td new file mode 100644 index 00000000000000..1331a9802c570f --- /dev/null +++ b/mlir/include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td @@ -0,0 +1,76 @@ + +//===- ToLLVMInterface.td - Conversion to LLVM interfaces -----*- tablegen -*-===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines interfaces for managing transformations, including populating +// pattern rewrites. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD +#define MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD + +include "mlir/IR/OpBase.td" + +//===----------------------------------------------------------------------===// +// Attribute interface +//===----------------------------------------------------------------------===// + +def ConvertToLLVMAttrInterface : + AttrInterface<"ConvertToLLVMAttrInterface"> { + let description = [{ + The `ConvertToLLVMAttrInterface` attribute interfaces allows using + attributes to configure the convert to LLVM infrastructure, this includes: + - The conversion target. + - The LLVM type converter. + - The pattern set. + + This interface permits fined grained configuration of the `convert-to-llvm` + process. For example, attributes with target information like + `#nvvm.target` or `#rodcl.target` can leverage this interface for populating + patterns specific to a particular target. + }]; + let cppNamespace = "::mlir"; + let methods = [ + InterfaceMethod< + /*desc=*/[{ + Populate the dialect conversion target, type converter and pattern set. + }], + /*retTy=*/"void", + /*methodName=*/"populateConvertToLLVMConversionPatterns", + /*args=*/(ins "::mlir::ConversionTarget&":$target, + "::mlir::LLVMTypeConverter&":$typeConverter, + "::mlir::RewritePatternSet&":$patternSet)> + ]; +} + +//===----------------------------------------------------------------------===// +// Op interface +//===----------------------------------------------------------------------===// + +def ConvertToLLVMOpInterface : OpInterface<"ConvertToLLVMOpInterface"> { + let description = [{ + Interface for collecting all convert to LLVM attributes stored in an + operation. See `ConvertToLLVMAttrInterface` for more information on these + attributes. 
+ }]; + let cppNamespace = "::mlir"; + let methods = [ + InterfaceMethod< + /*desc=*/[{ + Populate the provided vector with a list of convert to LLVM attributes + to apply. + }], + /*retTy=*/"void", + /*methodName=*/"getConvertToLLVMConversionAttrs", + /*args=*/(ins + "::llvm::SmallVectorImpl<::mlir::ConvertToLLVMAttrInterface>&":$attrs) + > + ]; +} + +#endif // MLIR_CONVERSION_CONVERTTOLLVM_TOLLVMINTERFACE_TD diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h b/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h new file mode 100644 index 00000000000000..ad8c39fe676618 --- /dev/null +++ b/mlir/include/mlir/Conversion/GPUCommon/GPUToLLVM.h @@ -0,0 +1,25 @@ +//===- GPUToLLVM.h - Convert GPU to LLVM dialect ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files declares registration functions for converting GPU to LLVM. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H +#define MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H + +namespace mlir { +class DialectRegistry; +namespace gpu { +/// Registers the `ConvertToLLVMOpInterface` interface on the `gpu::GPUModuleOP` +/// operation. 
+void registerConvertGpuToLLVMInterface(DialectRegistry &registry); +} // namespace gpu +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUCOMMON_GPUTOLLVM_H diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h new file mode 100644 index 00000000000000..6311630a23c8f6 --- /dev/null +++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVM.h @@ -0,0 +1,27 @@ +//===- GPUToNVVM.h - Convert GPU to NVVM dialect ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files declares registration functions for converting GPU to NVVM. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H +#define MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H + +namespace mlir { +class DialectRegistry; +namespace NVVM { +/// Registers the `ConvertToLLVMAttrInterface` interface on the +/// `NVVM::NVVMTargetAttr` attribute. This interface populates the conversion +/// target, LLVM type converter, and pattern set for converting GPU operations +/// to NVVM. +void registerConvertGpuToNVVMInterface(DialectRegistry &registry); +} // namespace NVVM +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUTONVVM_GPUTONVVM_H diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h index 645e86a4309621..fc7c967f1b62cf 100644 --- a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h +++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h @@ -31,6 +31,10 @@ LLVM::LLVMStructType convertMMAToLLVMType(gpu::MMAMatrixType type); /// Configure target to convert from the GPU dialect to NVVM. 
void configureGpuToNVVMConversionLegality(ConversionTarget &target); +/// Configure the LLVM type convert to convert types and address spaces from the +/// GPU dialect to NVVM. +void configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter); + /// Collect a set of patterns to convert from the GPU dialect to NVVM. void populateGpuToNVVMConversionPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 4d272ba219c6f1..e394bae64e0918 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -22,12 +22,20 @@ def ConvertToLLVMPass : Pass<"convert-to-llvm"> { This is a generic pass to convert to LLVM, it uses the `ConvertToLLVMPatternInterface` dialect interface to delegate to dialects the injection of conversion patterns. + + If `dynamic` is set to `true`, the pass will look for + `ConvertToLLVMAttrInterface` attributes and use them to further configure + the conversion process. This option also uses the `DataLayoutAnalysis` + analysis to configure the type converter. Enabling this option incurs in + extra overhead. }]; let constructor = "mlir::createConvertToLLVMPass()"; let options = [ ListOption<"filterDialects", "filter-dialects", "std::string", "Test conversion patterns of only the specified dialects">, + Option<"useDynamic", "dynamic", "bool", "false", + "Use op conversion attributes to configure the conversion">, ]; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index 2ea589a7c4c3bd..8b380751c2f9d6 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -73,145 +73,6 @@ DEFINE_TRIVIAL_LLVM_TYPE(LLVMMetadataType, "llvm.metadata"); #undef DEFINE_TRIVIAL_LLVM_TYPE -//===----------------------------------------------------------------------===// -// LLVMStructType. 
-//===----------------------------------------------------------------------===// - -/// LLVM dialect structure type representing a collection of different-typed -/// elements manipulated together. Structured can optionally be packed, meaning -/// that their elements immediately follow each other in memory without -/// accounting for potential alignment. -/// -/// Structure types can be identified (named) or literal. Literal structures -/// are uniquely represented by the list of types they contain and packedness. -/// Literal structure types are immutable after construction. -/// -/// Identified structures are uniquely represented by their name, a string. They -/// have a mutable component, consisting of the list of types they contain, -/// the packedness and the opacity bits. Identified structs can be created -/// without providing the lists of element types, making them suitable to -/// represent recursive, i.e. self-referring, structures. Identified structs -/// without body are considered opaque. For such structs, one can set the body. -/// Identified structs can be created as intentionally-opaque, implying that the -/// caller does not intend to ever set the body (e.g. forward-declarations of -/// structs from another module) and wants to disallow further modification of -/// the body. For intentionally-opaque structs or non-opaque structs with the -/// body, one is not allowed to set another body (however, one can set exactly -/// the same body). -/// -/// Note that the packedness of the struct takes place in uniquing of literal -/// structs, but does not in uniquing of identified structs. -class LLVMStructType - : public Type::TypeBase { -public: - /// Inherit base constructors. - using Base::Base; - - static constexpr StringLiteral name = "llvm.struct"; - - /// Checks if the given type can be contained in a structure type. 
- static bool isValidElementType(Type type); - - /// Gets or creates an identified struct with the given name in the provided - /// context. Note that unlike llvm::StructType::create, this function will - /// _NOT_ rename a struct in case a struct with the same name already exists - /// in the context. Instead, it will just return the existing struct, - /// similarly to the rest of MLIR type ::get methods. - static LLVMStructType getIdentified(MLIRContext *context, StringRef name); - static LLVMStructType - getIdentifiedChecked(function_ref emitError, - MLIRContext *context, StringRef name); - - /// Gets a new identified struct with the given body. The body _cannot_ be - /// changed later. If a struct with the given name already exists, renames - /// the struct by appending a `.` followed by a number to the name. Renaming - /// happens even if the existing struct has the same body. - static LLVMStructType getNewIdentified(MLIRContext *context, StringRef name, - ArrayRef elements, - bool isPacked = false); - - /// Gets or creates a literal struct with the given body in the provided - /// context. - static LLVMStructType getLiteral(MLIRContext *context, ArrayRef types, - bool isPacked = false); - static LLVMStructType - getLiteralChecked(function_ref emitError, - MLIRContext *context, ArrayRef types, - bool isPacked = false); - - /// Gets or creates an intentionally-opaque identified struct. Such a struct - /// cannot have its body set. To create an opaque struct with a mutable body, - /// use `getIdentified`. Note that unlike llvm::StructType::create, this - /// function will _NOT_ rename a struct in case a struct with the same name - /// already exists in the context. Instead, it will just return the existing - /// struct, similarly to the rest of MLIR type ::get methods. 
- static LLVMStructType getOpaque(StringRef name, MLIRContext *context); - static LLVMStructType - getOpaqueChecked(function_ref emitError, - MLIRContext *context, StringRef name); - - /// Set the body of an identified struct. Returns failure if the body could - /// not be set, e.g. if the struct already has a body or if it was marked as - /// intentionally opaque. This might happen in a multi-threaded context when a - /// different thread modified the struct after it was created. Most callers - /// are likely to assert this always succeeds, but it is possible to implement - /// a local renaming scheme based on the result of this call. - LogicalResult setBody(ArrayRef types, bool isPacked); - - /// Checks if a struct is packed. - bool isPacked() const; - - /// Checks if a struct is identified. - bool isIdentified() const; - - /// Checks if a struct is opaque. - bool isOpaque(); - - /// Checks if a struct is initialized. - bool isInitialized(); - - /// Returns the name of an identified struct. - StringRef getName(); - - /// Returns the list of element types contained in a non-opaque struct. - ArrayRef getBody() const; - - /// Verifies that the type about to be constructed is well-formed. - static LogicalResult - verifyInvariants(function_ref emitError, StringRef, - bool); - static LogicalResult - verifyInvariants(function_ref emitError, - ArrayRef types, bool); - using Base::verifyInvariants; - - /// Hooks for DataLayoutTypeInterface. Should not be called directly. Obtain a - /// DataLayout instance and query it instead. 
- llvm::TypeSize getTypeSizeInBits(const DataLayout &dataLayout, - DataLayoutEntryListRef params) const; - - uint64_t getABIAlignment(const DataLayout &dataLayout, - DataLayoutEntryListRef params) const; - - uint64_t getPreferredAlignment(const DataLayout &dataLayout, - DataLayoutEntryListRef params) const; - - bool areCompatible(DataLayoutEntryListRef oldLayout, - DataLayoutEntryListRef newLayout) const; - - LogicalResult verifyEntries(DataLayoutEntryListRef entries, - Location loc) const; - - /// Destructs the struct into its indexed field types. - std::optional> getSubelementIndexMap(); - - /// Returns which type is stored at a given integer index within the struct. - Type getTypeAtIndex(Attribute index); -}; - //===----------------------------------------------------------------------===// // Printing and parsing. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td index 09dd0919c318fb..e88139fa5b28da 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td @@ -117,6 +117,140 @@ def LLVMFunctionType : LLVMType<"LLVMFunction", "func"> { }]; } +//===----------------------------------------------------------------------===// +// LLVMStructType +//===----------------------------------------------------------------------===// + +def LLVMStructType : LLVMType<"LLVMStruct", "struct", [ + MutableType, + DeclareTypeInterfaceMethods, + DeclareTypeInterfaceMethods +]> { + let summary = "LLVM struct type"; + + let description = [{ + LLVM dialect structure type representing a collection of different-typed + elements manipulated together. Struct types can optionally be packed, meaning + that their elements immediately follow each other in memory without + accounting for potential alignment. + + Structure types can be identified (named) or literal. 
Literal structures + are uniquely represented by the list of types they contain and packedness. + Literal structure types are immutable after construction. + + Identified structures are uniquely represented by their name, a string. They + have a mutable component, consisting of the list of types they contain, + the packedness and the opacity bits. Identified structs can be created + without providing the lists of element types, making them suitable to + represent recursive, i.e. self-referring, structures. Identified structs + without body are considered opaque. For such structs, one can set the body. + Identified structs can be created as intentionally-opaque, implying that the + caller does not intend to ever set the body (e.g. forward-declarations of + structs from another module) and wants to disallow further modification of + the body. For intentionally-opaque structs or non-opaque structs with the + body, one is not allowed to set another body (however, one can set exactly + the same body). + + Note that the packedness of the struct takes place in uniquing of literal + structs, but does not in uniquing of identified structs. + }]; + + // Specify parameters for which TableGen can generate convenient getters for + // us. + // TODO: Other parameters such as 'packed' or 'opaque' could be added in the + // future iff they generate getters prefixed with 'is', instead of + // 'get'. Until then there are no advantages in doing so. + let parameters = (ins + StringRefParameter<"struct name", [{""}]>:$name, + OptionalArrayRefParameter<"mlir::Type">:$body + ); + + // A custom storage class defined in C++ is required to implement mutability. + let storageClass = "LLVMStructTypeStorage"; + let genStorageClass = 0; + + // We want users to use the more aptly named custom builders below. + let skipDefaultBuilders = 1; + + let extraClassDeclaration = [{ + /// Checks if the given type can be contained in a structure type. 
+ static bool isValidElementType(Type type); + + /// Gets or creates an identified struct with the given name in the provided + /// context. Note that unlike llvm::StructType::create, this function will + /// _NOT_ rename a struct in case a struct with the same name already exists + /// in the context. Instead, it will just return the existing struct, + /// similarly to the rest of MLIR type ::get methods. + static LLVMStructType getIdentified(MLIRContext *context, StringRef name); + static LLVMStructType + getIdentifiedChecked(function_ref emitError, + MLIRContext *context, StringRef name); + + /// Gets a new identified struct with the given body. The body _cannot_ be + /// changed later. If a struct with the given name already exists, renames + /// the struct by appending a `.` followed by a number to the name. Renaming + /// happens even if the existing struct has the same body. + static LLVMStructType getNewIdentified(MLIRContext *context, StringRef name, + ArrayRef elements, + bool isPacked = false); + + /// Gets or creates a literal struct with the given body in the provided + /// context. + static LLVMStructType getLiteral(MLIRContext *context, ArrayRef types, + bool isPacked = false); + + static LLVMStructType + getLiteralChecked(function_ref emitError, + MLIRContext *context, ArrayRef types, + bool isPacked = false); + + /// Gets or creates an intentionally-opaque identified struct. Such a struct + /// cannot have its body set. + /// Note that unlike llvm::StructType::create, this function will _NOT_ + /// rename a struct in case a struct with the same name + /// already exists in the context. Instead, it will just return the existing + /// struct, similarly to the rest of MLIR type ::get methods. + static LLVMStructType getOpaque(StringRef name, MLIRContext *context); + + static LLVMStructType + getOpaqueChecked(function_ref emitError, + MLIRContext *context, StringRef name); + + /// Set the body of an identified struct. 
Returns failure if the body could + /// not be set, e.g. if the struct already has a body or if it was marked as + /// intentionally opaque. This might happen in a multi-threaded context when a + /// different thread modified the struct after it was created. Most callers + /// are likely to assert this always succeeds, but it is possible to implement + /// a local renaming scheme based on the result of this call. + LogicalResult setBody(ArrayRef types, bool isPacked); + + /// Checks if a struct is packed. + bool isPacked() const; + + /// Checks if a struct is identified. + bool isIdentified() const; + + /// Checks if a struct is opaque. + bool isOpaque(); + + /// Checks if a struct is initialized. + bool isInitialized(); + + /// Verifies that the type about to be constructed is well-formed. + static LogicalResult + verifyInvariants(function_ref emitError, StringRef, + bool); + static LogicalResult + verifyInvariants(function_ref emitError, + ArrayRef types, bool); + using Base::verifyInvariants; + }]; + + let hasCustomAssemblyFormat = 1; +} + //===----------------------------------------------------------------------===// // LLVMPointerType //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/AttrTypeBase.td b/mlir/include/mlir/IR/AttrTypeBase.td index cbe4f0d67574b3..38d38cf098df3e 100644 --- a/mlir/include/mlir/IR/AttrTypeBase.td +++ b/mlir/include/mlir/IR/AttrTypeBase.td @@ -56,6 +56,9 @@ class ParamNativeTypeTrait class GenInternalTypeTrait : GenInternalTrait; class PredTypeTrait : PredTrait; +// Trait required to be added to any type which is mutable. 
+def MutableType : NativeTypeTrait<"IsMutable">; + //===----------------------------------------------------------------------===// // Builders //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h index 6fb71ccefda151..daea2a23d6fbed 100644 --- a/mlir/include/mlir/IR/Builders.h +++ b/mlir/include/mlir/IR/Builders.h @@ -19,6 +19,7 @@ class AffineExpr; class IRMapping; class UnknownLoc; class FileLineColLoc; +class FileLineColRange; class Type; class PrimitiveType; class IntegerType; diff --git a/mlir/include/mlir/IR/BuiltinDialectBytecode.td b/mlir/include/mlir/IR/BuiltinDialectBytecode.td index f50b5dd7ad8226..87da8fd3568fa2 100644 --- a/mlir/include/mlir/IR/BuiltinDialectBytecode.td +++ b/mlir/include/mlir/IR/BuiltinDialectBytecode.td @@ -95,11 +95,26 @@ def CallSiteLoc : DialectAttribute<(attr LocationAttr:$caller )>; +let cType = "FileLineColRange" in { +def FileLineColRange : DialectAttribute<(attr + StringAttr:$filename, + WithBuilder<"$_args", + WithType<"SmallVector", + WithParser<"succeeded(readFileLineColRangeLocs($_reader, $_var))", + WithPrinter<"writeFileLineColRangeLocs($_writer, $_name)">>>>:$rawLocData +)> { + let cBuilder = "getFileLineColRange(context, filename, rawLocData)"; + let printerPredicate = "!::llvm::isa($_val)"; +} + def FileLineColLoc : DialectAttribute<(attr StringAttr:$filename, - VarInt:$line, - VarInt:$column -)>; + VarInt:$start_line, + VarInt:$start_column +)> { + let printerPredicate = "::llvm::isa($_val)"; +} +} let cType = "FusedLoc", cBuilder = "cast(get(context, $_args))" in { @@ -321,7 +336,8 @@ def BuiltinDialectAttributes : DialectAttributes<"Builtin"> { DenseIntOrFPElementsAttr, DenseStringElementsAttr, SparseElementsAttr, - DistinctAttr + DistinctAttr, + FileLineColRange, ]; } diff --git a/mlir/include/mlir/IR/BuiltinLocationAttributes.td b/mlir/include/mlir/IR/BuiltinLocationAttributes.td index 
bbe566ce977775..fe4e61100872f2 100644 --- a/mlir/include/mlir/IR/BuiltinLocationAttributes.td +++ b/mlir/include/mlir/IR/BuiltinLocationAttributes.td @@ -60,46 +60,98 @@ def CallSiteLoc : Builtin_LocationAttr<"CallSiteLoc"> { } //===----------------------------------------------------------------------===// -// FileLineColLoc +// FileLineColRange //===----------------------------------------------------------------------===// -def FileLineColLoc : Builtin_LocationAttr<"FileLineColLoc"> { - let summary = "A file:line:column source location"; +def FileLineColRange : Builtin_LocationAttr<"FileLineColRange"> { + let summary = "A file:line:column source location range"; let description = [{ Syntax: ``` filelinecol-location ::= string-literal `:` integer-literal `:` integer-literal + (`to` (integer-literal ?) `:` integer-literal ?) ``` - An instance of this location represents a tuple of file, line number, and - column number. This is similar to the type of location that you get from - most source languages. + An instance of this location represents a tuple of file, start and end line + number, and start and end column number. 
It allows for the following + configurations: + + * A single file line location: `file:line`; + * A single file line col location: `file:line:column`; + * A single line range: `file:line:column to :column`; + * A single file range: `file:line:column to line:column`; Example: ```mlir - loc("mysource.cc":10:8) + loc("mysource.cc":10:8 to 12:18) ``` }]; - let parameters = (ins "StringAttr":$filename, "unsigned":$line, - "unsigned":$column); + + let parameters = (ins "StringAttr":$filename, + "unsigned":$start_line, "unsigned":$start_column, + "unsigned":$end_line, "unsigned":$end_column); let builders = [ + AttrBuilderWithInferredContext<(ins "StringAttr":$filename), [{ + return $_get(filename.getContext(), filename, ArrayRef{}); + }]>, + AttrBuilderWithInferredContext<(ins "StringAttr":$filename, + "unsigned":$line), [{ + return $_get(filename.getContext(), filename, + ArrayRef{line}); + }]>, AttrBuilderWithInferredContext<(ins "StringAttr":$filename, "unsigned":$line, "unsigned":$column), [{ - return $_get(filename.getContext(), filename, line, column); + return $_get(filename.getContext(), filename, + ArrayRef{line, column}); }]>, - AttrBuilder<(ins "StringRef":$filename, "unsigned":$line, - "unsigned":$column), [{ + AttrBuilder<(ins "::llvm::StringRef":$filename, + "unsigned":$start_line, + "unsigned":$start_column), [{ return $_get($_ctxt, - StringAttr::get($_ctxt, filename.empty() ? "-" : filename), - line, column); - }]> + StringAttr::get($_ctxt, filename.empty() ? 
"-" : filename), + ArrayRef{start_line, start_column}); + }]>, + AttrBuilderWithInferredContext<(ins "::mlir::StringAttr":$filename, + "unsigned":$line, + "unsigned":$start_column, + "unsigned":$end_column), [{ + return $_get(filename.getContext(), filename, + ArrayRef{line, start_column, end_column}); + }]>, + AttrBuilderWithInferredContext<(ins "::mlir::StringAttr":$filename, + "unsigned":$start_line, + "unsigned":$start_column, + "unsigned":$end_line, + "unsigned":$end_column), [{ + return $_get(filename.getContext(), filename, + ArrayRef{start_line, start_column, end_column, end_line}); + }]>, + AttrBuilder<(ins "::llvm::StringRef":$filename, + "unsigned":$start_line, + "unsigned":$start_column, + "unsigned":$end_line, + "unsigned":$end_column), [{ + return $_get($_ctxt, + StringAttr::get($_ctxt, filename.empty() ? "-" : filename), + ArrayRef{start_line, start_column, end_column, end_line}); + }]>, ]; + + let extraClassDeclaration = [{ + ::mlir::StringAttr getFilename() const; + unsigned getStartLine() const; + unsigned getStartColumn() const; + unsigned getEndColumn() const; + unsigned getEndLine() const; + }]; let skipDefaultBuilders = 1; - let attrName = "builtin.file_line_loc"; + let genAccessors = 0; + let genStorageClass = 0; + let attrName = "builtin.file_line_range"; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Location.h b/mlir/include/mlir/IR/Location.h index 5eb1bfaf4afcdc..e206501f5ee6a2 100644 --- a/mlir/include/mlir/IR/Location.h +++ b/mlir/include/mlir/IR/Location.h @@ -136,6 +136,11 @@ inline ::llvm::hash_code hash_value(Location arg) { // Tablegen Attribute Declarations //===----------------------------------------------------------------------===// +// Forward declaration for class created later. 
+namespace mlir::detail { +struct FileLineColRangeAttrStorage; +} // namespace mlir::detail + #define GET_ATTRDEF_CLASSES #include "mlir/IR/BuiltinLocationAttributes.h.inc" @@ -164,6 +169,32 @@ class FusedLocWith : public FusedLoc { } }; +//===----------------------------------------------------------------------===// +// FileLineColLoc +//===----------------------------------------------------------------------===// + +/// An instance of this location represents a tuple of file, line number, and +/// column number. This is similar to the type of location that you get from +/// most source languages. +/// +/// FileLineColLoc is a FileLineColRange with exactly one line and column. +class FileLineColLoc : public FileLineColRange { +public: + using FileLineColRange::FileLineColRange; + + static FileLineColLoc get(StringAttr filename, unsigned line, + unsigned column); + static FileLineColLoc get(MLIRContext *context, StringRef fileName, + unsigned line, unsigned column); + + StringAttr getFilename() const; + unsigned getLine() const; + unsigned getColumn() const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast. 
+ static bool classof(Attribute attr); +}; + //===----------------------------------------------------------------------===// // OpaqueLoc //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h index 1f2ef26b450701..14a6a2787b3a5d 100644 --- a/mlir/include/mlir/InitAllExtensions.h +++ b/mlir/include/mlir/InitAllExtensions.h @@ -18,6 +18,8 @@ #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h" #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" +#include "mlir/Conversion/GPUCommon/GPUToLLVM.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVM.h" #include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h" #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" @@ -72,6 +74,8 @@ inline void registerAllExtensions(DialectRegistry ®istry) { registerConvertOpenMPToLLVMInterface(registry); ub::registerConvertUBToLLVMInterface(registry); registerConvertAMXToLLVMInterface(registry); + gpu::registerConvertGpuToLLVMInterface(registry); + NVVM::registerConvertGpuToNVVMInterface(registry); // Register all transform dialect extensions. 
affine::registerTransformDialectExtension(registry); diff --git a/mlir/lib/AsmParser/LocationParser.cpp b/mlir/lib/AsmParser/LocationParser.cpp index 1365da03c7c3d6..fb0999bed201d5 100644 --- a/mlir/lib/AsmParser/LocationParser.cpp +++ b/mlir/lib/AsmParser/LocationParser.cpp @@ -12,6 +12,7 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Location.h" #include "mlir/Support/LLVM.h" +#include using namespace mlir; using namespace mlir::detail; @@ -97,37 +98,82 @@ ParseResult Parser::parseFusedLocation(LocationAttr &loc) { return success(); } -ParseResult Parser::parseNameOrFileLineColLocation(LocationAttr &loc) { +ParseResult Parser::parseNameOrFileLineColRange(LocationAttr &loc) { auto *ctx = getContext(); auto str = getToken().getStringValue(); consumeToken(Token::string); + std::optional startLine, startColumn, endLine, endColumn; + // If the next token is ':' this is a filelinecol location. if (consumeIf(Token::colon)) { // Parse the line number. if (getToken().isNot(Token::integer)) return emitWrongTokenError( - "expected integer line number in FileLineColLoc"); - auto line = getToken().getUnsignedIntegerValue(); - if (!line) + "expected integer line number in FileLineColRange"); + startLine = getToken().getUnsignedIntegerValue(); + if (!startLine) return emitWrongTokenError( - "expected integer line number in FileLineColLoc"); + "expected integer line number in FileLineColRange"); consumeToken(Token::integer); // Parse the ':'. - if (parseToken(Token::colon, "expected ':' in FileLineColLoc")) - return failure(); + if (getToken().isNot(Token::colon)) { + loc = FileLineColRange::get(StringAttr::get(ctx, str), *startLine); + return success(); + } + consumeToken(Token::colon); // Parse the column number. 
- if (getToken().isNot(Token::integer)) + if (getToken().isNot(Token::integer)) { + return emitWrongTokenError( + "expected integer column number in FileLineColRange"); + } + startColumn = getToken().getUnsignedIntegerValue(); + if (!startColumn.has_value()) + return emitError("expected integer column number in FileLineColRange"); + consumeToken(Token::integer); + + if (!isCurrentTokenAKeyword() || getTokenSpelling() != "to") { + loc = FileLineColLoc::get(ctx, str, *startLine, *startColumn); + return success(); + } + consumeToken(); + + // Parse the line number. + if (getToken().is(Token::integer)) { + endLine = getToken().getUnsignedIntegerValue(); + if (!endLine) { + return emitWrongTokenError( + "expected integer line number in FileLineColRange"); + } + consumeToken(Token::integer); + } + + // Parse the ':'. + if (getToken().isNot(Token::colon)) { + return emitWrongTokenError( + "expected either integer or `:` post `to` in FileLineColRange"); + } + consumeToken(Token::colon); + + // Parse the column number. 
+ if (getToken().isNot(Token::integer)) { return emitWrongTokenError( - "expected integer column number in FileLineColLoc"); - auto column = getToken().getUnsignedIntegerValue(); - if (!column.has_value()) - return emitError("expected integer column number in FileLineColLoc"); + "expected integer column number in FileLineColRange"); + } + endColumn = getToken().getUnsignedIntegerValue(); + if (!endColumn.has_value()) + return emitError("expected integer column number in FileLineColRange"); consumeToken(Token::integer); - loc = FileLineColLoc::get(ctx, str, *line, *column); + if (endLine.has_value()) { + loc = FileLineColRange::get(StringAttr::get(ctx, str), *startLine, + *startColumn, *endLine, *endColumn); + } else { + loc = FileLineColRange::get(StringAttr::get(ctx, str), *startLine, + *startColumn, *endColumn); + } return success(); } @@ -166,7 +212,7 @@ ParseResult Parser::parseLocationInstance(LocationAttr &loc) { // Handle either name or filelinecol locations. if (getToken().is(Token::string)) - return parseNameOrFileLineColLocation(loc); + return parseNameOrFileLineColRange(loc); // Bare tokens required for other cases. if (!getToken().is(Token::bare_identifier)) diff --git a/mlir/lib/AsmParser/Parser.h b/mlir/lib/AsmParser/Parser.h index 4979cfc6e69e41..37670bd789fecb 100644 --- a/mlir/lib/AsmParser/Parser.h +++ b/mlir/lib/AsmParser/Parser.h @@ -310,7 +310,7 @@ class Parser { ParseResult parseFusedLocation(LocationAttr &loc); /// Parse a name or FileLineCol location instance. 
- ParseResult parseNameOrFileLineColLocation(LocationAttr &loc); + ParseResult parseNameOrFileLineColRange(LocationAttr &loc); //===--------------------------------------------------------------------===// // Affine Parsing diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp index 417c66b9165e3b..cc9532f4e33b2c 100644 --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -1102,11 +1102,11 @@ class PyDenseElementsAttribute unpackedBooleans = unpackedBooleans[py::slice(0, numBooleans, 1)]; unpackedBooleans = equalFunc(unpackedBooleans, 1); - std::vector shape; MlirType shapedType = mlirAttributeGetType(*this); intptr_t rank = mlirShapedTypeGetRank(shapedType); + std::vector shape(rank); for (intptr_t i = 0; i < rank; ++i) { - shape.push_back(mlirShapedTypeGetDimSize(shapedType, i)); + shape[i] = mlirShapedTypeGetDimSize(shapedType, i); } unpackedBooleans = reshapeFunc(unpackedBooleans, shape); diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp index 1d0e5ce2115a0a..e8d28abe6d583a 100644 --- a/mlir/lib/Bindings/Python/Pass.cpp +++ b/mlir/lib/Bindings/Python/Pass.cpp @@ -76,14 +76,21 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) { "enable_ir_printing", [](PyPassManager &passManager, bool printBeforeAll, bool printAfterAll, bool printModuleScope, bool printAfterChange, - bool printAfterFailure) { + bool printAfterFailure, + std::optional optionalTreePrintingPath) { + std::string treePrintingPath = ""; + if (optionalTreePrintingPath.has_value()) + treePrintingPath = optionalTreePrintingPath.value(); mlirPassManagerEnableIRPrinting( passManager.get(), printBeforeAll, printAfterAll, - printModuleScope, printAfterChange, printAfterFailure); + printModuleScope, printAfterChange, printAfterFailure, + mlirStringRefCreate(treePrintingPath.data(), + treePrintingPath.size())); }, "print_before_all"_a = false, "print_after_all"_a = true, 
"print_module_scope"_a = false, "print_after_change"_a = false, "print_after_failure"_a = false, + "tree_printing_dir_path"_a = py::none(), "Enable IR printing, default as mlir-print-ir-after-all.") .def( "enable_verifier", diff --git a/mlir/lib/CAPI/IR/Pass.cpp b/mlir/lib/CAPI/IR/Pass.cpp index a6c9fbd08d45a6..01151eafeb5268 100644 --- a/mlir/lib/CAPI/IR/Pass.cpp +++ b/mlir/lib/CAPI/IR/Pass.cpp @@ -48,17 +48,25 @@ void mlirPassManagerEnableIRPrinting(MlirPassManager passManager, bool printBeforeAll, bool printAfterAll, bool printModuleScope, bool printAfterOnlyOnChange, - bool printAfterOnlyOnFailure) { + bool printAfterOnlyOnFailure, + MlirStringRef treePrintingPath) { auto shouldPrintBeforePass = [printBeforeAll](Pass *, Operation *) { return printBeforeAll; }; auto shouldPrintAfterPass = [printAfterAll](Pass *, Operation *) { return printAfterAll; }; - return unwrap(passManager) - ->enableIRPrinting(shouldPrintBeforePass, shouldPrintAfterPass, - printModuleScope, printAfterOnlyOnChange, - printAfterOnlyOnFailure); + if (unwrap(treePrintingPath).empty()) + return unwrap(passManager) + ->enableIRPrinting(shouldPrintBeforePass, shouldPrintAfterPass, + printModuleScope, printAfterOnlyOnChange, + printAfterOnlyOnFailure); + + unwrap(passManager) + ->enableIRPrintingToFileTree(shouldPrintBeforePass, shouldPrintAfterPass, + printModuleScope, printAfterOnlyOnChange, + printAfterOnlyOnFailure, + unwrap(treePrintingPath)); } void mlirPassManagerEnableVerifier(MlirPassManager passManager, bool enable) { diff --git a/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt index de3d850d520c0f..c71711ba2ebedb 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/ConvertToLLVM/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_conversion_library(MLIRConvertToLLVMInterface ToLLVMInterface.cpp DEPENDS + MLIRConvertToLLVMInterfaceIncGen LINK_LIBS PUBLIC MLIRIR @@ -21,6 +22,7 @@ 
add_mlir_conversion_library(MLIRConvertToLLVMPass LINK_LIBS PUBLIC MLIRIR + MLIRConvertToLLVMInterface MLIRLLVMCommonConversion MLIRLLVMDialect MLIRPass diff --git a/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp b/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp index b2407a258c2719..673ba814d338f4 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp +++ b/mlir/lib/Conversion/ConvertToLLVM/ConvertToLLVMPass.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Analysis/DataLayoutAnalysis.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" #include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" @@ -27,6 +28,41 @@ namespace mlir { using namespace mlir; namespace { +/// Base class for creating the internal implementation of `convert-to-llvm` +/// passes. +class ConvertToLLVMPassInterface { +public: + ConvertToLLVMPassInterface(MLIRContext *context, + ArrayRef filterDialects); + virtual ~ConvertToLLVMPassInterface() = default; + + /// Get the dependent dialects used by `convert-to-llvm`. + static void getDependentDialects(DialectRegistry ®istry); + + /// Initialize the internal state of the `convert-to-llvm` pass + /// implementation. This method is invoked by `ConvertToLLVMPass::initialize`. + /// This method returns whether the initialization process failed. + virtual LogicalResult initialize() = 0; + + /// Transform `op` to LLVM with the conversions available in the pass. The + /// analysis manager can be used to query analyzes like `DataLayoutAnalysis` + /// to further configure the conversion process. This method is invoked by + /// `ConvertToLLVMPass::runOnOperation`. This method returns whether the + /// transformation process failed. 
+ virtual LogicalResult transform(Operation *op, + AnalysisManager manager) const = 0; + +protected: + /// Visit the `ConvertToLLVMPatternInterface` dialect interfaces and call + /// `visitor` with each of the interfaces. If `filterDialects` is non-empty, + /// then `visitor` is invoked only with the dialects in the `filterDialects` + /// list. + LogicalResult visitInterfaces( + llvm::function_ref visitor); + MLIRContext *context; + /// List of dialects names to use as filters. + ArrayRef filterDialects; +}; /// This DialectExtension can be attached to the context, which will invoke the /// `apply()` method for every loaded dialect. If a dialect implements the @@ -58,74 +94,188 @@ class LoadDependentDialectExtension : public DialectExtensionBase { } }; +//===----------------------------------------------------------------------===// +// StaticConvertToLLVM +//===----------------------------------------------------------------------===// + +/// Static implementation of the `convert-to-llvm` pass. This version only looks +/// at dialect interfaces to configure the conversion process. +struct StaticConvertToLLVM : public ConvertToLLVMPassInterface { + /// Pattern set with conversions to LLVM. + std::shared_ptr patterns; + /// The conversion target. + std::shared_ptr target; + /// The LLVM type converter. + std::shared_ptr typeConverter; + using ConvertToLLVMPassInterface::ConvertToLLVMPassInterface; + + /// Configure the conversion to LLVM at pass initialization. + LogicalResult initialize() final { + auto target = std::make_shared(*context); + auto typeConverter = std::make_shared(context); + RewritePatternSet tempPatterns(context); + target->addLegalDialect(); + // Populate the patterns with the dialect interface. 
+ if (failed(visitInterfaces([&](ConvertToLLVMPatternInterface *iface) { + iface->populateConvertToLLVMConversionPatterns( + *target, *typeConverter, tempPatterns); + }))) + return failure(); + this->patterns = + std::make_unique(std::move(tempPatterns)); + this->target = target; + this->typeConverter = typeConverter; + return success(); + } + + /// Apply the conversion driver. + LogicalResult transform(Operation *op, AnalysisManager manager) const final { + if (failed(applyPartialConversion(op, *target, *patterns))) + return failure(); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// DynamicConvertToLLVM +//===----------------------------------------------------------------------===// + +/// Dynamic implementation of the `convert-to-llvm` pass. This version inspects +/// the IR to configure the conversion to LLVM. +struct DynamicConvertToLLVM : public ConvertToLLVMPassInterface { + /// A list of all the `ConvertToLLVMPatternInterface` dialect interfaces used + /// to partially configure the conversion process. + std::shared_ptr> + interfaces; + using ConvertToLLVMPassInterface::ConvertToLLVMPassInterface; + + /// Collect the dialect interfaces used to configure the conversion process. + LogicalResult initialize() final { + auto interfaces = + std::make_shared>(); + // Collect the interfaces. + if (failed(visitInterfaces([&](ConvertToLLVMPatternInterface *iface) { + interfaces->push_back(iface); + }))) + return failure(); + this->interfaces = interfaces; + return success(); + } + + /// Configure the conversion process and apply the conversion driver. + LogicalResult transform(Operation *op, AnalysisManager manager) const final { + RewritePatternSet patterns(context); + ConversionTarget target(*context); + target.addLegalDialect(); + // Get the data layout analysis. 
+ const auto &dlAnalysis = manager.getAnalysis(); + LLVMTypeConverter typeConverter(context, &dlAnalysis); + + // Configure the conversion with dialect level interfaces. + for (ConvertToLLVMPatternInterface *iface : *interfaces) + iface->populateConvertToLLVMConversionPatterns(target, typeConverter, + patterns); + + // Configure the conversion attribute interfaces. + populateOpConvertToLLVMConversionPatterns(op, target, typeConverter, + patterns); + + // Apply the conversion. + if (failed(applyPartialConversion(op, target, std::move(patterns)))) + return failure(); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ConvertToLLVMPass +//===----------------------------------------------------------------------===// + /// This is a generic pass to convert to LLVM, it uses the /// `ConvertToLLVMPatternInterface` dialect interface to delegate to dialects /// the injection of conversion patterns. class ConvertToLLVMPass : public impl::ConvertToLLVMPassBase { - std::shared_ptr patterns; - std::shared_ptr target; - std::shared_ptr typeConverter; + std::shared_ptr impl; public: using impl::ConvertToLLVMPassBase::ConvertToLLVMPassBase; void getDependentDialects(DialectRegistry ®istry) const final { - registry.insert(); - registry.addExtensions(); + ConvertToLLVMPassInterface::getDependentDialects(registry); } LogicalResult initialize(MLIRContext *context) final { - RewritePatternSet tempPatterns(context); - auto target = std::make_shared(*context); - target->addLegalDialect(); - auto typeConverter = std::make_shared(context); - - if (!filterDialects.empty()) { - // Test mode: Populate only patterns from the specified dialects. Produce - // an error if the dialect is not loaded or does not implement the - // interface. 
- for (std::string &dialectName : filterDialects) { - Dialect *dialect = context->getLoadedDialect(dialectName); - if (!dialect) - return emitError(UnknownLoc::get(context)) - << "dialect not loaded: " << dialectName << "\n"; - auto *iface = dyn_cast(dialect); - if (!iface) - return emitError(UnknownLoc::get(context)) - << "dialect does not implement ConvertToLLVMPatternInterface: " - << dialectName << "\n"; - iface->populateConvertToLLVMConversionPatterns(*target, *typeConverter, - tempPatterns); - } - } else { - // Normal mode: Populate all patterns from all dialects that implement the - // interface. - for (Dialect *dialect : context->getLoadedDialects()) { - // First time we encounter this dialect: if it implements the interface, - // let's populate patterns ! - auto *iface = dyn_cast(dialect); - if (!iface) - continue; - iface->populateConvertToLLVMConversionPatterns(*target, *typeConverter, - tempPatterns); - } - } - - this->patterns = - std::make_unique(std::move(tempPatterns)); - this->target = target; - this->typeConverter = typeConverter; + std::shared_ptr impl; + // Choose the pass implementation. 
+ if (useDynamic) + impl = std::make_shared(context, filterDialects); + else + impl = std::make_shared(context, filterDialects); + if (failed(impl->initialize())) + return failure(); + this->impl = impl; return success(); } void runOnOperation() final { - if (failed(applyPartialConversion(getOperation(), *target, *patterns))) - signalPassFailure(); + if (failed(impl->transform(getOperation(), getAnalysisManager()))) + return signalPassFailure(); } }; } // namespace +//===----------------------------------------------------------------------===// +// ConvertToLLVMPassInterface +//===----------------------------------------------------------------------===// + +ConvertToLLVMPassInterface::ConvertToLLVMPassInterface( + MLIRContext *context, ArrayRef filterDialects) + : context(context), filterDialects(filterDialects) {} + +void ConvertToLLVMPassInterface::getDependentDialects( + DialectRegistry ®istry) { + registry.insert(); + registry.addExtensions(); +} + +LogicalResult ConvertToLLVMPassInterface::visitInterfaces( + llvm::function_ref visitor) { + if (!filterDialects.empty()) { + // Test mode: Populate only patterns from the specified dialects. Produce + // an error if the dialect is not loaded or does not implement the + // interface. + for (StringRef dialectName : filterDialects) { + Dialect *dialect = context->getLoadedDialect(dialectName); + if (!dialect) + return emitError(UnknownLoc::get(context)) + << "dialect not loaded: " << dialectName << "\n"; + auto *iface = dyn_cast(dialect); + if (!iface) + return emitError(UnknownLoc::get(context)) + << "dialect does not implement ConvertToLLVMPatternInterface: " + << dialectName << "\n"; + visitor(iface); + } + } else { + // Normal mode: Populate all patterns from all dialects that implement the + // interface. + for (Dialect *dialect : context->getLoadedDialects()) { + // First time we encounter this dialect: if it implements the interface, + // let's populate patterns ! 
+ auto *iface = dyn_cast(dialect); + if (!iface) + continue; + visitor(iface); + } + } + return success(); +} + +//===----------------------------------------------------------------------===// +// API +//===----------------------------------------------------------------------===// + void mlir::registerConvertToLLVMDependentDialectLoading( DialectRegistry ®istry) { registry.addExtensions(); diff --git a/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp b/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp index 3a4e83b2a8838f..252245dfbf5417 100644 --- a/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp +++ b/mlir/lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp @@ -30,3 +30,22 @@ void mlir::populateConversionTargetFromOperation( patterns); }); } + +void mlir::populateOpConvertToLLVMConversionPatterns( + Operation *op, ConversionTarget &target, LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns) { + auto iface = dyn_cast(op); + if (!iface) + iface = op->getParentOfType(); + if (!iface) + return; + SmallVector attrs; + iface.getConvertToLLVMConversionAttrs(attrs); + for (ConvertToLLVMAttrInterface attr : attrs) + attr.populateConvertToLLVMConversionPatterns(target, typeConverter, + patterns); +} + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.cpp.inc" + +#include "mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.cpp.inc" diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp index 92b28ff9c58737..1497d662dcdbdd 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp @@ -22,6 +22,7 @@ #include "mlir/Conversion/ConvertToLLVM/ToLLVMPass.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" +#include "mlir/Conversion/GPUCommon/GPUToLLVM.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include 
"mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" @@ -1762,3 +1763,34 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter, ConvertSetCsrPointersOpToGpuRuntimeCallPattern>(converter); patterns.add(converter, kernelBarePtrCallConv); } + +//===----------------------------------------------------------------------===// +// GPUModuleOp convert to LLVM op interface +//===----------------------------------------------------------------------===// + +namespace { +struct GPUModuleOpConvertToLLVMInterface + : public ConvertToLLVMOpInterface::ExternalModel< + GPUModuleOpConvertToLLVMInterface, gpu::GPUModuleOp> { + /// Get the conversion patterns from the target attribute. + void getConvertToLLVMConversionAttrs( + Operation *op, SmallVectorImpl &attrs) const; +}; +} // namespace + +void GPUModuleOpConvertToLLVMInterface::getConvertToLLVMConversionAttrs( + Operation *op, SmallVectorImpl &attrs) const { + auto module = cast(op); + ArrayAttr targetsAttr = module.getTargetsAttr(); + // Fail if there are no target attributes or there is more than one target. 
+ if (!targetsAttr || targetsAttr.size() != 1) + return; + if (auto patternAttr = dyn_cast(targetsAttr[0])) + attrs.push_back(patternAttr); +} + +void mlir::gpu::registerConvertGpuToLLVMInterface(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, gpu::GPUDialect *dialect) { + gpu::GPUModuleOp::attachInterface(*ctx); + }); +} diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 04e85c2b337dec..b343cf71e3a2e7 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -15,8 +15,10 @@ #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" +#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVM.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" @@ -274,29 +276,7 @@ struct LowerGpuOpsToNVVMOpsPass } LLVMTypeConverter converter(m.getContext(), options); - // NVVM uses alloca in the default address space to represent private - // memory allocations, so drop private annotations. NVVM uses address - // space 3 for shared memory. NVVM uses the default address space to - // represent global memory. 
- populateGpuMemorySpaceAttributeConversions( - converter, [](gpu::AddressSpace space) -> unsigned { - switch (space) { - case gpu::AddressSpace::Global: - return static_cast( - NVVM::NVVMMemorySpace::kGlobalMemorySpace); - case gpu::AddressSpace::Workgroup: - return static_cast( - NVVM::NVVMMemorySpace::kSharedMemorySpace); - case gpu::AddressSpace::Private: - return 0; - } - llvm_unreachable("unknown address space enum value"); - return 0; - }); - // Lowering for MMAMatrixType. - converter.addConversion([&](gpu::MMAMatrixType type) -> Type { - return convertMMAToLLVMType(type); - }); + configureGpuToNVVMTypeConverter(converter); RewritePatternSet llvmPatterns(m.getContext()); arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns); @@ -332,6 +312,32 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) { target.addLegalOp(); } +void mlir::configureGpuToNVVMTypeConverter(LLVMTypeConverter &converter) { + // NVVM uses alloca in the default address space to represent private + // memory allocations, so drop private annotations. NVVM uses address + // space 3 for shared memory. NVVM uses the default address space to + // represent global memory. + populateGpuMemorySpaceAttributeConversions( + converter, [](gpu::AddressSpace space) -> unsigned { + switch (space) { + case gpu::AddressSpace::Global: + return static_cast( + NVVM::NVVMMemorySpace::kGlobalMemorySpace); + case gpu::AddressSpace::Workgroup: + return static_cast( + NVVM::NVVMMemorySpace::kSharedMemorySpace); + case gpu::AddressSpace::Private: + return 0; + } + llvm_unreachable("unknown address space enum value"); + return 0; + }); + // Lowering for MMAMatrixType. 
+ converter.addConversion([&](gpu::MMAMatrixType type) -> Type { + return convertMMAToLLVMType(type); + }); +} + template static void populateOpPatterns(const LLVMTypeConverter &converter, RewritePatternSet &patterns, StringRef f32Func, @@ -467,3 +473,34 @@ void mlir::populateGpuToNVVMConversionPatterns( populateOpPatterns(converter, patterns, "__nv_tanhf", "__nv_tanh"); } + +//===----------------------------------------------------------------------===// +// NVVMTargetAttr convert to LLVM attr interface +//===----------------------------------------------------------------------===// + +namespace { +struct NVVMTargetConvertToLLVMAttrInterface + : public ConvertToLLVMAttrInterface::ExternalModel< + NVVMTargetConvertToLLVMAttrInterface, NVVM::NVVMTargetAttr> { + /// Configure GPU to NVVM. + void populateConvertToLLVMConversionPatterns( + Attribute attr, ConversionTarget &target, + LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) const; +}; +} // namespace + +void NVVMTargetConvertToLLVMAttrInterface:: + populateConvertToLLVMConversionPatterns(Attribute attr, + ConversionTarget &target, + LLVMTypeConverter &typeConverter, + RewritePatternSet &patterns) const { + configureGpuToNVVMConversionLegality(target); + configureGpuToNVVMTypeConverter(typeConverter); + populateGpuToNVVMConversionPatterns(typeConverter, patterns); +} + +void mlir::NVVM::registerConvertGpuToNVVMInterface(DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, NVVMDialect *dialect) { + NVVMTargetAttr::attachInterface(*ctx); + }); +} diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp index ce91424e7a577e..59b0f5c9b09bcd 100644 --- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp +++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp @@ -153,6 +153,12 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, type.isVarArg()); }); + // Helper function that checks if the given value range is a bare pointer. 
+ auto isBarePointer = [](ValueRange values) { + return values.size() == 1 && + isa(values.front().getType()); + }; + // Argument materializations convert from the new block argument types // (multiple SSA values that make up a memref descriptor) back to the // original block argument type. The dialect conversion framework will then @@ -161,11 +167,10 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, addArgumentMaterialization([&](OpBuilder &builder, UnrankedMemRefType resultType, ValueRange inputs, Location loc) { - if (inputs.size() == 1) { - // Bare pointers are not supported for unranked memrefs because a - // memref descriptor cannot be built just from a bare pointer. + // Note: Bare pointers are not supported for unranked memrefs because a + // memref descriptor cannot be built just from a bare pointer. + if (TypeRange(inputs) != getUnrankedMemRefDescriptorFields()) return Value(); - } Value desc = UnrankedMemRefDescriptor::pack(builder, loc, *this, resultType, inputs); // An argument materialization must return a value of type @@ -177,20 +182,17 @@ LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx, addArgumentMaterialization([&](OpBuilder &builder, MemRefType resultType, ValueRange inputs, Location loc) { Value desc; - if (inputs.size() == 1) { - // This is a bare pointer. We allow bare pointers only for function entry - // blocks. - BlockArgument barePtr = dyn_cast(inputs.front()); - if (!barePtr) - return Value(); - Block *block = barePtr.getOwner(); - if (!block->isEntryBlock() || - !isa(block->getParentOp())) - return Value(); + if (isBarePointer(inputs)) { desc = MemRefDescriptor::fromStaticShape(builder, loc, *this, resultType, inputs[0]); - } else { + } else if (TypeRange(inputs) == + getMemRefDescriptorFields(resultType, + /*unpackAggregates=*/true)) { desc = MemRefDescriptor::pack(builder, loc, *this, resultType, inputs); + } else { + // The inputs are neither a bare pointer nor an unpacked memref + // descriptor. 
This materialization function cannot be used. + return Value(); } // An argument materialization must return a value of type `resultType`, // so insert a cast from the memref descriptor type (!llvm.struct) to the diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp index fc31931da06073..e3f316443161f6 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp @@ -12,25 +12,9 @@ #include "mlir/Dialect/Affine/Passes.h" -#include "mlir/Analysis/SliceAnalysis.h" -#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h" -#include "mlir/Dialect/Affine/Analysis/AffineStructures.h" -#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/Analysis/Utils.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Affine/LoopUtils.h" -#include "mlir/Dialect/Affine/Utils.h" -#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/IR/AffineExpr.h" -#include "mlir/IR/AffineMap.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/Matchers.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -41,7 +25,7 @@ namespace affine { } // namespace affine } // namespace mlir -#define DEBUG_TYPE "licm" +#define DEBUG_TYPE "affine-licm" using namespace mlir; using namespace mlir::affine; @@ -49,9 +33,13 @@ using namespace mlir::affine; namespace { /// Affine loop invariant code motion (LICM) pass. -/// TODO: The pass is missing zero-trip tests. -/// TODO: This code should be removed once the new LICM pass can handle its -/// uses. +/// TODO: The pass is missing zero tripcount tests. 
+/// TODO: When compared to the other standard LICM pass, this pass +/// has some special handling for affine read/write ops but such handling +/// requires aliasing to be sound, and as such this pass is unsound. In +/// addition, this handling is nothing particular to affine memory ops but would +/// apply to any memory read/write effect ops. Either aliasing should be handled +/// or this pass can be removed and the standard LICM can be used. struct LoopInvariantCodeMotion : public affine::impl::AffineLoopInvariantCodeMotionBase< LoopInvariantCodeMotion> { @@ -61,100 +49,80 @@ struct LoopInvariantCodeMotion } // namespace static bool -checkInvarianceOfNestedIfOps(AffineIfOp ifOp, Value indVar, ValueRange iterArgs, +checkInvarianceOfNestedIfOps(AffineIfOp ifOp, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist); -static bool isOpLoopInvariant(Operation &op, Value indVar, ValueRange iterArgs, +static bool isOpLoopInvariant(Operation &op, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist); static bool -areAllOpsInTheBlockListInvariant(Region &blockList, Value indVar, - ValueRange iterArgs, +areAllOpsInTheBlockListInvariant(Region &blockList, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist); -// Returns true if the individual op is loop invariant. -static bool isOpLoopInvariant(Operation &op, Value indVar, ValueRange iterArgs, +/// Returns true if `op` is invariant on `loop`. 
+static bool isOpLoopInvariant(Operation &op, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist) { - LLVM_DEBUG(llvm::dbgs() << "iterating on op: " << op;); + Value iv = loop.getInductionVar(); if (auto ifOp = dyn_cast(op)) { - if (!checkInvarianceOfNestedIfOps(ifOp, indVar, iterArgs, opsWithUsers, - opsToHoist)) + if (!checkInvarianceOfNestedIfOps(ifOp, loop, opsWithUsers, opsToHoist)) return false; } else if (auto forOp = dyn_cast(op)) { - if (!areAllOpsInTheBlockListInvariant(forOp.getRegion(), indVar, iterArgs, - opsWithUsers, opsToHoist)) + if (!areAllOpsInTheBlockListInvariant(forOp.getRegion(), loop, opsWithUsers, + opsToHoist)) return false; } else if (auto parOp = dyn_cast(op)) { - if (!areAllOpsInTheBlockListInvariant(parOp.getRegion(), indVar, iterArgs, - opsWithUsers, opsToHoist)) + if (!areAllOpsInTheBlockListInvariant(parOp.getRegion(), loop, opsWithUsers, + opsToHoist)) return false; } else if (!isMemoryEffectFree(&op) && - !isa(&op)) { + !isa(&op)) { // Check for side-effecting ops. Affine read/write ops are handled // separately below. return false; - } else if (!matchPattern(&op, m_Constant())) { + } else if (isa(op)) { // Register op in the set of ops that have users. opsWithUsers.insert(&op); - if (isa(op)) { - auto read = dyn_cast(op); - Value memref = read ? read.getMemRef() - : cast(op).getMemRef(); - for (auto *user : memref.getUsers()) { - // If this memref has a user that is a DMA, give up because these - // operations write to this memref. - if (isa(user)) + SmallVector userIVs; + auto read = dyn_cast(op); + Value memref = + read ? read.getMemRef() : cast(op).getMemRef(); + for (auto *user : memref.getUsers()) { + // If the memref used by the load/store is used in a store elsewhere in + // the loop nest, we do not hoist. Similarly, if the memref used in a + // load is also being stored too, we do not hoist the load. + // FIXME: This is missing checking aliases. 
+ if (&op == user) + continue; + if (hasEffect(user, memref) || + (hasEffect(user, memref) && + isa(op))) { + userIVs.clear(); + getAffineForIVs(*user, &userIVs); + // Check that userIVs don't contain the for loop around the op. + if (llvm::is_contained(userIVs, loop)) return false; - // If the memref used by the load/store is used in a store elsewhere in - // the loop nest, we do not hoist. Similarly, if the memref used in a - // load is also being stored too, we do not hoist the load. - if (isa(user) || - (isa(user) && - isa(op))) { - if (&op != user) { - SmallVector userIVs; - getAffineForIVs(*user, &userIVs); - // Check that userIVs don't contain the for loop around the op. - if (llvm::is_contained(userIVs, getForInductionVarOwner(indVar))) - return false; - } - } } } - - if (op.getNumOperands() == 0 && !isa(op)) { - LLVM_DEBUG(llvm::dbgs() << "Non-constant op with 0 operands\n"); - return false; - } } // Check operands. + ValueRange iterArgs = loop.getRegionIterArgs(); for (unsigned int i = 0; i < op.getNumOperands(); ++i) { auto *operandSrc = op.getOperand(i).getDefiningOp(); - LLVM_DEBUG( - op.getOperand(i).print(llvm::dbgs() << "Iterating on operand\n")); - // If the loop IV is the operand, this op isn't loop invariant. - if (indVar == op.getOperand(i)) { - LLVM_DEBUG(llvm::dbgs() << "Loop IV is the operand\n"); + if (iv == op.getOperand(i)) return false; - } // If the one of the iter_args is the operand, this op isn't loop invariant. - if (llvm::is_contained(iterArgs, op.getOperand(i))) { - LLVM_DEBUG(llvm::dbgs() << "One of the iter_args is the operand\n"); + if (llvm::is_contained(iterArgs, op.getOperand(i))) return false; - } if (operandSrc) { - LLVM_DEBUG(llvm::dbgs() << *operandSrc << "Iterating on operand src\n"); - // If the value was defined in the loop (outside of the if/else region), // and that operation itself wasn't meant to be hoisted, then mark this // operation loop dependent. 
@@ -170,14 +138,13 @@ static bool isOpLoopInvariant(Operation &op, Value indVar, ValueRange iterArgs, // Checks if all ops in a region (i.e. list of blocks) are loop invariant. static bool -areAllOpsInTheBlockListInvariant(Region &blockList, Value indVar, - ValueRange iterArgs, +areAllOpsInTheBlockListInvariant(Region &blockList, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist) { for (auto &b : blockList) { for (auto &op : b) { - if (!isOpLoopInvariant(op, indVar, iterArgs, opsWithUsers, opsToHoist)) + if (!isOpLoopInvariant(op, loop, opsWithUsers, opsToHoist)) return false; } } @@ -187,14 +154,14 @@ areAllOpsInTheBlockListInvariant(Region &blockList, Value indVar, // Returns true if the affine.if op can be hoisted. static bool -checkInvarianceOfNestedIfOps(AffineIfOp ifOp, Value indVar, ValueRange iterArgs, +checkInvarianceOfNestedIfOps(AffineIfOp ifOp, AffineForOp loop, SmallPtrSetImpl &opsWithUsers, SmallPtrSetImpl &opsToHoist) { - if (!areAllOpsInTheBlockListInvariant(ifOp.getThenRegion(), indVar, iterArgs, + if (!areAllOpsInTheBlockListInvariant(ifOp.getThenRegion(), loop, opsWithUsers, opsToHoist)) return false; - if (!areAllOpsInTheBlockListInvariant(ifOp.getElseRegion(), indVar, iterArgs, + if (!areAllOpsInTheBlockListInvariant(ifOp.getElseRegion(), loop, opsWithUsers, opsToHoist)) return false; @@ -202,10 +169,6 @@ checkInvarianceOfNestedIfOps(AffineIfOp ifOp, Value indVar, ValueRange iterArgs, } void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) { - auto *loopBody = forOp.getBody(); - auto indVar = forOp.getInductionVar(); - ValueRange iterArgs = forOp.getRegionIterArgs(); - // This is the place where hoisted instructions would reside. 
OpBuilder b(forOp.getOperation()); @@ -213,14 +176,14 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) { SmallVector opsToMove; SmallPtrSet opsWithUsers; - for (auto &op : *loopBody) { + for (Operation &op : *forOp.getBody()) { // Register op in the set of ops that have users. This set is used // to prevent hoisting ops that depend on these ops that are // not being hoisted. if (!op.use_empty()) opsWithUsers.insert(&op); if (!isa(op)) { - if (isOpLoopInvariant(op, indVar, iterArgs, opsWithUsers, opsToHoist)) { + if (isOpLoopInvariant(op, forOp, opsWithUsers, opsToHoist)) { opsToMove.push_back(&op); } } @@ -231,18 +194,13 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) { for (auto *op : opsToMove) { op->moveBefore(forOp); } - - LLVM_DEBUG(forOp->print(llvm::dbgs() << "Modified loop\n")); } void LoopInvariantCodeMotion::runOnOperation() { // Walk through all loops in a function in innermost-loop-first order. This // way, we first LICM from the inner loop, and place the ops in // the outer loop, which in turn can be further LICM'ed. 
- getOperation().walk([&](AffineForOp op) { - LLVM_DEBUG(op->print(llvm::dbgs() << "\nOriginal loop\n")); - runOnAffineForOp(op); - }); + getOperation().walk([&](AffineForOp op) { runOnAffineForOp(op); }); } std::unique_ptr> diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index e75d1c571d08cc..c5cc8bfeb0a642 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -129,8 +129,13 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) { auto *parentBlock = forOp->getBlock(); if (!iv.use_empty()) { if (forOp.hasConstantLowerBound()) { - OpBuilder topBuilder(forOp->getParentOfType().getBody()); - auto constOp = topBuilder.create( + auto func = forOp->getParentOfType(); + OpBuilder builder(forOp->getContext()); + if (func) + builder.setInsertionPointToStart(&func.getFunctionBody().front()); + else + builder.setInsertionPoint(forOp); + auto constOp = builder.create( forOp.getLoc(), forOp.getConstantLowerBound()); iv.replaceAllUsesWith(constOp); } else { @@ -1936,8 +1941,8 @@ static LogicalResult generateCopy( *nBegin = begin; *nEnd = end; - func::FuncOp f = begin->getParentOfType(); - OpBuilder topBuilder(f.getBody()); + auto f = begin->getParentOfType(); + OpBuilder topBuilder(f.getFunctionBody()); Value zeroIndex = topBuilder.create(f.getLoc(), 0); *sizeInBytes = 0; @@ -1956,8 +1961,9 @@ static LogicalResult generateCopy( OpBuilder &b = region.isWrite() ? epilogue : prologue; // Builder to create constants at the top level. - auto func = copyPlacementBlock->getParent()->getParentOfType(); - OpBuilder top(func.getBody()); + auto func = + copyPlacementBlock->getParent()->getParentOfType(); + OpBuilder top(func.getFunctionBody()); auto loc = region.loc; auto memref = region.memref; @@ -2298,21 +2304,26 @@ mlir::affine::affineDataCopyGenerate(Block::iterator begin, Block::iterator end, // Walk this range of operations to gather all memory regions. 
block->walk(begin, end, [&](Operation *opInst) { + Value memref; + MemRefType memrefType; // Gather regions to allocate to buffers in faster memory space. if (auto loadOp = dyn_cast(opInst)) { - if ((filterMemRef.has_value() && filterMemRef != loadOp.getMemRef()) || - (loadOp.getMemRefType().getMemorySpaceAsInt() != - copyOptions.slowMemorySpace)) - return; + memref = loadOp.getMemRef(); + memrefType = loadOp.getMemRefType(); } else if (auto storeOp = dyn_cast(opInst)) { - if ((filterMemRef.has_value() && filterMemRef != storeOp.getMemRef()) || - storeOp.getMemRefType().getMemorySpaceAsInt() != - copyOptions.slowMemorySpace) - return; - } else { - // Neither load nor a store op. - return; + memref = storeOp.getMemRef(); + memrefType = storeOp.getMemRefType(); } + // Neither load nor a store op. + if (!memref) + return; + + auto memorySpaceAttr = + dyn_cast_or_null(memrefType.getMemorySpace()); + if ((filterMemRef.has_value() && filterMemRef != memref) || + (memorySpaceAttr && + memrefType.getMemorySpaceAsInt() != copyOptions.slowMemorySpace)) + return; // Compute the MemRefRegion accessed. auto region = std::make_unique(opInst->getLoc()); diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 3420db771ef426..07d399adae0cd4 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -1391,11 +1391,11 @@ LogicalResult mlir::affine::replaceAllMemRefUsesWith( std::unique_ptr postDomInfo; if (domOpFilter) domInfo = std::make_unique( - domOpFilter->getParentOfType()); + domOpFilter->getParentOfType()); if (postDomOpFilter) postDomInfo = std::make_unique( - postDomOpFilter->getParentOfType()); + postDomOpFilter->getParentOfType()); // Walk all uses of old memref; collect ops to perform replacement. 
We use a // DenseSet since an operation could potentially have multiple uses of a diff --git a/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp b/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp index eb444d665ff260..b1cde6ca5d2fca 100644 --- a/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp +++ b/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp @@ -23,21 +23,34 @@ struct CallOpSignatureConversion : public OpConversionPattern { LogicalResult matchAndRewrite(CallOp callOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - // Convert the original function results. + // Convert the original function results. Keep track of how many result + // types an original result type is converted into. + SmallVector numResultsReplacments; SmallVector convertedResults; - if (failed(typeConverter->convertTypes(callOp.getResultTypes(), - convertedResults))) - return failure(); - - // If this isn't a one-to-one type mapping, we don't know how to aggregate - // the results. - if (callOp->getNumResults() != convertedResults.size()) - return failure(); + size_t numFlattenedResults = 0; + for (auto [idx, type] : llvm::enumerate(callOp.getResultTypes())) { + if (failed(typeConverter->convertTypes(type, convertedResults))) + return failure(); + numResultsReplacments.push_back(convertedResults.size() - + numFlattenedResults); + numFlattenedResults = convertedResults.size(); + } // Substitute with the new result types from the corresponding FuncType // conversion. 
- rewriter.replaceOpWithNewOp( - callOp, callOp.getCallee(), convertedResults, adaptor.getOperands()); + auto newCallOp = + rewriter.create(callOp.getLoc(), callOp.getCallee(), + convertedResults, adaptor.getOperands()); + SmallVector replacements; + size_t offset = 0; + for (int i = 0, e = callOp->getNumResults(); i < e; ++i) { + replacements.push_back( + newCallOp->getResults().slice(offset, numResultsReplacments[i])); + offset += numResultsReplacments[i]; + } + assert(offset == convertedResults.size() && + "expected that all converted results are used"); + rewriter.replaceOpWithMultiple(callOp, replacements); return success(); } }; diff --git a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp index 1adc381092bf3a..0ffd8131b89348 100644 --- a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp @@ -132,6 +132,29 @@ collectEffects(Operation *op, return false; } +/// Get all effects before the given operation caused by other operations in the +/// same block. That is, this will not consider operations beyond the block. +static bool +getEffectsBeforeInBlock(Operation *op, + SmallVectorImpl &effects, + bool stopAtBarrier) { + if (op == &op->getBlock()->front()) + return true; + + for (Operation *it = op->getPrevNode(); it != nullptr; + it = it->getPrevNode()) { + if (isa(it)) { + if (stopAtBarrier) + return true; + continue; + } + + if (!collectEffects(it, effects)) + return false; + } + return true; +} + /// Collects memory effects from operations that may be executed before `op` in /// a trivial structured control flow, e.g., without branches. Stops at the /// parallel region boundary or at the barrier operation if `stopAtBarrier` is @@ -153,19 +176,7 @@ getEffectsBefore(Operation *op, } // Collect all effects before the op. 
- if (op != &op->getBlock()->front()) { - for (Operation *it = op->getPrevNode(); it != nullptr; - it = it->getPrevNode()) { - if (isa(it)) { - if (stopAtBarrier) - return true; - else - continue; - } - if (!collectEffects(it, effects)) - return false; - } - } + getEffectsBeforeInBlock(op, effects, stopAtBarrier); // Stop if reached the parallel region boundary. if (isParallelRegionBoundary(op->getParentOp())) @@ -191,8 +202,8 @@ getEffectsBefore(Operation *op, // appropriately. if (isSequentialLoopLike(op->getParentOp())) { // Assuming loop terminators have no side effects. - return getEffectsBefore(op->getBlock()->getTerminator(), effects, - /*stopAtBarrier=*/true); + return getEffectsBeforeInBlock(op->getBlock()->getTerminator(), effects, + /*stopAtBarrier=*/true); } // If the parent operation is not guaranteed to execute its (single-block) @@ -212,6 +223,28 @@ getEffectsBefore(Operation *op, return !conservative; } +/// Get all effects after the given operation caused by other operations in the +/// same block. That is, this will not consider operations beyond the block. +static bool +getEffectsAfterInBlock(Operation *op, + SmallVectorImpl &effects, + bool stopAtBarrier) { + if (op == &op->getBlock()->back()) + return true; + + for (Operation *it = op->getNextNode(); it != nullptr; + it = it->getNextNode()) { + if (isa(it)) { + if (stopAtBarrier) + return true; + continue; + } + if (!collectEffects(it, effects)) + return false; + } + return true; +} + /// Collects memory effects from operations that may be executed after `op` in /// a trivial structured control flow, e.g., without branches. Stops at the /// parallel region boundary or at the barrier operation if `stopAtBarrier` is @@ -233,17 +266,7 @@ getEffectsAfter(Operation *op, } // Collect all effects after the op. 
- if (op != &op->getBlock()->back()) - for (Operation *it = op->getNextNode(); it != nullptr; - it = it->getNextNode()) { - if (isa(it)) { - if (stopAtBarrier) - return true; - continue; - } - if (!collectEffects(it, effects)) - return false; - } + getEffectsAfterInBlock(op, effects, stopAtBarrier); // Stop if reached the parallel region boundary. if (isParallelRegionBoundary(op->getParentOp())) @@ -272,8 +295,8 @@ getEffectsAfter(Operation *op, return true; bool exact = collectEffects(&op->getBlock()->front(), effects); - return getEffectsAfter(&op->getBlock()->front(), effects, - /*stopAtBarrier=*/true) && + return getEffectsAfterInBlock(&op->getBlock()->front(), effects, + /*stopAtBarrier=*/true) && exact; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 9bb0c80749a5f5..d30a6b8398f064 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -3510,8 +3510,7 @@ void LLVMDialect::initialize() { LLVMPPCFP128Type, LLVMTokenType, LLVMLabelType, - LLVMMetadataType, - LLVMStructType>(); + LLVMMetadataType>(); // clang-format on registerTypes(); diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index 903035a3ec2296..655316cc5d66d6 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -1566,7 +1566,7 @@ DeletionKind LLVM::MemmoveOp::rewire(const DestructurableMemorySlot &slot, //===----------------------------------------------------------------------===// std::optional> -LLVM::LLVMStructType::getSubelementIndexMap() { +LLVM::LLVMStructType::getSubelementIndexMap() const { Type i32 = IntegerType::get(getContext(), 32); DenseMap destructured; for (const auto &[index, elemType] : llvm::enumerate(getBody())) @@ -1574,7 +1574,7 @@ LLVM::LLVMStructType::getSubelementIndexMap() { return destructured; } -Type LLVM::LLVMStructType::getTypeAtIndex(Attribute 
index) { +Type LLVM::LLVMStructType::getTypeAtIndex(Attribute index) const { auto indexAttr = llvm::dyn_cast(index); if (!indexAttr || !indexAttr.getType().isInteger(32)) return {}; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index 1bed3fa48b30d7..33c231e2d2045f 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -485,7 +485,7 @@ bool LLVMStructType::isOpaque() { (getImpl()->isOpaque() || !getImpl()->isInitialized()); } bool LLVMStructType::isInitialized() { return getImpl()->isInitialized(); } -StringRef LLVMStructType::getName() { return getImpl()->getIdentifier(); } +StringRef LLVMStructType::getName() const { return getImpl()->getIdentifier(); } ArrayRef LLVMStructType::getBody() const { return isIdentified() ? getImpl()->getIdentifiedStructBody() : getImpl()->getTypeList(); diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index a728425f2ec6ba..61b90bc9b0a7bb 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -2009,12 +2009,23 @@ void AsmPrinter::Impl::printLocationInternal(LocationAttr loc, bool pretty, else os << "unknown"; }) - .Case([&](FileLineColLoc loc) { + .Case([&](FileLineColRange loc) { if (pretty) os << loc.getFilename().getValue(); else printEscapedString(loc.getFilename()); - os << ':' << loc.getLine() << ':' << loc.getColumn(); + if (loc.getEndColumn() == loc.getStartColumn() && + loc.getStartLine() == loc.getEndLine()) { + os << ':' << loc.getStartLine() << ':' << loc.getStartColumn(); + return; + } + if (loc.getStartLine() == loc.getEndLine()) { + os << ':' << loc.getStartLine() << ':' << loc.getStartColumn() + << " to :" << loc.getEndColumn(); + return; + } + os << ':' << loc.getStartLine() << ':' << loc.getStartColumn() << " to " + << loc.getEndLine() << ':' << loc.getEndColumn(); }) .Case([&](NameLoc loc) { printEscapedString(loc.getName()); diff --git a/mlir/lib/IR/BuiltinDialectBytecode.cpp 
b/mlir/lib/IR/BuiltinDialectBytecode.cpp index 6131b7eae90c8b..6095c6bcb2ce39 100644 --- a/mlir/lib/IR/BuiltinDialectBytecode.cpp +++ b/mlir/lib/IR/BuiltinDialectBytecode.cpp @@ -14,7 +14,10 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/DialectResourceBlobManager.h" +#include "mlir/IR/Location.h" +#include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include using namespace mlir; @@ -70,8 +73,8 @@ readPotentiallySplatString(DialectBytecodeReader &reader, ShapedType type, return success(); } -void writePotentiallySplatString(DialectBytecodeWriter &writer, - DenseStringElementsAttr attr) { +static void writePotentiallySplatString(DialectBytecodeWriter &writer, + DenseStringElementsAttr attr) { bool isSplat = attr.isSplat(); if (isSplat) return writer.writeOwnedString(attr.getRawStringData().front()); @@ -80,6 +83,70 @@ void writePotentiallySplatString(DialectBytecodeWriter &writer, writer.writeOwnedString(str); } +static FileLineColRange getFileLineColRange(MLIRContext *context, + StringAttr filename, + ArrayRef lineCols) { + switch (lineCols.size()) { + case 0: + return FileLineColRange::get(filename); + case 1: + return FileLineColRange::get(filename, lineCols[0]); + case 2: + return FileLineColRange::get(filename, lineCols[0], lineCols[1]); + case 3: + return FileLineColRange::get(filename, lineCols[0], lineCols[1], + lineCols[2]); + case 4: + return FileLineColRange::get(filename, lineCols[0], lineCols[1], + lineCols[2], lineCols[3]); + default: + return {}; + } +} + +static LogicalResult +readFileLineColRangeLocs(DialectBytecodeReader &reader, + SmallVectorImpl &lineCols) { + return reader.readList( + lineCols, [&reader](uint64_t &val) { return reader.readVarInt(val); }); +} + +static void writeFileLineColRangeLocs(DialectBytecodeWriter &writer, + FileLineColRange range) { + if (range.getStartLine() == 0 && range.getStartColumn() == 0 && + range.getEndLine() == 0 && range.getEndColumn() == 0) { + 
writer.writeVarInt(0); + return; + } + if (range.getStartColumn() == 0 && + range.getStartLine() == range.getEndLine()) { + writer.writeVarInt(1); + writer.writeVarInt(range.getStartLine()); + return; + } + // The single file:line:col is handled by other writer, but checked here for + // completeness. + if (range.getEndColumn() == range.getStartColumn() && + range.getStartLine() == range.getEndLine()) { + writer.writeVarInt(2); + writer.writeVarInt(range.getStartLine()); + writer.writeVarInt(range.getStartColumn()); + return; + } + if (range.getStartLine() == range.getEndLine()) { + writer.writeVarInt(3); + writer.writeVarInt(range.getStartLine()); + writer.writeVarInt(range.getStartColumn()); + writer.writeVarInt(range.getEndColumn()); + return; + } + writer.writeVarInt(4); + writer.writeVarInt(range.getStartLine()); + writer.writeVarInt(range.getStartColumn()); + writer.writeVarInt(range.getEndLine()); + writer.writeVarInt(range.getEndColumn()); +} + #include "mlir/IR/BuiltinDialectBytecode.cpp.inc" /// This class implements the bytecode interface for the builtin dialect. 
diff --git a/mlir/lib/IR/Location.cpp b/mlir/lib/IR/Location.cpp index dbd84912a8657d..ce78d30ee0a526 100644 --- a/mlir/lib/IR/Location.cpp +++ b/mlir/lib/IR/Location.cpp @@ -7,31 +7,118 @@ //===----------------------------------------------------------------------===// #include "mlir/IR/Location.h" +#include "mlir/IR/AttributeSupport.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/Visitors.h" +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/TrailingObjects.h" +#include +#include +#include +#include +#include +#include using namespace mlir; using namespace mlir::detail; -//===----------------------------------------------------------------------===// -/// Tablegen Attribute Definitions -//===----------------------------------------------------------------------===// +namespace mlir::detail { +struct FileLineColRangeAttrStorage final + : public ::mlir::AttributeStorage, + public llvm::TrailingObjects { + using PointerPair = llvm::PointerIntPair; + using KeyTy = std::tuple>; -#define GET_ATTRDEF_CLASSES -#include "mlir/IR/BuiltinLocationAttributes.cpp.inc" + FileLineColRangeAttrStorage(StringAttr filename, int numLocs) + : filenameAndTrailing(filename, numLocs) {} + + static FileLineColRangeAttrStorage * + construct(::mlir::AttributeStorageAllocator &allocator, KeyTy &&tblgenKey) { + auto numInArray = std::get<1>(tblgenKey).size(); + // Note: Considered asserting that numInArray is at least 1, but this + // is not needed in memory or in printed form. This should very rarely be + // 0 here as that means a NamedLoc would have been more efficient. 
But this + // does allow for location with just a file, and also having the interface + // be more uniform. + auto locEnc = numInArray == 0 ? 1 : numInArray; + // Allocate a new storage instance. + auto byteSize = + FileLineColRangeAttrStorage::totalSizeToAlloc(locEnc - 1); + auto *rawMem = + allocator.allocate(byteSize, alignof(FileLineColRangeAttrStorage)); + auto *result = ::new (rawMem) FileLineColRangeAttrStorage( + std::move(std::get<0>(tblgenKey)), locEnc - 1); + if (numInArray > 0) { + result->startLine = std::get<1>(tblgenKey)[0]; + // Copy in the element types into the trailing storage. + std::uninitialized_copy(std::next(std::get<1>(tblgenKey).begin()), + std::get<1>(tblgenKey).end(), + result->getTrailingObjects()); + } + return result; + } + + // Return the number of held types. + unsigned size() const { return filenameAndTrailing.getInt() + 1; } + + bool operator==(const KeyTy &tblgenKey) const { + return (filenameAndTrailing.getPointer() == std::get<0>(tblgenKey)) && + (size() == std::get<1>(tblgenKey).size()) && + (startLine == std::get<1>(tblgenKey)[0]) && + (ArrayRef{getTrailingObjects(), size() - 1} == + ArrayRef{std::get<1>(tblgenKey)}.drop_front()); + } + + unsigned getLineCols(unsigned index) const { + return getTrailingObjects()[index - 1]; + } + + unsigned getStartLine() const { return startLine; } + unsigned getStartColumn() const { + if (size() <= 1) + return 0; + return getLineCols(1); + } + unsigned getEndColumn() const { + if (size() <= 2) + return getStartColumn(); + return getLineCols(2); + } + unsigned getEndLine() const { + if (size() <= 3) + return getStartLine(); + return getLineCols(3); + } + + static ::llvm::hash_code hashKey(const KeyTy &tblgenKey) { + return ::llvm::hash_combine(std::get<0>(tblgenKey), std::get<1>(tblgenKey)); + } + + // Supports + // - 0 (file:line) + // - 1 (file:line:col) + // - 2 (file:line:start_col to file:line:end_col) and + // - 3 (file:start_line:start_col to file:end_line:end_col) + 
llvm::PointerIntPair filenameAndTrailing; + unsigned startLine = 0; +}; +} // namespace mlir::detail //===----------------------------------------------------------------------===// -// BuiltinDialect +/// Tablegen Attribute Definitions //===----------------------------------------------------------------------===// -void BuiltinDialect::registerLocationAttributes() { - addAttributes< -#define GET_ATTRDEF_LIST +#define GET_ATTRDEF_CLASSES #include "mlir/IR/BuiltinLocationAttributes.cpp.inc" - >(); -} //===----------------------------------------------------------------------===// // LocationAttr @@ -66,6 +153,59 @@ CallSiteLoc CallSiteLoc::get(Location name, ArrayRef frames) { return CallSiteLoc::get(name, caller); } +//===----------------------------------------------------------------------===// +// FileLineColLoc +//===----------------------------------------------------------------------===// + +FileLineColLoc FileLineColLoc::get(StringAttr filename, unsigned line, + unsigned column) { + return llvm::cast( + FileLineColRange::get(filename, line, column)); +} + +FileLineColLoc FileLineColLoc::get(MLIRContext *context, StringRef fileName, + unsigned line, unsigned column) { + return llvm::cast( + FileLineColRange::get(context, fileName, line, column)); +} + +StringAttr FileLineColLoc::getFilename() const { + return FileLineColRange::getFilename(); +} + +unsigned FileLineColLoc::getLine() const { return getStartLine(); } + +unsigned FileLineColLoc::getColumn() const { return getStartColumn(); } + +bool FileLineColLoc::classof(Attribute attr) { + // This could also have been for <= 2. But given this is matching previous + // behavior, it is left as is. 
+ if (auto range = mlir::dyn_cast(attr)) + return range.getImpl()->size() == 2; + return false; +} + +//===----------------------------------------------------------------------===// +// FileLineColRange +//===----------------------------------------------------------------------===// + +StringAttr FileLineColRange::getFilename() const { + return getImpl()->filenameAndTrailing.getPointer(); +} + +unsigned FileLineColRange::getStartLine() const { + return getImpl()->getStartLine(); +} +unsigned FileLineColRange::getStartColumn() const { + return getImpl()->getStartColumn(); +} +unsigned FileLineColRange::getEndColumn() const { + return getImpl()->getEndColumn(); +} +unsigned FileLineColRange::getEndLine() const { + return getImpl()->getEndLine(); +} + //===----------------------------------------------------------------------===// // FusedLoc //===----------------------------------------------------------------------===// @@ -107,3 +247,14 @@ Location FusedLoc::get(ArrayRef locs, Attribute metadata, return Base::get(context, locs, metadata); } + +//===----------------------------------------------------------------------===// +// BuiltinDialect +//===----------------------------------------------------------------------===// + +void BuiltinDialect::registerLocationAttributes() { + addAttributes< +#define GET_ATTRDEF_LIST +#include "mlir/IR/BuiltinLocationAttributes.cpp.inc" + >(); +} diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp index 7e45f18b660ba7..0aa9dcb36681b3 100644 --- a/mlir/lib/Transforms/RemoveDeadValues.cpp +++ b/mlir/lib/Transforms/RemoveDeadValues.cpp @@ -573,16 +573,12 @@ void RemoveDeadValues::runOnOperation() { Operation *module = getOperation(); // The removal of non-live values is performed iff there are no branch ops, - // all symbol ops present in the IR are function-like, and all symbol user ops - // present in the IR are call-like. + // and all symbol user ops present in the IR are call-like. 
WalkResult acceptableIR = module->walk([&](Operation *op) { if (op == module) return WalkResult::advance(); - if (isa(op) || - (isa(op) && !isa(op)) || - (isa(op) && !isa(op))) { - op->emitError() << "cannot optimize an IR with non-function symbol ops, " - "non-call symbol user ops or branch ops\n"; + if (isa(op)) { + op->emitError() << "cannot optimize an IR with branch ops\n"; return WalkResult::interrupt(); } return WalkResult::advance(); diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 5acd095da8e386..710c976281dc3d 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -434,23 +434,25 @@ class MoveBlockRewrite : public BlockRewrite { class BlockTypeConversionRewrite : public BlockRewrite { public: BlockTypeConversionRewrite(ConversionPatternRewriterImpl &rewriterImpl, - Block *block, Block *origBlock) - : BlockRewrite(Kind::BlockTypeConversion, rewriterImpl, block), - origBlock(origBlock) {} + Block *origBlock, Block *newBlock) + : BlockRewrite(Kind::BlockTypeConversion, rewriterImpl, origBlock), + newBlock(newBlock) {} static bool classof(const IRRewrite *rewrite) { return rewrite->getKind() == Kind::BlockTypeConversion; } - Block *getOrigBlock() const { return origBlock; } + Block *getOrigBlock() const { return block; } + + Block *getNewBlock() const { return newBlock; } void commit(RewriterBase &rewriter) override; void rollback() override; private: - /// The original block that was requested to have its signature converted. - Block *origBlock; + /// The new block that was created as part of this signature conversion. + Block *newBlock; }; /// Replacing a block argument. 
This rewrite is not immediately reflected in the @@ -721,6 +723,18 @@ static bool hasRewrite(R &&rewrites, Operation *op) { }); } +#ifndef NDEBUG +/// Return "true" if there is a block rewrite that matches the specified +/// rewrite type and block among the given rewrites. +template +static bool hasRewrite(R &&rewrites, Block *block) { + return any_of(std::forward(rewrites), [&](auto &rewrite) { + auto *rewriteTy = dyn_cast(rewrite.get()); + return rewriteTy && rewriteTy->getBlock() == block; + }); +} +#endif // NDEBUG + //===----------------------------------------------------------------------===// // ConversionPatternRewriterImpl //===----------------------------------------------------------------------===// @@ -966,12 +980,12 @@ void BlockTypeConversionRewrite::commit(RewriterBase &rewriter) { // block. if (auto *listener = dyn_cast_or_null(rewriter.getListener())) - for (Operation *op : block->getUsers()) + for (Operation *op : getNewBlock()->getUsers()) listener->notifyOperationModified(op); } void BlockTypeConversionRewrite::rollback() { - block->replaceAllUsesWith(origBlock); + getNewBlock()->replaceAllUsesWith(getOrigBlock()); } void ReplaceBlockArgRewrite::commit(RewriterBase &rewriter) { @@ -1223,6 +1237,9 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( ConversionPatternRewriter &rewriter, Block *block, const TypeConverter *converter, TypeConverter::SignatureConversion &signatureConversion) { + // A block cannot be converted multiple times. + assert(!hasRewrite(rewrites, block) && + "block was already converted"); OpBuilder::InsertionGuard g(rewriter); // If no arguments are being changed or added, there is nothing to do. @@ -1308,7 +1325,7 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion( appendRewrite(block, origArg, converter); } - appendRewrite(newBlock, block); + appendRewrite(/*origBlock=*/block, newBlock); // Erase the old block. (It is just unlinked for now and will be erased during // cleanup.) 
diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt index eeaac27461b118..272d066831f927 100644 --- a/mlir/python/requirements.txt +++ b/mlir/python/requirements.txt @@ -1,4 +1,4 @@ numpy>=1.19.5, <=2.1.2 -pybind11>=2.9.0, <=2.13.6 +pybind11>=2.10.0, <=2.13.6 PyYAML>=5.4.0, <=6.0.1 ml_dtypes>=0.1.0, <=0.5.0 # provides several NumPy dtype extensions, including the bf16 diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir new file mode 100644 index 00000000000000..ed7fa6508d5ade --- /dev/null +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-target-attr.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-opt %s --pass-pipeline="builtin.module(gpu.module(convert-to-llvm{dynamic=true}))" | FileCheck %s + +// CHECK-LABEL: gpu.module @nvvm_module +gpu.module @nvvm_module [#nvvm.target] { + // CHECK-LABEL: llvm.func @kernel_0() + func.func @kernel_0() -> index { + // CHECK: = nvvm.read.ptx.sreg.tid.x : i32 + // CHECK: = llvm.sext %{{.*}} : i32 to i64 + %tIdX = gpu.thread_id x + // CHECK: = nvvm.read.ptx.sreg.laneid range : i32 + // CHECK: = llvm.sext %{{.*}} : i32 to i64 + %laneId = gpu.lane_id + %sum = index.add %tIdX, %laneId + func.return %sum : index + } + +// CHECK-LABEL: llvm.func @kernel_1 +// CHECK: (%{{.*}}: !llvm.ptr<1>, %arg1: !llvm.ptr<1>, %arg2: i64) +// CHECK: attributes {gpu.kernel, gpu.known_block_size = array, nvvm.kernel, nvvm.maxntid = array} + gpu.func @kernel_1(%arg0 : memref>) kernel attributes {known_block_size = array} { + gpu.return + } +} + +// CHECK-LABEL: gpu.module @nvvm_module_2 +gpu.module @nvvm_module_2 { + // CHECK-LABEL: llvm.func @kernel_0() + func.func @kernel_0() -> index { + // CHECK: = gpu.thread_id x + %tIdX = gpu.thread_id x + // CHECK: = gpu.lane_id + %laneId = gpu.lane_id + %sum = index.add %tIdX, %laneId + func.return %sum : index + } + +// CHECK-LABEL: gpu.func @kernel_1 +// CHECK: (%{{.*}}: memref>) kernel attributes {known_block_size = 
array} + gpu.func @kernel_1(%arg0 : memref>) kernel attributes {known_block_size = array} { + gpu.return + } +} diff --git a/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir new file mode 100644 index 00000000000000..0288aa11313c72 --- /dev/null +++ b/mlir/test/Conversion/MemRefToLLVM/type-conversion.mlir @@ -0,0 +1,57 @@ +// RUN: mlir-opt %s -test-llvm-legalize-patterns -split-input-file + +// Test the argument materializer for ranked MemRef types. + +// CHECK-LABEL: func @construct_ranked_memref_descriptor( +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-COUNT-7: llvm.insertvalue +// CHECK: builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<5x4xf32> +func.func @construct_ranked_memref_descriptor(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64) { + %0 = "test.direct_replacement"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, i64) -> (memref<5x4xf32>) + "test.legal_op"(%0) : (memref<5x4xf32>) -> () + return +} + +// ----- + +// The argument materializer for ranked MemRef types is called with incorrect +// input types. Make sure that the materializer is skipped and we do not +// generate invalid IR. + +// CHECK-LABEL: func @invalid_ranked_memref_descriptor( +// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %{{.*}} : i1 to memref<5x4xf32> +// CHECK: "test.legal_op"(%[[cast]]) +func.func @invalid_ranked_memref_descriptor(%arg0: i1) { + %0 = "test.direct_replacement"(%arg0) : (i1) -> (memref<5x4xf32>) + "test.legal_op"(%0) : (memref<5x4xf32>) -> () + return +} + +// ----- + +// Test the argument materializer for unranked MemRef types. 
+ +// CHECK-LABEL: func @construct_unranked_memref_descriptor( +// CHECK: llvm.mlir.undef : !llvm.struct<(i64, ptr)> +// CHECK-COUNT-2: llvm.insertvalue +// CHECK: builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(i64, ptr)> to memref<*xf32> +func.func @construct_unranked_memref_descriptor(%arg0: i64, %arg1: !llvm.ptr) { + %0 = "test.direct_replacement"(%arg0, %arg1) : (i64, !llvm.ptr) -> (memref<*xf32>) + "test.legal_op"(%0) : (memref<*xf32>) -> () + return +} + +// ----- + +// The argument materializer for unranked MemRef types is called with incorrect +// input types. Make sure that the materializer is skipped and we do not +// generate invalid IR. + +// CHECK-LABEL: func @invalid_unranked_memref_descriptor( +// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %{{.*}} : i1 to memref<*xf32> +// CHECK: "test.legal_op"(%[[cast]]) +func.func @invalid_unranked_memref_descriptor(%arg0: i1) { + %0 = "test.direct_replacement"(%arg0) : (i1) -> (memref<*xf32>) + "test.legal_op"(%0) : (memref<*xf32>) -> () + return +} diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir index fe3b4a206e2b9d..330cf92bafba4a 100644 --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -333,3 +333,23 @@ func.func @index_elt_type(%arg0: memref<1x2x4x8xindex>) { // CHECK-NEXT: affine.for %{{.*}} = 0 to 8 return } + +#map = affine_map<(d0) -> (d0 + 1)> + +// CHECK-LABEL: func @arbitrary_memory_space +func.func @arbitrary_memory_space() { + %alloc = memref.alloc() : memref<256x8xi8, #spirv.storage_class> + affine.for %arg0 = 0 to 32 step 4 { + %0 = affine.apply #map(%arg0) + affine.for %arg1 = 0 to 8 step 2 { + %1 = affine.apply #map(%arg1) + affine.for %arg2 = 0 to 8 step 2 { + // CHECK: memref.alloc() : memref<1x7xi8> + %2 = affine.apply #map(%arg2) + %3 = affine.load %alloc[%0, %1] : memref<256x8xi8, #spirv.storage_class> + affine.store %3, %alloc[%0, %2] : 
memref<256x8xi8, #spirv.storage_class> + } + } + } + return +} diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir index c04d7d2053866c..858b7d3ddf9f11 100644 --- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir +++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir @@ -855,15 +855,16 @@ func.func @affine_prefetch_invariant() { affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %1 = affine.load %0[%i0, %i1] : memref<10x10xf32> + // A prefetch shouldn't be hoisted. affine.prefetch %0[%i0, %i0], write, locality<0>, data : memref<10x10xf32> } } // CHECK: memref.alloc() : memref<10x10xf32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { - // CHECK-NEXT: affine.prefetch // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { - // CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}} : memref<10x10xf32> + // CHECK-NEXT: affine.load %{{.*}}[%{{.*}} : memref<10x10xf32> + // CHECK-NEXT: affine.prefetch // CHECK-NEXT: } // CHECK-NEXT: } return diff --git a/mlir/test/Dialect/Affine/loop-fusion-4.mlir b/mlir/test/Dialect/Affine/loop-fusion-4.mlir index 3fc31ad0d77b82..f46ad0f5e4c232 100644 --- a/mlir/test/Dialect/Affine/loop-fusion-4.mlir +++ b/mlir/test/Dialect/Affine/loop-fusion-4.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{fusion-maximal mode=sibling}))' -split-input-file | FileCheck %s --check-prefix=SIBLING-MAXIMAL +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(spirv.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=SPIRV // Part I of fusion tests in mlir/test/Transforms/loop-fusion.mlir. 
// Part II of fusion tests in mlir/test/Transforms/loop-fusion-2.mlir @@ -226,3 +227,61 @@ func.func @fuse_higher_dim_nest_into_lower_dim_nest() { // PRODUCER-CONSUMER: return return } + +// ----- + +// Basic test to ensure fusion works inside other func ops like spirv.func. + +#map = affine_map<(d0, d1) -> (d0 + d1)> +module { + // SPIRV-LABEL: func @test_avgpool2d_pad_right + spirv.func @test_avgpool2d_pad_right(%arg0: !spirv.array<8192 x f32>) -> !spirv.array<8192 x f32> "None" { + %cst_f32 = spirv.Constant 0.000000e+00 : f32 + %0 = builtin.unrealized_conversion_cast %arg0 : !spirv.array<8192 x f32> to tensor<1x32x32x8xf32> + %padded = tensor.pad %0 low[0, 4, 4, 0] high[0, 4, 8193, 0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst_f32 : f32 + } : tensor<1x32x32x8xf32> to tensor<1x40x8229x8xf32> + %1 = bufferization.to_memref %padded : memref<1x40x8229x8xf32> + %alloc_0 = memref.alloc() {alignment = 64 : i64} : memref<1x32x32x8xf32> + affine.for %arg1 = 0 to 1 { + affine.for %arg2 = 0 to 32 { + affine.for %arg3 = 0 to 32 { + affine.for %arg4 = 0 to 8 { + affine.for %arg5 = 0 to 1 { + affine.for %arg6 = 0 to 1 { + %4 = affine.apply #map(%arg2, %arg5) + %5 = affine.apply #map(%arg3, %arg6) + %6 = affine.load %1[%arg1, %4, %5, %arg4] : memref<1x40x8229x8xf32> + %7 = affine.load %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32> + %8 = arith.addf %7, %6 : f32 + affine.store %8, %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32> + } + } + } + } + } + } + %alloc_1 = memref.alloc() {alignment = 64 : i64} : memref<1x32x32x8xf32> + affine.for %arg1 = 0 to 1 { + affine.for %arg2 = 0 to 32 { + affine.for %arg3 = 0 to 32 { + affine.for %arg4 = 0 to 8 { + %4 = affine.load %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32> + } + } + } + } + // Test fusion. 
+ // SPIRV: affine.for %{{.*}} = 0 to 1 { + // SPIRV-NEXT: affine.for %{{.*}} = 0 to 32 { + // SPIRV-NEXT: affine.for %{{.*}} = 0 to 32 { + // SPIRV-NEXT: affine.for %{{.*}} = 0 to 8 { + // SPIRV-NOT: affine.for %{{.*}} + + // SPIRV: ReturnValue + %2 = bufferization.to_tensor %alloc_1 : memref<1x32x32x8xf32> + %3 = builtin.unrealized_conversion_cast %2 : tensor<1x32x32x8xf32> to !spirv.array<8192 x f32> + spirv.ReturnValue %3 : !spirv.array<8192 x f32> + } +} diff --git a/mlir/test/Dialect/GPU/barrier-elimination.mlir b/mlir/test/Dialect/GPU/barrier-elimination.mlir index 1f5b84937deb05..7f6619adcd78f9 100644 --- a/mlir/test/Dialect/GPU/barrier-elimination.mlir +++ b/mlir/test/Dialect/GPU/barrier-elimination.mlir @@ -182,3 +182,20 @@ attributes {__parallel_region_boundary_for_test} { %4 = memref.load %C[] : memref return %0, %1, %2, %3, %4 : f32, f32, f32, f32, f32 } + +// CHECK-LABEL: @nested_loop_barrier_only +func.func @nested_loop_barrier_only() attributes {__parallel_region_boundary_for_test} { + %c0 = arith.constant 0 : index + %c42 = arith.constant 42 : index + %c1 = arith.constant 1 : index + // Note: the barrier can be removed and as consequence the loops get folded + // by the greedy rewriter. 
+ // CHECK-NOT: scf.for + // CHECK-NOT: gpu.barrier + scf.for %j = %c0 to %c42 step %c1 { + scf.for %i = %c0 to %c42 step %c1 { + gpu.barrier + } + } + return +} diff --git a/mlir/test/IR/locations.mlir b/mlir/test/IR/locations.mlir index 0c6426ebec8746..b725307b420b79 100644 --- a/mlir/test/IR/locations.mlir +++ b/mlir/test/IR/locations.mlir @@ -33,6 +33,15 @@ func.func @inline_notation() -> i32 { // CHECK-LABEL: func private @loc_attr(i1 {foo.loc_attr = loc(callsite("foo" at "mysource.cc":10:8))}) func.func private @loc_attr(i1 {foo.loc_attr = loc(callsite("foo" at "mysource.cc":10:8))}) +// CHECK-LABEL: func.func private @filelocrange_attr1(i1 {foo.loc_attr = loc("mysource.cc":10:0)}) +func.func private @filelocrange_attr1(i1 {foo.loc_attr = loc("mysource.cc":10)}) +// CHECK-LABEL: func.func private @filelocrange_attr2(i1 {foo.loc_attr = loc("mysource.cc":10:8)}) +func.func private @filelocrange_attr2(i1 {foo.loc_attr = loc("mysource.cc":10:8)}) +// CHECK-LABEL: func.func private @filelocrange_attr3(i1 {foo.loc_attr = loc("mysource.cc":10:8 to :12)}) +func.func private @filelocrange_attr3(i1 {foo.loc_attr = loc("mysource.cc":10:8 to :12)}) +// CHECK-LABEL: func.func private @filelocrange_attr4(i1 {foo.loc_attr = loc("mysource.cc":10:8 to 12:4)}) +func.func private @filelocrange_attr4(i1 {foo.loc_attr = loc("mysource.cc":10:8 to 12:4)}) + // Check that locations get properly escaped. 
// CHECK-LABEL: func @escape_strings() func.func @escape_strings() { diff --git a/mlir/test/IR/properties.mlir b/mlir/test/IR/properties.mlir index 9a1c49cb7dabf3..b339a03812badb 100644 --- a/mlir/test/IR/properties.mlir +++ b/mlir/test/IR/properties.mlir @@ -1,4 +1,4 @@ -// # RUN: mlir-opt %s -split-input-file | mlir-opt |FileCheck %s +// # RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s // # RUN: mlir-opt %s -mlir-print-op-generic -split-input-file | mlir-opt -mlir-print-op-generic | FileCheck %s --check-prefix=GENERIC // CHECK: test.with_properties @@ -38,6 +38,14 @@ test.using_property_in_custom [1, 4, 20] // GENERIC-SAME: }> test.using_property_ref_in_custom 1 + 4 = 5 +// Tests that the variadic segment size properties are elided. +// CHECK: %[[CI64:.*]] = arith.constant +// CHECK-NEXT: test.variadic_segment_prop %[[CI64]], %[[CI64]] : %[[CI64]] : i64, i64 : i64 end +// GENERIC: %[[CI64:.*]] = "arith.constant"() +// GENERIC-NEXT: "test.variadic_segment_prop"(%[[CI64]], %[[CI64]], %[[CI64]]) <{operandSegmentSizes = array, resultSegmentSizes = array}> : (i64, i64, i64) -> (i64, i64, i64) +%ci64 = arith.constant 0 : i64 +test.variadic_segment_prop %ci64, %ci64 : %ci64 : i64, i64 : i64 end + // CHECK: test.with_default_valued_properties na{{$}} // GENERIC: "test.with_default_valued_properties"() // GENERIC-SAME: <{a = 0 : i32, b = "", c = -1 : i32, unit = false}> : () -> () diff --git a/mlir/test/Transforms/remove-dead-values.mlir b/mlir/test/Transforms/remove-dead-values.mlir index 9f2be3331b6b4b..826f6159a36b67 100644 --- a/mlir/test/Transforms/remove-dead-values.mlir +++ b/mlir/test/Transforms/remove-dead-values.mlir @@ -1,12 +1,15 @@ // RUN: mlir-opt %s -remove-dead-values -split-input-file -verify-diagnostics | FileCheck %s -// The IR remains untouched because of the presence of a non-function-like -// symbol op inside the module (const @__dont_touch_unacceptable_ir). 
+// The IR is updated regardless of memref.global private constant // module { -// expected-error @+1 {{cannot optimize an IR with non-function symbol ops, non-call symbol user ops or branch ops}} - memref.global "private" constant @__dont_touch_unacceptable_ir : memref = dense<0> + // CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> {alignment = 16 : i64} + memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> {alignment = 16 : i64} func.func @main(%arg0: i32) -> i32 { + %0 = tensor.empty() : tensor<10xbf16> + // CHECK-NOT: memref.get_global + %1 = memref.get_global @__constant_4xi32 : memref<4xi32> + // CHECK-NOT: tensor.empty return %arg0 : i32 } } @@ -29,7 +32,7 @@ module @named_module_acceptable { // func.func @dont_touch_unacceptable_ir_has_cleanable_simple_op_with_branch_op(%arg0: i1) { %non_live = arith.constant 0 : i32 - // expected-error @+1 {{cannot optimize an IR with non-function symbol ops, non-call symbol user ops or branch ops}} + // expected-error @+1 {{cannot optimize an IR with branch ops}} cf.cond_br %arg0, ^bb1(%non_live : i32), ^bb2(%non_live : i32) ^bb1(%non_live_0 : i32): cf.br ^bb3 diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index e5503ee8920424..e05f444afa68f0 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -379,15 +379,24 @@ builtin.module { // ----- -// expected-remark @below {{applyPartialConversion failed}} module { - func.func private @callee(%0 : f32) -> f32 - - func.func @caller( %arg: f32) { - // expected-error @below {{failed to legalize}} - %1 = func.call @callee(%arg) : (f32) -> f32 - return - } +// CHECK-LABEL: func.func private @callee() -> (f16, f16) +func.func private @callee() -> (f32, i24) + +// CHECK: func.func @caller() +func.func @caller() { + // f32 is converted to (f16, f16). + // i24 is converted to (). 
+ // CHECK: %[[call:.*]]:2 = call @callee() : () -> (f16, f16) + %0:2 = func.call @callee() : () -> (f32, i24) + + // CHECK: %[[cast1:.*]] = "test.cast"() : () -> i24 + // CHECK: %[[cast0:.*]] = "test.cast"(%[[call]]#0, %[[call]]#1) : (f16, f16) -> f32 + // CHECK: "test.some_user"(%[[cast0]], %[[cast1]]) : (f32, i24) -> () + // expected-remark @below{{'test.some_user' is not legalizable}} + "test.some_user"(%0#0, %0#1) : (f32, i24) -> () + "test.return"() : () -> () +} } // ----- diff --git a/mlir/test/get_darwin_real_python.py b/mlir/test/get_darwin_real_python.py new file mode 100644 index 00000000000000..63bd08bcff89e1 --- /dev/null +++ b/mlir/test/get_darwin_real_python.py @@ -0,0 +1,16 @@ +# On macOS, system python binaries like /usr/bin/python and $(xcrun -f python3) +# are shims. They do some light validation work and then spawn the "real" python +# binary. Find the "real" python by asking dyld -- sys.executable reports the +# wrong thing more often than not. This is also useful when we're running under +# a Homebrew python3 binary, which also appears to be some kind of shim. 
+def getDarwinRealPythonExecutable(): + import ctypes + + dyld = ctypes.cdll.LoadLibrary("/usr/lib/system/libdyld.dylib") + namelen = ctypes.c_ulong(1024) + name = ctypes.create_string_buffer(b"\000", namelen.value) + dyld._NSGetExecutablePath(ctypes.byref(name), ctypes.byref(namelen)) + return name.value.decode("utf-8").strip() + + +print(getDarwinRealPythonExecutable()) diff --git a/mlir/test/lib/Dialect/LLVM/CMakeLists.txt b/mlir/test/lib/Dialect/LLVM/CMakeLists.txt index 734757ce79da37..6a2f0ba2756d43 100644 --- a/mlir/test/lib/Dialect/LLVM/CMakeLists.txt +++ b/mlir/test/lib/Dialect/LLVM/CMakeLists.txt @@ -1,6 +1,7 @@ # Exclude tests from libMLIR.so add_mlir_library(MLIRLLVMTestPasses TestLowerToLLVM.cpp + TestPatterns.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp b/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp new file mode 100644 index 00000000000000..ab02866970b1d5 --- /dev/null +++ b/mlir/test/lib/Dialect/LLVM/TestPatterns.cpp @@ -0,0 +1,77 @@ +//===- TestPatterns.cpp - LLVM dialect test patterns ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +namespace { + +/// Replace this op (which is expected to have 1 result) with the operands. 
+struct TestDirectReplacementOp : public ConversionPattern { + TestDirectReplacementOp(MLIRContext *ctx, const TypeConverter &converter) + : ConversionPattern(converter, "test.direct_replacement", 1, ctx) {} + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + if (op->getNumResults() != 1) + return failure(); + rewriter.replaceOpWithMultiple(op, {operands}); + return success(); + } +}; + +struct TestLLVMLegalizePatternsPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestLLVMLegalizePatternsPass) + + StringRef getArgument() const final { return "test-llvm-legalize-patterns"; } + StringRef getDescription() const final { + return "Run LLVM dialect legalization patterns"; + } + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnOperation() override { + MLIRContext *ctx = &getContext(); + LLVMTypeConverter converter(ctx); + mlir::RewritePatternSet patterns(ctx); + patterns.add(ctx, converter); + + // Define the conversion target used for the test. + ConversionTarget target(*ctx); + target.addLegalOp(OperationName("test.legal_op", ctx)); + + // Handle a partial conversion. 
+ DenseSet unlegalizedOps; + ConversionConfig config; + config.unlegalizedOps = &unlegalizedOps; + if (failed(applyPartialConversion(getOperation(), target, + std::move(patterns), config))) + getOperation()->emitError() << "applyPartialConversion failed"; + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// PassRegistration +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace test { +void registerTestLLVMLegalizePatternsPass() { + PassRegistration(); +} +} // namespace test +} // namespace mlir diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index cfe19a2fd5c08b..6752113cab8d41 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -3047,6 +3047,15 @@ def TestOpUsingPropertyInCustomAndOther ); } +def TestOpWithVariadicSegmentProperties : TEST_Op<"variadic_segment_prop", + [AttrSizedOperandSegments, AttrSizedResultSegments]> { + let arguments = (ins Variadic:$a1, Variadic:$a2); + let results = (outs Variadic:$b1, Variadic:$b2); + let assemblyFormat = [{ + $a1 `:` $a2 `:` type($b1) `:` type($b2) prop-dict attr-dict `end` + }]; +} + def TestOpUsingPropertyRefInCustom : TEST_Op<"using_property_ref_in_custom"> { let assemblyFormat = "custom($first) `+` custom($second, ref($first)) attr-dict"; let arguments = (ins IntProperty<"int64_t">:$first, IntProperty<"int64_t">:$second); diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 3df6cff3c0a60b..bbd55938718fe7 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -1215,6 +1215,11 @@ struct TestTypeConverter : public TypeConverter { return success(); } + // Drop I24 types. + if (t.isInteger(24)) { + return success(); + } + // Otherwise, convert the type directly. 
results.push_back(t); return success(); diff --git a/mlir/test/lib/Dialect/Test/TestTypeDefs.td b/mlir/test/lib/Dialect/Test/TestTypeDefs.td index 830475bed4e444..60108ac86d1edd 100644 --- a/mlir/test/lib/Dialect/Test/TestTypeDefs.td +++ b/mlir/test/lib/Dialect/Test/TestTypeDefs.td @@ -375,7 +375,7 @@ def TestI32 : Test_Type<"TestI32"> { } def TestRecursiveAlias - : Test_Type<"TestRecursiveAlias", [NativeTypeTrait<"IsMutable">]> { + : Test_Type<"TestRecursiveAlias", [MutableType]> { let mnemonic = "test_rec_alias"; let storageClass = "TestRecursiveTypeStorage"; let storageNamespace = "test"; diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index 9b429b424d3575..f162f9a00efa7c 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -3,6 +3,7 @@ import os import platform import re +import shutil import subprocess import tempfile @@ -77,6 +78,75 @@ def add_runtime(name): return ToolSubst(f"%{name}", find_runtime(name)) +# Provide the path to asan runtime lib 'libclang_rt.asan_osx_dynamic.dylib' if +# available. This is darwin specific since it's currently only needed on darwin. +# Stolen from llvm/test/lit.cfg.py with a few modifications +def get_asan_rtlib(): + if not "asan" in config.available_features or not "Darwin" in config.host_os: + return "" + # Find the asan rt lib + resource_dir = ( + subprocess.check_output([config.host_cc.strip(), "-print-resource-dir"]) + .decode("utf-8") + .strip() + ) + return os.path.join( + resource_dir, "lib", "darwin", "libclang_rt.asan_osx_dynamic.dylib" + ) + + +# On macOS, we can't do the DYLD_INSERT_LIBRARIES trick with a shim python +# binary as the ASan interceptors get loaded too late. Also, when SIP is +# enabled, we can't inject libraries into system binaries at all, so we need a +# copy of the "real" python to work with. 
+# Stolen from lldb/test/API/lit.cfg.py with a few modifications +def find_real_python_interpreter(): + # If we're running in a virtual environment, we have to copy Python into + # the virtual environment for it to work. + if sys.prefix != sys.base_prefix: + copied_python = os.path.join(sys.prefix, "bin", "copied-python") + else: + copied_python = os.path.join(config.lldb_build_directory, "copied-python") + + # Avoid doing any work if we already copied the binary. + if os.path.isfile(copied_python): + return copied_python + + # Find the "real" python binary. + real_python = ( + subprocess.check_output( + [ + config.python_executable, + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "get_darwin_real_python.py", + ), + ] + ) + .decode("utf-8") + .strip() + ) + + shutil.copy(real_python, copied_python) + + # Now make sure the copied Python works. The Python in Xcode has a relative + # RPATH and cannot be copied. + try: + # We don't care about the output, just make sure it runs. + subprocess.check_call([copied_python, "-V"]) + except subprocess.CalledProcessError: + # The copied Python didn't work. Assume we're dealing with the Python + # interpreter in Xcode. Given that this is not a system binary SIP + # won't prevent us form injecting the interceptors, but when running in + # a virtual environment, we can't use it directly. Create a symlink + # instead. + os.remove(copied_python) + os.symlink(real_python, copied_python) + + # The copied Python works. + return copied_python + + llvm_config.with_system_environment(["HOME", "INCLUDE", "LIB", "TMP", "TEMP"]) llvm_config.use_default_substitutions() @@ -91,6 +161,7 @@ def add_runtime(name): "LICENSE.txt", "lit.cfg.py", "lit.site.cfg.py", + "get_darwin_real_python.py", ] # Tweak the PATH to include the tools dir. @@ -172,10 +243,30 @@ def add_runtime(name): ) python_executable = config.python_executable -# Python configuration with sanitizer requires some magic preloading. 
This will only work on clang/linux. -# TODO: detect Darwin/Windows situation (or mark these tests as unsupported on these platforms). -if "asan" in config.available_features and "Linux" in config.host_os: - python_executable = f"LD_PRELOAD=$({config.host_cxx} -print-file-name=libclang_rt.asan-{config.host_arch}.so) {config.python_executable}" +# Python configuration with sanitizer requires some magic preloading. This will only work on clang/linux/darwin. +# TODO: detect Windows situation (or mark these tests as unsupported on these platforms). +if "asan" in config.available_features: + if "Linux" in config.host_os: + python_executable = f"LD_PRELOAD=$({config.host_cxx} -print-file-name=libclang_rt.asan-{config.host_arch}.so) {config.python_executable}" + if "Darwin" in config.host_os: + # Ensure we use a non-shim Python executable, for the `DYLD_INSERT_LIBRARIES` + # env variable to take effect + real_python_executable = find_real_python_interpreter() + if real_python_executable: + python_executable = real_python_executable + lit_config.note( + "Using {} instead of {}".format( + python_executable, config.python_executable + ) + ) + + asan_rtlib = get_asan_rtlib() + lit_config.note("Using ASan rtlib {}".format(asan_rtlib)) + config.environment["MallocNanoZone"] = "0" + config.environment["ASAN_OPTIONS"] = "detect_stack_use_after_return=1" + config.environment["DYLD_INSERT_LIBRARIES"] = asan_rtlib + + # On Windows the path to python could contains spaces in which case it needs to be provided in quotes. # This is the equivalent of how %python is setup in llvm/utils/lit/lit/llvm/config.py. 
elif "Windows" in config.host_os: diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py index 74967032562351..a794a3fc6fa006 100644 --- a/mlir/test/python/pass_manager.py +++ b/mlir/test/python/pass_manager.py @@ -1,6 +1,6 @@ # RUN: %PYTHON %s 2>&1 | FileCheck %s -import gc, sys +import gc, os, sys, tempfile from mlir.ir import * from mlir.passmanager import * from mlir.dialects.func import FuncOp @@ -340,3 +340,45 @@ def testPrintIrBeforeAndAfterAll(): # CHECK: } # CHECK: } pm.run(module) + + +# CHECK-LABEL: TEST: testPrintIrTree +@run +def testPrintIrTree(): + with Context() as ctx: + module = ModuleOp.parse( + """ + module { + func.func @main() { + %0 = arith.constant 10 + return + } + } + """ + ) + pm = PassManager.parse("builtin.module(canonicalize)") + ctx.enable_multithreading(False) + pm.enable_ir_printing() + # CHECK-LABEL: // Tree printing begin + # CHECK: \-- builtin_module_no-symbol-name + # CHECK: \-- 0_canonicalize.mlir + # CHECK-LABEL: // Tree printing end + pm.run(module) + log("// Tree printing begin") + with tempfile.TemporaryDirectory() as temp_dir: + pm.enable_ir_printing(tree_printing_dir_path=temp_dir) + pm.run(module) + + def print_file_tree(directory, prefix=""): + entries = sorted(os.listdir(directory)) + for i, entry in enumerate(entries): + path = os.path.join(directory, entry) + connector = "\-- " if i == len(entries) - 1 else "|-- " + log(f"{prefix}{connector}{entry}") + if os.path.isdir(path): + print_file_tree( + path, prefix + (" " if i == len(entries) - 1 else "│ ") + ) + + print_file_tree(temp_dir) + log("// Tree printing end") diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 002c3900056dee..94bc67a1e96093 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -113,6 +113,7 @@ void registerTestLinalgRankReduceContractionOps(); void registerTestLinalgTransforms(); void registerTestLivenessAnalysisPass(); void registerTestLivenessPass(); 
+void registerTestLLVMLegalizePatternsPass(); void registerTestLoopFusion(); void registerTestLoopMappingPass(); void registerTestLoopUnrollingPass(); @@ -250,6 +251,7 @@ void registerTestPasses() { mlir::test::registerTestLinalgTransforms(); mlir::test::registerTestLivenessAnalysisPass(); mlir::test::registerTestLivenessPass(); + mlir::test::registerTestLLVMLegalizePatternsPass(); mlir::test::registerTestLoopFusion(); mlir::test::registerTestLoopMappingPass(); mlir::test::registerTestLoopUnrollingPass(); diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 7e2b0694a860a3..8d2e15a941370c 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -2008,10 +2008,26 @@ static void genNonDefaultValueCheck(MethodBody &body, const Operator &op, << "() != " << propElement.getVar()->prop.getDefaultValue(); } +/// Elide the variadic segment size attributes if necessary. +/// This pushes elided attribute names in `elidedStorage`. +static void genVariadicSegmentElision(OperationFormat &fmt, Operator &op, + MethodBody &body, + const char *elidedStorage) { + if (!fmt.allOperands && + op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) + body << " " << elidedStorage << ".push_back(\"operandSegmentSizes\");\n"; + if (!fmt.allResultTypes && + op.getTrait("::mlir::OpTrait::AttrSizedResultSegments")) + body << " " << elidedStorage << ".push_back(\"resultSegmentSizes\");\n"; +} + /// Generate the printer for the 'prop-dict' directive. 
static void genPropDictPrinter(OperationFormat &fmt, Operator &op, MethodBody &body) { body << " ::llvm::SmallVector<::llvm::StringRef, 2> elidedProps;\n"; + + genVariadicSegmentElision(fmt, op, body, "elidedProps"); + for (const NamedProperty *namedProperty : fmt.usedProperties) body << " elidedProps.push_back(\"" << namedProperty->name << "\");\n"; for (const NamedAttribute *namedAttr : fmt.usedAttributes) @@ -2057,13 +2073,9 @@ static void genPropDictPrinter(OperationFormat &fmt, Operator &op, static void genAttrDictPrinter(OperationFormat &fmt, Operator &op, MethodBody &body, bool withKeyword) { body << " ::llvm::SmallVector<::llvm::StringRef, 2> elidedAttrs;\n"; - // Elide the variadic segment size attributes if necessary. - if (!fmt.allOperands && - op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) - body << " elidedAttrs.push_back(\"operandSegmentSizes\");\n"; - if (!fmt.allResultTypes && - op.getTrait("::mlir::OpTrait::AttrSizedResultSegments")) - body << " elidedAttrs.push_back(\"resultSegmentSizes\");\n"; + + genVariadicSegmentElision(fmt, op, body, "elidedAttrs"); + for (const StringRef key : fmt.inferredAttributes.keys()) body << " elidedAttrs.push_back(\"" << key << "\");\n"; for (const NamedAttribute *attr : fmt.usedAttributes) diff --git a/mlir/unittests/IR/AffineMapTest.cpp b/mlir/unittests/IR/AffineMapTest.cpp index eaeb18d128ca5e..166692f731d1cf 100644 --- a/mlir/unittests/IR/AffineMapTest.cpp +++ b/mlir/unittests/IR/AffineMapTest.cpp @@ -76,3 +76,57 @@ TEST(AffineMapTest, isProjectedPermutation) { AffineMap map10 = AffineMap::get(6, 0, {d5, d3, d2, d4}, &ctx); EXPECT_TRUE(map10.isProjectedPermutation()); } + +TEST(AffineMapTest, getInversePermutation) { + MLIRContext ctx; + OpBuilder b(&ctx); + + // 0. 
Empty map + AffineMap map0 = AffineMap::get(0, 0, {}, &ctx); + AffineMap inverseMap0 = inversePermutation(map0); + EXPECT_TRUE(inverseMap0.isEmpty()); + + auto d0 = b.getAffineDimExpr(0); + auto d1 = b.getAffineDimExpr(1); + auto d2 = b.getAffineDimExpr(2); + + // 1. (d0, d1, d2) -> (d1, d1, d0, d2, d1, d2, d1, d0) + AffineMap map1 = AffineMap::get(3, 0, {d1, d1, d0, d2, d1, d2, d1, d0}, &ctx); + // (d0, d1, d2, d3, d4, d5, d6, d7) -> (d2, d0, d3) + AffineMap inverseMap1 = inversePermutation(map1); + auto resultsInv1 = inverseMap1.getResults(); + EXPECT_EQ(resultsInv1.size(), 3UL); + + // 1.1 Expect d2 + AffineDimExpr expr = llvm::dyn_cast(resultsInv1[0]); + EXPECT_TRUE(expr && expr.getPosition() == 2); + + // 1.2 Expect d0 + expr = llvm::dyn_cast(resultsInv1[1]); + EXPECT_TRUE(expr && expr.getPosition() == 0); + + // 1.3 Expect d3 + expr = llvm::dyn_cast(resultsInv1[2]); + EXPECT_TRUE(expr && expr.getPosition() == 3); + + // 2. (d0, d1, d2) -> (d1, d0 + d1, d0, d2, d1, d2, d1, d0) + auto sum = d0 + d1; + AffineMap map2 = + AffineMap::get(3, 0, {d1, sum, d0, d2, d1, d2, d1, d0}, &ctx); + // (d0, d1, d2, d3, d4, d5, d6, d7) -> (d2, d0, d3) + AffineMap inverseMap2 = inversePermutation(map2); + auto resultsInv2 = inverseMap2.getResults(); + EXPECT_EQ(resultsInv2.size(), 3UL); + + // 2.1 Expect d2 + expr = llvm::dyn_cast(resultsInv2[0]); + EXPECT_TRUE(expr && expr.getPosition() == 2); + + // 2.2 Expect d0 + expr = llvm::dyn_cast(resultsInv2[1]); + EXPECT_TRUE(expr && expr.getPosition() == 0); + + // 2.3 Expect d3 + expr = llvm::dyn_cast(resultsInv2[2]); + EXPECT_TRUE(expr && expr.getPosition() == 3); +} diff --git a/mlir/utils/vscode/package-lock.json b/mlir/utils/vscode/package-lock.json index 1efd5779f5cb25..e2f331122df896 100644 --- a/mlir/utils/vscode/package-lock.json +++ b/mlir/utils/vscode/package-lock.json @@ -1,12 +1,12 @@ { "name": "vscode-mlir", - "version": "0.0.11", + "version": "0.0.12", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": 
"vscode-mlir", - "version": "0.0.11", + "version": "0.0.12", "dependencies": { "base64-js": "^1.5.1", "chokidar": "3.5.2", @@ -18,7 +18,7 @@ "@types/vscode": "~1.67.0", "@vscode/vsce": "^2.19.0", "clang-format": "^1.8.0", - "typescript": "^4.6.4", + "typescript": "^4.9.5", "vscode-test": "^1.3.0" }, "engines": { @@ -1195,9 +1195,9 @@ } }, "node_modules/minimatch": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", - "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", "dependencies": { "brace-expansion": "^1.1.7" }, @@ -1826,9 +1826,9 @@ } }, "node_modules/typescript": { - "version": "4.6.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.6.4.tgz", - "integrity": "sha512-9ia/jWHIEbo49HfjrLGfKbZSuWo9iTMwXO+Ca3pRsSpbsMbc7/IU8NKdCZVRRBafVPGnoJeFL76ZOAA84I9fEg==", + "version": "4.9.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz", + "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==", "dev": true, "bin": { "tsc": "bin/tsc", @@ -2895,9 +2895,9 @@ "optional": true }, "minimatch": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", - "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", "requires": { "brace-expansion": "^1.1.7" } @@ -3394,9 +3394,9 @@ } }, "typescript": { - "version": "4.6.4", - "resolved": 
"https://registry.npmjs.org/typescript/-/typescript-4.6.4.tgz", - "integrity": "sha512-9ia/jWHIEbo49HfjrLGfKbZSuWo9iTMwXO+Ca3pRsSpbsMbc7/IU8NKdCZVRRBafVPGnoJeFL76ZOAA84I9fEg==", + "version": "4.9.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz", + "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==", "dev": true }, "uc.micro": { diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index fde4b2f930349e..3ed5c02ed4a3bb 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -34,6 +34,7 @@ elseif(${LIBOMPTARGET_GPU_LIBC_SUPPORT}) # We may need to get the headers directly from the 'libc' source directory. target_include_directories(PluginCommon PRIVATE ${CMAKE_SOURCE_DIR}/../libc/utils/gpu/server + ${CMAKE_SOURCE_DIR}/../libc/ ${CMAKE_SOURCE_DIR}/../libc/include) endif() endif() diff --git a/offload/plugins-nextgen/common/include/RPC.h b/offload/plugins-nextgen/common/include/RPC.h index 01bf539bcb3f32..5b9b7ffd086b57 100644 --- a/offload/plugins-nextgen/common/include/RPC.h +++ b/offload/plugins-nextgen/common/include/RPC.h @@ -61,7 +61,7 @@ struct RPCServerTy { private: /// Array from this device's identifier to its attached devices. - llvm::SmallVector Handles; + llvm::SmallVector Buffers; }; } // namespace llvm::omp::target diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp index faa2cbd4f02fe1..dfb417f66864bf 100644 --- a/offload/plugins-nextgen/common/src/RPC.cpp +++ b/offload/plugins-nextgen/common/src/RPC.cpp @@ -12,9 +12,11 @@ #include "PluginInterface.h" +// TODO: This should be included unconditionally and cleaned up. 
#if defined(LIBOMPTARGET_RPC_SUPPORT) -#include "llvm-libc-types/rpc_opcodes_t.h" +#include "include/llvm-libc-types/rpc_opcodes_t.h" #include "llvmlibc_rpc_server.h" +#include "shared/rpc.h" #endif using namespace llvm; @@ -22,14 +24,14 @@ using namespace omp; using namespace target; RPCServerTy::RPCServerTy(plugin::GenericPluginTy &Plugin) - : Handles(Plugin.getNumDevices()) {} + : Buffers(Plugin.getNumDevices()) {} llvm::Expected RPCServerTy::isDeviceUsingRPC(plugin::GenericDeviceTy &Device, plugin::GenericGlobalHandlerTy &Handler, plugin::DeviceImageTy &Image) { #ifdef LIBOMPTARGET_RPC_SUPPORT - return Handler.isSymbolInImage(Device, Image, rpc_client_symbol_name); + return Handler.isSymbolInImage(Device, Image, "__llvm_libc_rpc_client"); #else return false; #endif @@ -39,59 +41,18 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device, plugin::GenericGlobalHandlerTy &Handler, plugin::DeviceImageTy &Image) { #ifdef LIBOMPTARGET_RPC_SUPPORT - auto Alloc = [](uint64_t Size, void *Data) { - plugin::GenericDeviceTy &Device = - *reinterpret_cast(Data); - return Device.allocate(Size, nullptr, TARGET_ALLOC_HOST); - }; uint64_t NumPorts = - std::min(Device.requestedRPCPortCount(), RPC_MAXIMUM_PORT_COUNT); - rpc_device_t RPCDevice; - if (rpc_status_t Err = rpc_server_init(&RPCDevice, NumPorts, - Device.getWarpSize(), Alloc, &Device)) + std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT); + void *RPCBuffer = Device.allocate( + rpc::Server::allocation_size(Device.getWarpSize(), NumPorts), nullptr, + TARGET_ALLOC_HOST); + if (!RPCBuffer) return plugin::Plugin::error( - "Failed to initialize RPC server for device %d: %d", - Device.getDeviceId(), Err); - - // Register a custom opcode handler to perform plugin specific allocation. 
- auto MallocHandler = [](rpc_port_t Port, void *Data) { - rpc_recv_and_send( - Port, - [](rpc_buffer_t *Buffer, void *Data) { - plugin::GenericDeviceTy &Device = - *reinterpret_cast(Data); - Buffer->data[0] = reinterpret_cast(Device.allocate( - Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING)); - }, - Data); - }; - if (rpc_status_t Err = - rpc_register_callback(RPCDevice, RPC_MALLOC, MallocHandler, &Device)) - return plugin::Plugin::error( - "Failed to register RPC malloc handler for device %d: %d\n", - Device.getDeviceId(), Err); - - // Register a custom opcode handler to perform plugin specific deallocation. - auto FreeHandler = [](rpc_port_t Port, void *Data) { - rpc_recv( - Port, - [](rpc_buffer_t *Buffer, void *Data) { - plugin::GenericDeviceTy &Device = - *reinterpret_cast(Data); - Device.free(reinterpret_cast(Buffer->data[0]), - TARGET_ALLOC_DEVICE_NON_BLOCKING); - }, - Data); - }; - if (rpc_status_t Err = - rpc_register_callback(RPCDevice, RPC_FREE, FreeHandler, &Device)) - return plugin::Plugin::error( - "Failed to register RPC free handler for device %d: %d\n", - Device.getDeviceId(), Err); + "Failed to initialize RPC server for device %d", Device.getDeviceId()); // Get the address of the RPC client from the device. 
void *ClientPtr; - plugin::GlobalTy ClientGlobal(rpc_client_symbol_name, sizeof(void *)); + plugin::GlobalTy ClientGlobal("__llvm_libc_rpc_client", sizeof(void *)); if (auto Err = Handler.getGlobalMetadataFromDevice(Device, Image, ClientGlobal)) return Err; @@ -100,38 +61,63 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device, sizeof(void *), nullptr)) return Err; - const void *ClientBuffer = rpc_get_client_buffer(RPCDevice); - if (auto Err = Device.dataSubmit(ClientPtr, ClientBuffer, - rpc_get_client_size(), nullptr)) + rpc::Client client(NumPorts, RPCBuffer); + if (auto Err = + Device.dataSubmit(ClientPtr, &client, sizeof(rpc::Client), nullptr)) return Err; - Handles[Device.getDeviceId()] = RPCDevice.handle; + Buffers[Device.getDeviceId()] = RPCBuffer; + + return Error::success(); + #endif return Error::success(); } Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) { #ifdef LIBOMPTARGET_RPC_SUPPORT - rpc_device_t RPCDevice{Handles[Device.getDeviceId()]}; - if (rpc_status_t Err = rpc_handle_server(RPCDevice)) - return plugin::Plugin::error( - "Error while running RPC server on device %d: %d", Device.getDeviceId(), - Err); + uint64_t NumPorts = + std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT); + rpc::Server Server(NumPorts, Buffers[Device.getDeviceId()]); + + auto port = Server.try_open(Device.getWarpSize()); + if (!port) + return Error::success(); + + int Status = rpc::SUCCESS; + switch (port->get_opcode()) { + case RPC_MALLOC: { + port->recv_and_send([&](rpc::Buffer *Buffer, uint32_t) { + Buffer->data[0] = reinterpret_cast(Device.allocate( + Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING)); + }); + break; + } + case RPC_FREE: { + port->recv([&](rpc::Buffer *Buffer, uint32_t) { + Device.free(reinterpret_cast(Buffer->data[0]), + TARGET_ALLOC_DEVICE_NON_BLOCKING); + }); + break; + } + default: + // Let the `libc` library handle any other unhandled opcodes. 
+ Status = libc_handle_rpc_port(&*port, Device.getWarpSize()); + break; + } + port->close(); + + if (Status != rpc::SUCCESS) + return createStringError("RPC server given invalid opcode!"); + + return Error::success(); #endif return Error::success(); } Error RPCServerTy::deinitDevice(plugin::GenericDeviceTy &Device) { #ifdef LIBOMPTARGET_RPC_SUPPORT - rpc_device_t RPCDevice{Handles[Device.getDeviceId()]}; - auto Dealloc = [](void *Ptr, void *Data) { - plugin::GenericDeviceTy &Device = - *reinterpret_cast(Data); - Device.free(Ptr, TARGET_ALLOC_HOST); - }; - if (rpc_status_t Err = rpc_server_shutdown(RPCDevice, Dealloc, &Device)) - return plugin::Plugin::error( - "Failed to shut down RPC server for device %d: %d", - Device.getDeviceId(), Err); + Device.free(Buffers[Device.getDeviceId()], TARGET_ALLOC_HOST); + return Error::success(); #endif return Error::success(); } diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 0628947540ca73..b3c7108d840d38 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -1220,6 +1220,7 @@ cc_library( ":Core", ":MC", ":Object", + ":ProfileData", ":Support", ":TargetParser", ":config", diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 74f13788ab29f7..51d72d2e5f5b23 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4886,7 +4886,9 @@ cc_library( ":FuncExtensions", ":FuncToLLVM", ":FuncTransformOps", + ":GPUToGPURuntimeTransforms", ":GPUToLLVMIRTranslation", + ":GPUToNVVMTransforms", ":GPUTransformOps", ":IndexToLLVM", ":LLVMToLLVMIRTranslation", @@ -5907,6 +5909,7 @@ cc_library( ":ControlFlowDialect", ":ControlFlowToLLVM", ":ConversionPassIncGen", + ":ConvertToLLVMInterface", ":FuncDialect", ":FuncToLLVM", ":GPUCommonTransforms", @@ -6088,6 +6091,7 @@ cc_library( 
hdrs = [ "include/mlir/Conversion/GPUCommon/AttrToSPIRVConverter.h", "include/mlir/Conversion/GPUCommon/GPUCommonPass.h", + "include/mlir/Conversion/GPUCommon/GPUToLLVM.h", "lib/Conversion/GPUCommon/GPUOpsLowering.h", ], includes = ["include"], @@ -8374,6 +8378,31 @@ cc_library( ], ) +gentbl_cc_library( + name = "ToLLVMInterfaceIncGen", + tbl_outs = [ + ( + ["--gen-attr-interface-decls"], + "include/mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.h.inc", + ), + ( + ["--gen-attr-interface-defs"], + "include/mlir/Conversion/ConvertToLLVM/ToLLVMAttrInterface.cpp.inc", + ), + ( + ["--gen-op-interface-decls"], + "include/mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.h.inc", + ), + ( + ["--gen-op-interface-defs"], + "include/mlir/Conversion/ConvertToLLVM/ToLLVMOpInterface.cpp.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Conversion/ConvertToLLVM/ToLLVMInterface.td", + deps = [":UBDialectTdFiles"], +) + cc_library( name = "ConvertToLLVMInterface", srcs = ["lib/Conversion/ConvertToLLVM/ToLLVMInterface.cpp"], @@ -8382,6 +8411,7 @@ cc_library( deps = [ ":IR", ":Support", + ":ToLLVMInterfaceIncGen", "//llvm:Support", ], ) @@ -8394,6 +8424,7 @@ cc_library( deps = [ ":ConversionPassIncGen", ":ConvertToLLVMInterface", + ":Analysis", ":IR", ":LLVMCommonConversion", ":LLVMDialect", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index c69f793943beec..688edacbc93bf9 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -807,6 +807,7 @@ cc_library( "//mlir:FuncToLLVM", "//mlir:IR", "//mlir:IndexToLLVM", + "//mlir:LLVMCommonConversion", "//mlir:LLVMDialect", "//mlir:LinalgTransforms", "//mlir:MathToLLVM", @@ -815,6 +816,7 @@ cc_library( "//mlir:Pass", "//mlir:ReconcileUnrealizedCasts", "//mlir:SCFToControlFlow", + "//mlir:TransformUtils", "//mlir:Transforms", "//mlir:VectorToLLVM", "//mlir:VectorToSCF",