Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Clang][NEON] Add neon target guard to intrinsics #99870

Merged
merged 3 commits into from
Jul 22, 2024
Merged

Conversation

Lukacma
Copy link
Contributor

@Lukacma Lukacma commented Jul 22, 2024

This patch improves reported error when NEON intrinsics are used without neon target feature.

@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Jul 22, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Jul 22, 2024

@llvm/pr-subscribers-clang

Author: None (Lukacma)

Changes

This patch improves reported error when NEON intrinsics are used without neon target feature.


Full diff: https://github.com/llvm/llvm-project/pull/99870.diff

4 Files Affected:

  • (modified) clang/include/clang/Basic/arm_neon.td (+32-32)
  • (modified) clang/include/clang/Basic/arm_neon_incl.td (+1-1)
  • (added) clang/test/Sema/aarch64-neon-without-target-feature.cpp (+36)
  • (modified) clang/utils/TableGen/NeonEmitter.cpp (+3-3)
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 6390ba3f9fe5e..3098fa67e6a51 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -289,7 +289,7 @@ def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
                    "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
   let isLaneQ = 1;
 }
-let TargetGuard = "bf16" in {
+let TargetGuard = "bf16,neon" in {
   def SPLAT_BF  : WInst<"splat_lane", ".(!q)I", "bQb">;
   def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb"> {
     let isLaneQ = 1;
@@ -323,7 +323,7 @@ def VMLSL    : SOpInst<"vmlsl", "(>Q)(>Q)..", "csiUcUsUi", OP_MLSL>;
 def VQDMULH  : SInst<"vqdmulh", "...", "siQsQi">;
 def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">;
 
-let TargetGuard = "v8.1a" in {
+let TargetGuard = "v8.1a,neon" in {
 def VQRDMLAH : SInst<"vqrdmlah", "....", "siQsQi">;
 def VQRDMLSH : SInst<"vqrdmlsh", "....", "siQsQi">;
 }
@@ -614,7 +614,7 @@ def A64_VQDMULH_LANE  : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">;
 def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
 }
 
-let TargetGuard = "v8.1a" in {
+let TargetGuard = "v8.1a,neon" in {
 def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "...qI", "siQsQi", OP_QRDMLAH_LN>;
 def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "...qI", "siQsQi", OP_QRDMLSH_LN>;
 }
@@ -957,7 +957,7 @@ def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "(>Q)(>Q)QQ", "si", OP_QDMLALHi>;
 def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLALHi_N>;
 def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "(>Q)(>Q)QQ", "si", OP_QDMLSLHi>;
 def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLSLHi_N>;
-let TargetGuard = "aes" in {
+let TargetGuard = "aes,neon" in {
   def VMULL_P64    : SInst<"vmull", "(1>)11", "Pl">;
   def VMULL_HIGH_P64 : SOpInst<"vmull_high", "(1>)..", "HPl", OP_MULLHi_P64>;
 }
@@ -1091,7 +1091,7 @@ let isLaneQ = 1 in {
 def VQDMULH_LANEQ  : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
 def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
 }
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
 def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
   let isLaneQ = 1;
 }
@@ -1122,14 +1122,14 @@ def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Crypto
-let ArchGuard = "__ARM_ARCH >= 8", TargetGuard = "aes" in {
+let ArchGuard = "__ARM_ARCH >= 8", TargetGuard = "aes,neon" in {
 def AESE : SInst<"vaese", "...", "QUc">;
 def AESD : SInst<"vaesd", "...", "QUc">;
 def AESMC : SInst<"vaesmc", "..", "QUc">;
 def AESIMC : SInst<"vaesimc", "..", "QUc">;
 }
 
-let ArchGuard = "__ARM_ARCH >= 8", TargetGuard = "sha2" in {
+let ArchGuard = "__ARM_ARCH >= 8", TargetGuard = "sha2,neon" in {
 def SHA1H : SInst<"vsha1h", "11", "Ui">;
 def SHA1SU1 : SInst<"vsha1su1", "...", "QUi">;
 def SHA256SU0 : SInst<"vsha256su0", "...", "QUi">;
@@ -1143,7 +1143,7 @@ def SHA256H2 : SInst<"vsha256h2", "....", "QUi">;
 def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">;
 }
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3,neon" in {
 def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
 def RAX1 : SInst<"vrax1", "...", "QUl">;
@@ -1153,14 +1153,14 @@ def XAR :  SInst<"vxar", "...I", "QUl">;
 }
 }
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3,neon" in {
 def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
 def SHA512su1 : SInst<"vsha512su1", "....", "QUl">;
 def SHA512H : SInst<"vsha512h", "....", "QUl">;
 def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
 }
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4,neon" in {
 def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
 def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
 def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
@@ -1170,7 +1170,7 @@ def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
 def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
 }
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4,neon" in {
 def SM4E : SInst<"vsm4e", "...", "QUi">;
 def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">;
 }
@@ -1227,7 +1227,7 @@ def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">;
 def FRINTI_S64 : SInst<"vrndi", "..", "dQd">;
 }
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.5a" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.5a,neon" in {
 def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
 def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
 def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
@@ -1401,7 +1401,7 @@ def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">;
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
 def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">;
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
 ////////////////////////////////////////////////////////////////////////////////
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">;
@@ -1632,7 +1632,7 @@ def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_
   let isLaneQ = 1;
 }
 
-let TargetGuard = "v8.1a" in {
+let TargetGuard = "v8.1a,neon" in {
 // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
 def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>;
 def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN> {
@@ -1654,7 +1654,7 @@ def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcS
 } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
 
 // ARMv8.2-A FP16 vector intrinsics for A32/A64.
-let TargetGuard = "fullfp16" in {
+let TargetGuard = "fullfp16,neon" in {
 
   // ARMv8.2-A FP16 one-operand vector intrinsics.
 
@@ -1679,7 +1679,7 @@ let TargetGuard = "fullfp16" in {
   def VCVTP_U16    : SInst<"vcvtp_u16", "U.", "hQh">;
 
   // Vector rounding
-  let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)", TargetGuard = "fullfp16" in {
+  let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)", TargetGuard = "fullfp16,neon" in {
     def FRINTZH      : SInst<"vrnd",  "..", "hQh">;
     def FRINTNH      : SInst<"vrndn", "..", "hQh">;
     def FRINTAH      : SInst<"vrnda", "..", "hQh">;
@@ -1728,7 +1728,7 @@ let TargetGuard = "fullfp16" in {
   // Max/Min
   def VMAXH         : SInst<"vmax", "...", "hQh">;
   def VMINH         : SInst<"vmin", "...", "hQh">;
-  let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)", TargetGuard = "fullfp16" in {
+  let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)", TargetGuard = "fullfp16,neon" in {
     def FMAXNMH       : SInst<"vmaxnm", "...", "hQh">;
     def FMINNMH       : SInst<"vminnm", "...", "hQh">;
   }
@@ -1775,7 +1775,7 @@ def VEXTH      : WInst<"vext", "...I", "hQh">;
 def VREV64H    : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
 
 // ARMv8.2-A FP16 vector intrinsics for A64 only.
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fullfp16" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fullfp16,neon" in {
 
   // Vector rounding
   def FRINTIH      : SInst<"vrndi", "..", "hQh">;
@@ -1872,11 +1872,11 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
 }
 
 // v8.2-A dot product instructions.
-let TargetGuard = "dotprod" in {
+let TargetGuard = "dotprod,neon" in {
   def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">;
   def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<<q)I", "iUiQiQUi", OP_DOT_LN>;
 }
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "dotprod" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "dotprod,neon" in {
   // Variants indexing into a 128-bit vector are A64 only.
   def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ> {
     let isLaneQ = 1;
@@ -1884,7 +1884,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "d
 }
 
 // v8.2-A FP16 fused multiply-add long instructions.
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fp16fml" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fp16fml,neon" in {
   def VFMLAL_LOW  : SInst<"vfmlal_low",  ">>..", "hQh">;
   def VFMLSL_LOW  : SInst<"vfmlsl_low",  ">>..", "hQh">;
   def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">;
@@ -1909,7 +1909,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
   }
 }
 
-let TargetGuard = "i8mm" in {
+let TargetGuard = "i8mm,neon" in {
   def VMMLA   : SInst<"vmmla", "..(<<)(<<)", "QUiQi">;
   def VUSMMLA : SInst<"vusmmla", "..(<<U)(<<)", "Qi">;
 
@@ -1926,7 +1926,7 @@ let TargetGuard = "i8mm" in {
   }
 }
 
-let TargetGuard = "bf16" in {
+let TargetGuard = "bf16,neon" in {
   def VDOT_BF : SInst<"vbfdot", "..BB", "fQf">;
   def VDOT_LANE_BF : SOpInst<"vbfdot_lane", "..B(Bq)I", "fQf", OP_BFDOT_LN>;
   def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ> {
@@ -1970,7 +1970,7 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
 }
 
 // v8.3-A Vector complex addition intrinsics
-let TargetGuard = "v8.3a,fullfp16" in {
+let TargetGuard = "v8.3a,fullfp16,neon" in {
   def VCADD_ROT90_FP16   : SInst<"vcadd_rot90", "...", "h">;
   def VCADD_ROT270_FP16  : SInst<"vcadd_rot270", "...", "h">;
   def VCADDQ_ROT90_FP16  : SInst<"vcaddq_rot90", "QQQ", "h">;
@@ -1978,7 +1978,7 @@ let TargetGuard = "v8.3a,fullfp16" in {
 
   defm VCMLA_FP16  : VCMLA_ROTS<"h", "uint32x2_t", "uint32x4_t">;
 }
-let TargetGuard = "v8.3a" in {
+let TargetGuard = "v8.3a,neon" in {
   def VCADD_ROT90   : SInst<"vcadd_rot90", "...", "f">;
   def VCADD_ROT270  : SInst<"vcadd_rot270", "...", "f">;
   def VCADDQ_ROT90  : SInst<"vcaddq_rot90", "QQQ", "f">;
@@ -1986,7 +1986,7 @@ let TargetGuard = "v8.3a" in {
 
   defm VCMLA_F32        : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
 }
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.3a" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.3a,neon" in {
   def VCADDQ_ROT90_FP64  : SInst<"vcaddq_rot90", "QQQ", "d">;
   def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
 
@@ -1994,7 +1994,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v
 }
 
 // V8.2-A BFloat intrinsics
-let TargetGuard = "bf16" in {
+let TargetGuard = "bf16,neon" in {
   def VCREATE_BF : NoTestOpInst<"vcreate", ".(IU>)", "b", OP_CAST> {
     let BigEndianSafe = 1;
   }
@@ -2058,14 +2058,14 @@ let TargetGuard = "bf16" in {
   def SCALAR_CVT_F32_BF16 : SOpInst<"vcvtah_f32", "(1F>)(1!)", "b", OP_CVT_F32_BF16>;
 }
 
-let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16" in {
+let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16,neon" in {
   def VCVT_BF16_F32_A32_INTERNAL : WInst<"__a32_vcvt_bf16", "BQ", "f">;
   def VCVT_BF16_F32_A32 : SOpInst<"vcvt_bf16", "BQ", "f", OP_VCVT_BF16_F32_A32>;
   def VCVT_LOW_BF16_F32_A32 : SOpInst<"vcvt_low_bf16",  "BQ", "Qf", OP_VCVT_BF16_F32_LO_A32>;
   def VCVT_HIGH_BF16_F32_A32 : SOpInst<"vcvt_high_bf16", "BBQ", "Qf", OP_VCVT_BF16_F32_HI_A32>;
 }
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16,neon" in {
   def VCVT_LOW_BF16_F32_A64_INTERNAL : WInst<"__a64_vcvtq_low_bf16", "BQ", "Hf">;
   def VCVT_LOW_BF16_F32_A64 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A64>;
   def VCVT_HIGH_BF16_F32_A64 : SInst<"vcvt_high_bf16", "BBQ", "Qf">;
@@ -2077,14 +2077,14 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b
   def COPYQ_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..I.I", "Qb", OP_COPY_LN>;
 }
 
-let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16" in {
+let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16,neon" in {
   let BigEndianSafe = 1 in {
     defm VREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
         "csilUcUsUiUlhfPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQPcQPsQPl", "bQb">;
   }
 }
 
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16,neon" in {
   let BigEndianSafe = 1 in {
     defm VVREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
         "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", "bQb">;
@@ -2092,7 +2092,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b
 }
 
 // v8.9a/v9.4a LRCPC3 intrinsics
-let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3,neon" in {
   def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
   def VSTL1_LANE  : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
 }
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index b8155c187d1bc..3b8015daee6d9 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -265,7 +265,7 @@ class Inst <string n, string p, string t, Operation o> {
   string Prototype = p;
   string Types = t;
   string ArchGuard = "";
-  string TargetGuard = "";
+  string TargetGuard = "neon";
 
   Operation Operation = o;
   bit BigEndianSafe = 0;
diff --git a/clang/test/Sema/aarch64-neon-without-target-feature.cpp b/clang/test/Sema/aarch64-neon-without-target-feature.cpp
new file mode 100644
index 0000000000000..0831eb7c754a7
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-without-target-feature.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +dotprod  -target-feature +fullfp16 -target-feature +fp16fml -target-feature +i8mm -target-feature +bf16 -verify -emit-llvm -o - %s
+
+// REQUIRES: aarch64-registered-target
+
+// This test is testing the diagnostics that Clang emits when compiling without '+neon'.
+
+#include <arm_neon.h>
+
+void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64, bfloat16x4_t v4bf16, __bf16 bf16, poly64_t poly64, poly64x2_t poly64x2) {
+  // dotprod
+  vdot_u32(v2i32, v8i8, v8i8); // expected-error {{always_inline function 'vdot_u32' requires target feature 'neon'}}
+  vdot_laneq_u32(v2i32, v8i8, v16i8, 1); // expected-error {{always_inline function 'vdot_u32' requires target feature 'neon'}} expected-error {{'__builtin_neon_splat_laneq_v' needs target feature neon}}
+  // fp16
+  vceqz_f16(v4f16); // expected-error {{always_inline function 'vceqz_f16' requires target feature 'neon'}}
+  vrnd_f16(v4f16); // expected-error {{always_inline function 'vrnd_f16' requires target feature 'neon'}}
+  vmaxnm_f16(v4f16, v4f16); // expected-error {{always_inline function 'vmaxnm_f16' requires target feature 'neon'}}
+  vrndi_f16(v4f16); // expected-error {{always_inline function 'vrndi_f16' requires target feature 'neon'}}
+  // fp16fml depends on fp-armv8
+  vfmlal_low_f16(v2f32, v4f16, v4f16); // expected-error {{always_inline function 'vfmlal_low_f16' requires target feature 'neon'}}
+  // i8mm
+  vmmlaq_s32(v4i32, v8i16, v8i16); // expected-error {{always_inline function 'vmmlaq_s32' requires target feature 'neon'}}
+  vusdot_laneq_s32(v2i32, v8i8, v8i16, 0); // expected-error {{always_inline function 'vusdot_s32' requires target feature 'neon'}} expected-error {{'__builtin_neon_splat_laneq_v' needs target feature neon}}
+  // bf16
+  vbfdot_f32(v2f32, v4bf16, v4bf16); // expected-error {{always_inline function 'vbfdot_f32' requires target feature 'neon'}}
+  vcreate_bf16(10);
+  vdup_lane_bf16(v4bf16, 2); // expected-error {{'__builtin_neon_splat_lane_bf16' needs target feature bf16,neon}}
+  vdup_n_bf16(bf16); // expected-error {{always_inline function 'vdup_n_bf16' requires target feature 'neon'}}
+  vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target feature bf16,neon}}
+  vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 'vcvt_f32_bf16' requires target feature 'neon'}}
+  vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'neon'}}
+  vmull_p64(poly64, poly64);  // expected-error {{always_inline function 'vmull_p64' requires target feature 'neon'}}
+  vmull_high_p64(poly64x2, poly64x2);  // expected-error {{always_inline function 'vmull_high_p64' requires target feature 'neon'}}
+  vtrn1_s8(v8i8, v8i8); // expected-error {{always_inline function 'vtrn1_s8' requires target feature 'neon'}}
+  vqabsq_s16(v8i16); // expected-error {{always_inline function 'vqabsq_s16' requires target feature 'neon'}}
+  vbslq_s16(v8i16, v8i16, v8i16);// expected-error {{always_inline function 'vbslq_s16' requires target feature 'neon'}}
+}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 626031d38cf00..30fbb8c5d65e5 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -952,7 +952,7 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
   char typeCode = '\0';
   bool printNumber = true;
 
-  if (CK == ClassB && TargetGuard == "")
+  if (CK == ClassB && TargetGuard == "neon")
     return "";
 
   if (T.isBFloat16())
@@ -976,7 +976,7 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
       break;
     }
   }
-  if (CK == ClassB && TargetGuard == "") {
+  if (CK == ClassB && TargetGuard == "neon") {
     typeCode = '\0';
   }
 
@@ -1078,7 +1078,7 @@ std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
     S += "_" + getInstTypeCode(InBaseType, LocalCK);
   }
 
-  if (LocalCK == ClassB && TargetGuard == "")
+  if (LocalCK == ClassB && TargetGuard == "neon")
     S += "_v";
 
   // Insert a 'q' before the first '_' character so that it ends up before

@Lukacma Lukacma merged commit 0284b4b into llvm:main Jul 22, 2024
10 checks passed
@Lukacma
Copy link
Contributor Author

Lukacma commented Jul 22, 2024

This PR fixes and reapplies #98624

@llvm-ci
Copy link
Collaborator

llvm-ci commented Jul 22, 2024

LLVM Buildbot has detected a new failure on builder sanitizer-x86_64-linux running on sanitizer-buildbot2 while building clang at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/1949

Here is the relevant piece of the build log for the reference:

Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
[366/371] Generating MSAN_INST_GTEST.gtest-all.cc.x86_64.o
[367/371] Generating MSAN_INST_TEST_OBJECTS.msan_test.cpp.x86_64-with-call.o
[368/371] Generating Msan-x86_64-with-call-Test
[369/371] Generating MSAN_INST_TEST_OBJECTS.msan_test.cpp.x86_64.o
[370/371] Generating Msan-x86_64-Test
[370/371] Running compiler_rt regression tests
llvm-lit: /b/sanitizer-x86_64-linux/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 10048 tests, 88 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90..
TIMEOUT: SanitizerCommon-hwasan-x86_64-Linux :: Posix/fork_threaded.c (10048 of 10048)
******************** TEST 'SanitizerCommon-hwasan-x86_64-Linux :: Posix/fork_threaded.c' FAILED ********************
Exit Code: -9
Timeout: Reached timeout of 900 seconds

Command Output (stderr):
--
RUN: at line 1: /b/sanitizer-x86_64-linux/build/build_default/./bin/clang  -gline-tables-only -fsanitize=hwaddress -fuse-ld=lld -fsanitize-hwaddress-experimental-aliasing  -m64 -funwind-tables  -I/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test -ldl -O0 /b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Posix/fork_threaded.c -o /b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Posix/Output/fork_threaded.c.tmp && env HWASAN_OPTIONS=die_after_fork=0  /b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Posix/Output/fork_threaded.c.tmp
+ /b/sanitizer-x86_64-linux/build/build_default/./bin/clang -gline-tables-only -fsanitize=hwaddress -fuse-ld=lld -fsanitize-hwaddress-experimental-aliasing -m64 -funwind-tables -I/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test -ldl -O0 /b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Posix/fork_threaded.c -o /b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Posix/Output/fork_threaded.c.tmp
+ env HWASAN_OPTIONS=die_after_fork=0 /b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Posix/Output/fork_threaded.c.tmp
HWAddressSanitizer:DEADLYSIGNAL
==1970781==ERROR: HWAddressSanitizer: SEGV on unknown address (pc 0x64f65e0c20cf bp 0x70e9a63a0798 sp 0x7ffca3a9ae70 T1970781)
==1970781==The signal is caused by a READ memory access.
==1970781==Hint: this fault was caused by a dereference of a high value address (see register values below).  Disassemble the provided pc to learn which register was used.
==1970398==ERROR: HWAddressSanitizer: tag-mismatch on address 0x581400000280 at pc 0x64f65e07edf7
READ of size 155 at 0x581400000280 tags: 02/00 (ptr/mem) in thread T1

--

********************
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90.. 
Slowest Tests:
--------------------------------------------------------------------------
900.09s: SanitizerCommon-hwasan-x86_64-Linux :: Posix/fork_threaded.c
199.28s: libFuzzer-x86_64-default-Linux :: out-of-process-fuzz.test
193.28s: libFuzzer-x86_64-libcxx-Linux :: out-of-process-fuzz.test
187.94s: libFuzzer-x86_64-static-libcxx-Linux :: out-of-process-fuzz.test
48.32s: libFuzzer-i386-static-libcxx-Linux :: value-profile-switch.test
47.88s: ThreadSanitizer-x86_64 :: bench_threads.cpp
43.72s: libFuzzer-i386-default-Linux :: value-profile-switch.test
43.19s: libFuzzer-i386-default-Linux :: fork_corpus_groups.test
41.99s: libFuzzer-i386-libcxx-Linux :: value-profile-switch.test
41.39s: libFuzzer-i386-libcxx-Linux :: disable-leaks.test
41.34s: libFuzzer-i386-static-libcxx-Linux :: fork_corpus_groups.test
41.20s: libFuzzer-i386-default-Linux :: fork.test
41.08s: libFuzzer-i386-static-libcxx-Linux :: fork.test
40.97s: libFuzzer-i386-libcxx-Linux :: fork.test
40.81s: libFuzzer-x86_64-default-Linux :: fork.test
40.59s: libFuzzer-x86_64-libcxx-Linux :: fork.test
39.80s: libFuzzer-x86_64-libcxx-Linux :: fork_corpus_groups.test
Step 9 (test compiler-rt symbolizer) failure: test compiler-rt symbolizer (failure)
...
[366/371] Generating MSAN_INST_GTEST.gtest-all.cc.x86_64.o
[367/371] Generating MSAN_INST_TEST_OBJECTS.msan_test.cpp.x86_64-with-call.o
[368/371] Generating Msan-x86_64-with-call-Test
[369/371] Generating MSAN_INST_TEST_OBJECTS.msan_test.cpp.x86_64.o
[370/371] Generating Msan-x86_64-Test
[370/371] Running compiler_rt regression tests
llvm-lit: /b/sanitizer-x86_64-linux/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 10048 tests, 88 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90..
TIMEOUT: SanitizerCommon-hwasan-x86_64-Linux :: Posix/fork_threaded.c (10048 of 10048)
******************** TEST 'SanitizerCommon-hwasan-x86_64-Linux :: Posix/fork_threaded.c' FAILED ********************
Exit Code: -9
Timeout: Reached timeout of 900 seconds

Command Output (stderr):
--
RUN: at line 1: /b/sanitizer-x86_64-linux/build/build_default/./bin/clang  -gline-tables-only -fsanitize=hwaddress -fuse-ld=lld -fsanitize-hwaddress-experimental-aliasing  -m64 -funwind-tables  -I/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test -ldl -O0 /b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Posix/fork_threaded.c -o /b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Posix/Output/fork_threaded.c.tmp && env HWASAN_OPTIONS=die_after_fork=0  /b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Posix/Output/fork_threaded.c.tmp
+ /b/sanitizer-x86_64-linux/build/build_default/./bin/clang -gline-tables-only -fsanitize=hwaddress -fuse-ld=lld -fsanitize-hwaddress-experimental-aliasing -m64 -funwind-tables -I/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test -ldl -O0 /b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Posix/fork_threaded.c -o /b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Posix/Output/fork_threaded.c.tmp
+ env HWASAN_OPTIONS=die_after_fork=0 /b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/hwasan-x86_64-Linux/Posix/Output/fork_threaded.c.tmp
HWAddressSanitizer:DEADLYSIGNAL
==1970781==ERROR: HWAddressSanitizer: SEGV on unknown address (pc 0x64f65e0c20cf bp 0x70e9a63a0798 sp 0x7ffca3a9ae70 T1970781)
==1970781==The signal is caused by a READ memory access.
==1970781==Hint: this fault was caused by a dereference of a high value address (see register values below).  Disassemble the provided pc to learn which register was used.
==1970398==ERROR: HWAddressSanitizer: tag-mismatch on address 0x581400000280 at pc 0x64f65e07edf7
READ of size 155 at 0x581400000280 tags: 02/00 (ptr/mem) in thread T1

--

********************
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90.. 
Slowest Tests:
--------------------------------------------------------------------------
900.09s: SanitizerCommon-hwasan-x86_64-Linux :: Posix/fork_threaded.c
199.28s: libFuzzer-x86_64-default-Linux :: out-of-process-fuzz.test
193.28s: libFuzzer-x86_64-libcxx-Linux :: out-of-process-fuzz.test
187.94s: libFuzzer-x86_64-static-libcxx-Linux :: out-of-process-fuzz.test
48.32s: libFuzzer-i386-static-libcxx-Linux :: value-profile-switch.test
47.88s: ThreadSanitizer-x86_64 :: bench_threads.cpp
43.72s: libFuzzer-i386-default-Linux :: value-profile-switch.test
43.19s: libFuzzer-i386-default-Linux :: fork_corpus_groups.test
41.99s: libFuzzer-i386-libcxx-Linux :: value-profile-switch.test
41.39s: libFuzzer-i386-libcxx-Linux :: disable-leaks.test
41.34s: libFuzzer-i386-static-libcxx-Linux :: fork_corpus_groups.test
41.20s: libFuzzer-i386-default-Linux :: fork.test
41.08s: libFuzzer-i386-static-libcxx-Linux :: fork.test
40.97s: libFuzzer-i386-libcxx-Linux :: fork.test
40.81s: libFuzzer-x86_64-default-Linux :: fork.test
40.59s: libFuzzer-x86_64-libcxx-Linux :: fork.test
39.80s: libFuzzer-x86_64-libcxx-Linux :: fork_corpus_groups.test

sgundapa pushed a commit to sgundapa/upstream_effort that referenced this pull request Jul 23, 2024
This patch improves reported error when NEON intrinsics are used without
neon target feature.
yuxuanchen1997 pushed a commit that referenced this pull request Jul 25, 2024
This patch improves reported error when NEON intrinsics are used without
neon target feature.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants