From 7a7875611862627c811be8d0bbbc2a5a861862e4 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 12 Jul 2023 09:34:43 +0200
Subject: [PATCH] [LSR] Regenerate test checks (NFC)

---
 .../LoopStrengthReduce/ARM/ivchain-ARM.ll     | 231 ++++++++++++++----
 .../addrec-gep-address-space.ll               | 141 +++++++----
 .../LoopStrengthReduce/addrec-gep.ll          | 133 ++++++----
 .../LoopStrengthReduce/illegal-addr-modes.ll  |  98 +++++++-
 .../missing-phi-operand-update.ll             | 101 +++++++-
 5 files changed, 554 insertions(+), 150 deletions(-)

diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 6c90697920870b..70d6cffdbd0042 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -1,17 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc -opaque-pointers=0 -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
 
 ; @simple is the most basic chain of address induction variables. Chaining
 ; saves at least one register and avoids complex addressing and setup
 ; code.
 ;
-; A9: @simple ; no expensive address computation in the preheader
-; A9: lsl
-; A9-NOT: lsl
-; A9: %loop ; no complex address modes
-; A9-NOT: lsl
 define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+; A9-LABEL: simple:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: .save {r4, r5, r6, lr}
+; A9-NEXT: push {r4, r5, r6, lr}
+; A9-NEXT: mov r3, r0
+; A9-NEXT: lsls r2, r2, #2
+; A9-NEXT: movs r0, #0
+; A9-NEXT: .LBB0_1: @ %loop
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: add.w lr, r3, r2
+; A9-NEXT: ldr.w r12, [r3, r2]
+; A9-NEXT: ldr r3, [r3]
+; A9-NEXT: add.w r4, lr, r2
+; A9-NEXT: ldr.w r6, [lr, r2]
+; A9-NEXT: add r0, r3
+; A9-NEXT: adds r3, r4, r2
+; A9-NEXT: add r0, r12
+; A9-NEXT: ldr r5, [r4, r2]
+; A9-NEXT: add r0, r6
+; A9-NEXT: add r3, r2
+; A9-NEXT: add r0, r5
+; A9-NEXT: cmp r3, r1
+; A9-NEXT: bne .LBB0_1
+; A9-NEXT: @ %bb.2: @ %exit
+; A9-NEXT: pop {r4, r5, r6, pc}
 entry:
   br label %loop
 loop:
@@ -37,15 +58,34 @@ exit:
 
 ; @user is not currently chained because the IV is live across memory ops.
 ;
-; A9: @user ; stride multiples computed in the preheader
-; A9: lsl
-; A9: lsl
-; A9: %loop ; complex address modes
-; A9: lsl
-; A9: lsl
 define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+; A9-LABEL: user:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: .save {r4, r5, r6, r7, lr}
+; A9-NEXT: push {r4, r5, r6, r7, lr}
+; A9-NEXT: add.w r3, r2, r2, lsl #1
+; A9-NEXT: lsl.w r12, r2, #4
+; A9-NEXT: lsl.w lr, r3, #2
+; A9-NEXT: movs r3, #0
+; A9-NEXT: .LBB1_1: @ %loop
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: ldr r4, [r0]
+; A9-NEXT: ldr.w r5, [r0, r2, lsl #3]
+; A9-NEXT: ldr.w r6, [r0, r2, lsl #2]
+; A9-NEXT: add r3, r4
+; A9-NEXT: ldr.w r7, [r0, lr]
+; A9-NEXT: add r3, r6
+; A9-NEXT: add r3, r5
+; A9-NEXT: add r3, r7
+; A9-NEXT: str r3, [r0]
+; A9-NEXT: add r0, r12
+; A9-NEXT: cmp r0, r1
+; A9-NEXT: bne .LBB1_1
+; A9-NEXT: @ %bb.2: @ %exit
+; A9-NEXT: mov r0, r3
+; A9-NEXT: pop {r4, r5, r6, r7, pc}
 entry:
   br label %loop
 loop:
@@ -75,16 +115,43 @@ exit:
 ; used to do, and exactly what we don't want to do. LSR's new IV
 ; chaining feature should now undo the damage.
 ;
-; A9: extrastride: ; no spills
-; A9-NOT: str ; only one stride multiple in the preheader
-; A9: lsl
-; A9-NOT: {{str r|lsl}}
-; A9: %for.body{{$}} ; no complex address modes or reloads
-; A9-NOT: {{ldr .*[sp]|lsl}}
 define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+; A9-LABEL: extrastride:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: .save {r4, r5, r6, r7, lr}
+; A9-NEXT: push {r4, r5, r6, r7, lr}
+; A9-NEXT: ldr.w r12, [sp, #24]
+; A9-NEXT: cmp.w r12, #0
+; A9-NEXT: beq .LBB2_3
+; A9-NEXT: @ %bb.1: @ %for.body.lr.ph
+; A9-NEXT: ldr r4, [sp, #20]
+; A9-NEXT: add.w lr, r3, r1
+; A9-NEXT: lsls r3, r4, #2
+; A9-NEXT: .LBB2_2: @ %for.body
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: adds r5, r0, r1
+; A9-NEXT: ldr r4, [r0, r1]
+; A9-NEXT: ldr r0, [r0]
+; A9-NEXT: subs.w r12, r12, #1
+; A9-NEXT: ldr r6, [r5, r1]
+; A9-NEXT: add r5, r1
+; A9-NEXT: add r0, r4
+; A9-NEXT: ldr r7, [r5, r1]
+; A9-NEXT: add r5, r1
+; A9-NEXT: add r0, r6
+; A9-NEXT: ldr r4, [r5, r1]
+; A9-NEXT: add r0, r7
+; A9-NEXT: add r0, r4
+; A9-NEXT: str r0, [r2]
+; A9-NEXT: add.w r0, r5, r1
+; A9-NEXT: add r2, r3
+; A9-NEXT: add r0, lr
+; A9-NEXT: bne .LBB2_2
+; A9-NEXT: .LBB2_3: @ %for.end
+; A9-NEXT: pop {r4, r5, r6, r7, pc}
 entry:
   %cmp8 = icmp eq i32 %z, 0
   br i1 %cmp8, label %for.end, label %for.body.lr.ph
@@ -136,10 +203,38 @@ for.end: ; preds = %for.body, %entry
 ; }
 ; where 's' can be folded into the addressing mode.
 ; Consequently, we should *not* form any chains.
-;
-; A9: foldedidx:
-; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
 define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+; A9-LABEL: foldedidx:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: .save {r4, r5, r6, lr}
+; A9-NEXT: push {r4, r5, r6, lr}
+; A9-NEXT: mov.w lr, #0
+; A9-NEXT: .LBB3_1: @ %for.body
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: ldrb.w r12, [r0, lr]
+; A9-NEXT: add.w r4, r1, lr
+; A9-NEXT: ldrb.w r3, [r1, lr]
+; A9-NEXT: add r3, r12
+; A9-NEXT: strb.w r3, [r2, lr]
+; A9-NEXT: add.w r3, r0, lr
+; A9-NEXT: ldrb.w r12, [r3, #1]
+; A9-NEXT: ldrb r5, [r4, #1]
+; A9-NEXT: add r12, r5
+; A9-NEXT: add.w r5, r2, lr
+; A9-NEXT: strb.w r12, [r5, #1]
+; A9-NEXT: add.w lr, lr, #4
+; A9-NEXT: cmp.w lr, #400
+; A9-NEXT: ldrb.w r12, [r3, #2]
+; A9-NEXT: ldrb r6, [r4, #2]
+; A9-NEXT: add r6, r12
+; A9-NEXT: strb r6, [r5, #2]
+; A9-NEXT: ldrb r3, [r3, #3]
+; A9-NEXT: ldrb r6, [r4, #3]
+; A9-NEXT: add r3, r6
+; A9-NEXT: strb r3, [r5, #3]
+; A9-NEXT: bne .LBB3_1
+; A9-NEXT: @ %bb.2: @ %for.end
+; A9-NEXT: pop {r4, r5, r6, pc}
 entry:
   br label %for.body
@@ -200,14 +295,45 @@ for.end: ; preds = %for.body
 ;
 ; Loads and stores should use post-increment addressing, no add's or add.w's.
 ; Most importantly, there should be no spills or reloads!
-;
-; A9: testNeon:
-; A9: %.lr.ph
-; A9-NOT: lsl.w
-; A9-NOT: {{ldr|str|adds|add r}}
-; A9-NOT: add.w r
-; A9: bne
 define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
+; A9-LABEL: testNeon:
+; A9: @ %bb.0:
+; A9-NEXT: .save {r4, r5, r7, lr}
+; A9-NEXT: push {r4, r5, r7, lr}
+; A9-NEXT: vmov.i32 q8, #0x0
+; A9-NEXT: cmp r2, #1
+; A9-NEXT: blt .LBB4_4
+; A9-NEXT: @ %bb.1: @ %.lr.ph
+; A9-NEXT: movs r5, #0
+; A9-NEXT: movw r4, #64464
+; A9-NEXT: sub.w r12, r5, r2, lsl #6
+; A9-NEXT: sub.w lr, r1, r1, lsl #4
+; A9-NEXT: movt r4, #65535
+; A9-NEXT: mov r5, r3
+; A9-NEXT: .LBB4_2: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: vld1.64 {d18}, [r0], r1
+; A9-NEXT: subs r2, #1
+; A9-NEXT: vld1.64 {d19}, [r0], r1
+; A9-NEXT: vst1.8 {d18, d19}, [r5]!
+; A9-NEXT: vld1.64 {d20}, [r0], r1
+; A9-NEXT: vld1.64 {d21}, [r0], r1
+; A9-NEXT: vst1.8 {d20, d21}, [r5]!
+; A9-NEXT: vld1.64 {d22}, [r0], r1
+; A9-NEXT: vadd.i8 q9, q9, q10
+; A9-NEXT: vld1.64 {d23}, [r0], r1
+; A9-NEXT: vst1.8 {d22, d23}, [r5]!
+; A9-NEXT: vld1.64 {d20}, [r0], r1
+; A9-NEXT: vadd.i8 q9, q9, q11
+; A9-NEXT: vld1.64 {d21}, [r0], lr
+; A9-NEXT: vadd.i8 q9, q9, q10
+; A9-NEXT: vadd.i8 q8, q8, q9
+; A9-NEXT: vst1.8 {d20, d21}, [r5], r4
+; A9-NEXT: bne .LBB4_2
+; A9-NEXT: @ %bb.3: @ %._crit_edge
+; A9-NEXT: add.w r3, r3, r12, lsl #4
+; A9-NEXT: .LBB4_4:
+; A9-NEXT: vst1.32 {d16, d17}, [r3]
+; A9-NEXT: pop {r4, r5, r7, pc}
   %1 = icmp sgt i32 %limit, 0
   br i1 %1, label %.lr.ph, label %45
@@ -284,24 +410,41 @@ declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32) nounwind readonly
 
 ; Handle chains in which the same offset is used for both loads and
 ; stores to the same array.
 ; rdar://11410078.
-;
-; A9: @testReuse
-; A9: %for.body
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE:[r[0-9]+]]], [[INC:r[0-9]]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]]
-; A9: bne
 define void @testReuse(i8* %src, i32 %stride) nounwind ssp {
+; A9-LABEL: testReuse:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: sub.w r12, r0, r1, lsl #2
+; A9-NEXT: sub.w r0, r1, r1, lsl #2
+; A9-NEXT: lsls r2, r0, #1
+; A9-NEXT: movs r3, #0
+; A9-NEXT: .LBB5_1: @ %for.body
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: add.w r0, r12, r3
+; A9-NEXT: adds r3, #8
+; A9-NEXT: vld1.8 {d16}, [r0], r1
+; A9-NEXT: cmp r3, #32
+; A9-NEXT: vld1.8 {d17}, [r0], r1
+; A9-NEXT: vhadd.u8 d16, d16, d17
+; A9-NEXT: vld1.8 {d18}, [r0], r1
+; A9-NEXT: vhadd.u8 d17, d17, d18
+; A9-NEXT: vld1.8 {d19}, [r0], r1
+; A9-NEXT: vhadd.u8 d18, d18, d19
+; A9-NEXT: vld1.8 {d20}, [r0], r1
+; A9-NEXT: vhadd.u8 d19, d19, d20
+; A9-NEXT: vld1.8 {d21}, [r0], r1
+; A9-NEXT: vhadd.u8 d20, d20, d21
+; A9-NEXT: vld1.8 {d22}, [r0], r1
+; A9-NEXT: vhadd.u8 d21, d21, d22
+; A9-NEXT: vld1.8 {d23}, [r0], r2
+; A9-NEXT: vst1.8 {d16}, [r0], r1
+; A9-NEXT: vst1.8 {d17}, [r0], r1
+; A9-NEXT: vst1.8 {d18}, [r0], r1
+; A9-NEXT: vst1.8 {d19}, [r0], r1
+; A9-NEXT: vst1.8 {d20}, [r0], r1
+; A9-NEXT: vst1.8 {d21}, [r0]
+; A9-NEXT: bne .LBB5_1
+; A9-NEXT: @ %bb.2: @ %for.end
+; A9-NEXT: bx lr
 entry:
   %mul = shl nsw i32 %stride, 2
   %idx.neg = sub i32 0, %mul
diff --git a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
index 099d7d5023ed49..b9670176c15ddc 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
@@ -1,16 +1,5 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -opaque-pointers=0 < %s -loop-reduce -S | FileCheck %s
-; CHECK: bb1:
-; CHECK: load double, double addrspace(1)* [[IV:%[^,]+]]
-; CHECK: store double {{.*}}, double addrspace(1)* [[IV]]
-
-; CHECK-NOT: cast
-; Make sure the GEP has the right index type
-; CHECK: getelementptr double, double addrspace(1)* [[IV]], i16 1
-; CHECK: br {{.*}} label %bb1
-
-; Make sure the GEP has the right index type
-; CHECK: getelementptr double, double addrspace(1)* {{.*}}, i16
-
 ; This test tests several things. The load and store should use the
 ; same address instead of having it computed twice, and SCEVExpander should
@@ -22,67 +11,119 @@
 target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64"
 
 define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind {
+; CHECK-LABEL: define void @foo
+; CHECK-SAME: (i64 [[N:%.*]], i64 [[M:%.*]], i64 [[O:%.*]], i64 [[Q:%.*]], double addrspace(1)* nocapture [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i64 [[N]], 0
+; CHECK-NEXT: br i1 [[TMP]], label [[BB_NPH3:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[LSR_IV3:%.*]] = phi double addrspace(1)* [ [[SCEVGEP4:%.*]], [[BB2:%.*]] ], [ [[LSR_IV:%.*]], [[BB_NPH:%.*]] ]
+; CHECK-NEXT: [[J_01:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB2]] ], [ 0, [[BB_NPH]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, double addrspace(1)* [[LSR_IV3]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = fdiv double [[TMP6]], 2.100000e+00
+; CHECK-NEXT: store double [[TMP7]], double addrspace(1)* [[LSR_IV3]], align 8
+; CHECK-NEXT: [[TMP9]] = add i64 [[J_01]], 1
+; CHECK-NEXT: br label [[BB2]]
+; CHECK: bb2:
+; CHECK-NEXT: [[SCEVGEP4]] = getelementptr double, double addrspace(1)* [[LSR_IV3]], i16 1
+; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP9]], [[M]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[BB1]], label [[BB2_BB3_CRIT_EDGE:%.*]]
+; CHECK: bb2.bb3_crit_edge:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[I_02:%.*]], 1
+; CHECK-NEXT: br label [[BB4:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i1, i1 addrspace(1)* [[LSR_IV1:%.*]], i16 [[TMP5:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 addrspace(1)* [[SCEVGEP2]] to double addrspace(1)*
+; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[TMP11]], [[N]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[BB2_PREHEADER:%.*]], label [[BB4_RETURN_CRIT_EDGE:%.*]]
+; CHECK: bb4.return_crit_edge:
+; CHECK-NEXT: br label [[BB4_RETURN_CRIT_EDGE_SPLIT:%.*]]
+; CHECK: bb4.return_crit_edge.split:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb.nph3:
+; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[M]], 0
+; CHECK-NEXT: br i1 [[TMP13]], label [[BB_NPH3_SPLIT:%.*]], label [[BB4_RETURN_CRIT_EDGE_SPLIT]]
+; CHECK: bb.nph3.split:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double addrspace(1)* [[P]], i16 -2989
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[Q]], [[O]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], [[N]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 37
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i16
+; CHECK-NEXT: [[TMP5]] = shl i16 [[TMP4]], 3
+; CHECK-NEXT: br label [[BB2_PREHEADER]]
+; CHECK: bb2.preheader:
+; CHECK-NEXT: [[LSR_IV]] = phi double addrspace(1)* [ [[SCEVGEP]], [[BB_NPH3_SPLIT]] ], [ [[TMP0]], [[BB4]] ]
+; CHECK-NEXT: [[I_02]] = phi i64 [ [[TMP11]], [[BB4]] ], [ 0, [[BB_NPH3_SPLIT]] ]
+; CHECK-NEXT: [[LSR_IV1]] = bitcast double addrspace(1)* [[LSR_IV]] to i1 addrspace(1)*
+; CHECK-NEXT: br i1 true, label [[BB_NPH]], label [[BB3]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
 entry:
- %tmp = icmp sgt i64 %n, 0 ; [#uses=1]
- br i1 %tmp, label %bb.nph3, label %return
+ %tmp = icmp sgt i64 %n, 0 ; [#uses=1]
+ br i1 %tmp, label %bb.nph3, label %return
 
 bb.nph: ; preds = %bb2.preheader
- %tmp1 = mul i64 %tmp16, %i.02 ; [#uses=1]
- %tmp2 = mul i64 %tmp19, %i.02 ; [#uses=1]
- br label %bb1
+ %tmp1 = mul i64 %tmp16, %i.02 ; [#uses=1]
+ %tmp2 = mul i64 %tmp19, %i.02 ; [#uses=1]
+ br label %bb1
 
 bb1: ; preds = %bb2, %bb.nph
- %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; [#uses=3]
- %tmp3 = add i64 %j.01, %tmp1 ; [#uses=1]
- %tmp4 = add i64 %j.01, %tmp2 ; [#uses=1]
- %z0 = add i64 %tmp3, 5203
- %tmp5 = getelementptr double, double addrspace(1)* %p, i64 %z0 ; [#uses=1]
- %tmp6 = load double, double addrspace(1)* %tmp5, align 8 ; [#uses=1]
- %tmp7 = fdiv double %tmp6, 2.100000e+00 ; [#uses=1]
- %z1 = add i64 %tmp4, 5203
- %tmp8 = getelementptr double, double addrspace(1)* %p, i64 %z1 ; [#uses=1]
- store double %tmp7, double addrspace(1)* %tmp8, align 8
- %tmp9 = add i64 %j.01, 1 ; [#uses=2]
- br label %bb2
+ %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; [#uses=3]
+ %tmp3 = add i64 %j.01, %tmp1 ; [#uses=1]
+ %tmp4 = add i64 %j.01, %tmp2 ; [#uses=1]
+ %z0 = add i64 %tmp3, 5203
+ %tmp5 = getelementptr double, double addrspace(1)* %p, i64 %z0 ; [#uses=1]
+ %tmp6 = load double, double addrspace(1)* %tmp5, align 8 ; [#uses=1]
+ %tmp7 = fdiv double %tmp6, 2.100000e+00 ; [#uses=1]
+ %z1 = add i64 %tmp4, 5203
+ %tmp8 = getelementptr double, double addrspace(1)* %p, i64 %z1 ; [#uses=1]
+ store double %tmp7, double addrspace(1)* %tmp8, align 8
+ %tmp9 = add i64 %j.01, 1 ; [#uses=2]
+ br label %bb2
 
 bb2: ; preds = %bb1
- %tmp10 = icmp slt i64 %tmp9, %m ; [#uses=1]
- br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+ %tmp10 = icmp slt i64 %tmp9, %m ; [#uses=1]
+ br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
 
 bb2.bb3_crit_edge: ; preds = %bb2
- br label %bb3
+ br label %bb3
 
 bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge
- %tmp11 = add i64 %i.02, 1 ; [#uses=2]
- br label %bb4
+ %tmp11 = add i64 %i.02, 1 ; [#uses=2]
+ br label %bb4
 
 bb4: ; preds = %bb3
- %tmp12 = icmp slt i64 %tmp11, %n ; [#uses=1]
- br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+ %tmp12 = icmp slt i64 %tmp11, %n ; [#uses=1]
+ br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
 
 bb4.return_crit_edge: ; preds = %bb4
- br label %bb4.return_crit_edge.split
+ br label %bb4.return_crit_edge.split
 
 bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge
- br label %return
+ br label %return
 
 bb.nph3: ; preds = %entry
- %tmp13 = icmp sgt i64 %m, 0 ; [#uses=1]
- %tmp14 = mul i64 %n, 37 ; [#uses=1]
- %tmp15 = mul i64 %tmp14, %o ; [#uses=1]
- %tmp16 = mul i64 %tmp15, %q ; [#uses=1]
- %tmp17 = mul i64 %n, 37 ; [#uses=1]
- %tmp18 = mul i64 %tmp17, %o ; [#uses=1]
- %tmp19 = mul i64 %tmp18, %q ; [#uses=1]
- br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+ %tmp13 = icmp sgt i64 %m, 0 ; [#uses=1]
+ %tmp14 = mul i64 %n, 37 ; [#uses=1]
+ %tmp15 = mul i64 %tmp14, %o ; [#uses=1]
+ %tmp16 = mul i64 %tmp15, %q ; [#uses=1]
+ %tmp17 = mul i64 %n, 37 ; [#uses=1]
+ %tmp18 = mul i64 %tmp17, %o ; [#uses=1]
+ %tmp19 = mul i64 %tmp18, %q ; [#uses=1]
+ br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
 
 bb.nph3.split: ; preds = %bb.nph3
- br label %bb2.preheader
+ br label %bb2.preheader
 
 bb2.preheader: ; preds = %bb.nph3.split, %bb4
- %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; [#uses=3]
- br i1 true, label %bb.nph, label %bb3
+ %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; [#uses=3]
+ br i1 true, label %bb.nph, label %bb3
 
 return: ; preds = %bb4.return_crit_edge.split, %entry
- ret void
+ ret void
 }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
index 6bf066e11a87bd..9b44d2adb6324e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
@@ -1,10 +1,5 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -opaque-pointers=0 < %s -loop-reduce -S | FileCheck %s
-; CHECK: bb1:
-; CHECK: load double, double* [[IV:%[^,]+]]
-; CHECK: store double {{.*}}, double* [[IV]]
-; CHECK: getelementptr double, double*
-; CHECK-NOT: cast
-; CHECK: br {{.*}} label %bb1
 
 ; This test tests several things. The load and store should use the
 ; same address instead of having it computed twice, and SCEVExpander should
@@ -16,67 +11,117 @@
 target datalayout = "e-p:64:64:64-n32:64"
 
 define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
+; CHECK-LABEL: define void @foo
+; CHECK-SAME: (i64 [[N:%.*]], i64 [[M:%.*]], i64 [[O:%.*]], i64 [[Q:%.*]], double* nocapture [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i64 [[N]], 0
+; CHECK-NEXT: br i1 [[TMP]], label [[BB_NPH3:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[LSR_IV3:%.*]] = phi double* [ [[SCEVGEP4:%.*]], [[BB2:%.*]] ], [ [[LSR_IV:%.*]], [[BB_NPH:%.*]] ]
+; CHECK-NEXT: [[J_01:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB2]] ], [ 0, [[BB_NPH]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[LSR_IV3]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = fdiv double [[TMP6]], 2.100000e+00
+; CHECK-NEXT: store double [[TMP7]], double* [[LSR_IV3]], align 8
+; CHECK-NEXT: [[TMP9]] = add i64 [[J_01]], 1
+; CHECK-NEXT: br label [[BB2]]
+; CHECK: bb2:
+; CHECK-NEXT: [[SCEVGEP4]] = getelementptr double, double* [[LSR_IV3]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP9]], [[M]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[BB1]], label [[BB2_BB3_CRIT_EDGE:%.*]]
+; CHECK: bb2.bb3_crit_edge:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[I_02:%.*]], 1
+; CHECK-NEXT: br label [[BB4:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i1, i1* [[LSR_IV1:%.*]], i64 [[TMP3:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1* [[SCEVGEP2]] to double*
+; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[TMP11]], [[N]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[BB2_PREHEADER:%.*]], label [[BB4_RETURN_CRIT_EDGE:%.*]]
+; CHECK: bb4.return_crit_edge:
+; CHECK-NEXT: br label [[BB4_RETURN_CRIT_EDGE_SPLIT:%.*]]
+; CHECK: bb4.return_crit_edge.split:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb.nph3:
+; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[M]], 0
+; CHECK-NEXT: br i1 [[TMP13]], label [[BB_NPH3_SPLIT:%.*]], label [[BB4_RETURN_CRIT_EDGE_SPLIT]]
+; CHECK: bb.nph3.split:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[P]], i64 5203
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[Q]], [[O]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], [[N]]
+; CHECK-NEXT: [[TMP3]] = mul i64 [[TMP2]], 296
+; CHECK-NEXT: br label [[BB2_PREHEADER]]
+; CHECK: bb2.preheader:
+; CHECK-NEXT: [[LSR_IV]] = phi double* [ [[SCEVGEP]], [[BB_NPH3_SPLIT]] ], [ [[TMP0]], [[BB4]] ]
+; CHECK-NEXT: [[I_02]] = phi i64 [ [[TMP11]], [[BB4]] ], [ 0, [[BB_NPH3_SPLIT]] ]
+; CHECK-NEXT: [[LSR_IV1]] = bitcast double* [[LSR_IV]] to i1*
+; CHECK-NEXT: br i1 true, label [[BB_NPH]], label [[BB3]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
 entry:
- %tmp = icmp sgt i64 %n, 0 ; [#uses=1]
- br i1 %tmp, label %bb.nph3, label %return
+ %tmp = icmp sgt i64 %n, 0 ; [#uses=1]
+ br i1 %tmp, label %bb.nph3, label %return
 
 bb.nph: ; preds = %bb2.preheader
- %tmp1 = mul i64 %tmp16, %i.02 ; [#uses=1]
- %tmp2 = mul i64 %tmp19, %i.02 ; [#uses=1]
- br label %bb1
+ %tmp1 = mul i64 %tmp16, %i.02 ; [#uses=1]
+ %tmp2 = mul i64 %tmp19, %i.02 ; [#uses=1]
+ br label %bb1
 
 bb1: ; preds = %bb2, %bb.nph
- %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; [#uses=3]
- %tmp3 = add i64 %j.01, %tmp1 ; [#uses=1]
- %tmp4 = add i64 %j.01, %tmp2 ; [#uses=1]
- %z0 = add i64 %tmp3, 5203
- %tmp5 = getelementptr double, double* %p, i64 %z0 ; [#uses=1]
- %tmp6 = load double, double* %tmp5, align 8 ; [#uses=1]
- %tmp7 = fdiv double %tmp6, 2.100000e+00 ; [#uses=1]
- %z1 = add i64 %tmp4, 5203
- %tmp8 = getelementptr double, double* %p, i64 %z1 ; [#uses=1]
- store double %tmp7, double* %tmp8, align 8
- %tmp9 = add i64 %j.01, 1 ; [#uses=2]
- br label %bb2
+ %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; [#uses=3]
+ %tmp3 = add i64 %j.01, %tmp1 ; [#uses=1]
+ %tmp4 = add i64 %j.01, %tmp2 ; [#uses=1]
+ %z0 = add i64 %tmp3, 5203
+ %tmp5 = getelementptr double, double* %p, i64 %z0 ; [#uses=1]
+ %tmp6 = load double, double* %tmp5, align 8 ; [#uses=1]
+ %tmp7 = fdiv double %tmp6, 2.100000e+00 ; [#uses=1]
+ %z1 = add i64 %tmp4, 5203
+ %tmp8 = getelementptr double, double* %p, i64 %z1 ; [#uses=1]
+ store double %tmp7, double* %tmp8, align 8
+ %tmp9 = add i64 %j.01, 1 ; [#uses=2]
+ br label %bb2
 
 bb2: ; preds = %bb1
- %tmp10 = icmp slt i64 %tmp9, %m ; [#uses=1]
- br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+ %tmp10 = icmp slt i64 %tmp9, %m ; [#uses=1]
- br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+ br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
 
 bb2.bb3_crit_edge: ; preds = %bb2
- br label %bb3
+ br label %bb3
 
 bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge
- %tmp11 = add i64 %i.02, 1 ; [#uses=2]
- br label %bb4
+ %tmp11 = add i64 %i.02, 1 ; [#uses=2]
+ br label %bb4
 
 bb4: ; preds = %bb3
- %tmp12 = icmp slt i64 %tmp11, %n ; [#uses=1]
- br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+ %tmp12 = icmp slt i64 %tmp11, %n ; [#uses=1]
+ br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
 
 bb4.return_crit_edge: ; preds = %bb4
- br label %bb4.return_crit_edge.split
+ br label %bb4.return_crit_edge.split
 
 bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge
- br label %return
+ br label %return
 
 bb.nph3: ; preds = %entry
- %tmp13 = icmp sgt i64 %m, 0 ; [#uses=1]
- %tmp14 = mul i64 %n, 37 ; [#uses=1]
- %tmp15 = mul i64 %tmp14, %o ; [#uses=1]
- %tmp16 = mul i64 %tmp15, %q ; [#uses=1]
- %tmp17 = mul i64 %n, 37 ; [#uses=1]
- %tmp18 = mul i64 %tmp17, %o ; [#uses=1]
- %tmp19 = mul i64 %tmp18, %q ; [#uses=1]
- br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+ %tmp13 = icmp sgt i64 %m, 0 ; [#uses=1]
+ %tmp14 = mul i64 %n, 37 ; [#uses=1]
+ %tmp15 = mul i64 %tmp14, %o ; [#uses=1]
+ %tmp16 = mul i64 %tmp15, %q ; [#uses=1]
+ %tmp17 = mul i64 %n, 37 ; [#uses=1]
+ %tmp18 = mul i64 %tmp17, %o ; [#uses=1]
+ %tmp19 = mul i64 %tmp18, %q ; [#uses=1]
+ br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
 
 bb.nph3.split: ; preds = %bb.nph3
- br label %bb2.preheader
+ br label %bb2.preheader
 
 bb2.preheader: ; preds = %bb.nph3.split, %bb4
- %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; [#uses=3]
- br i1 true, label %bb.nph, label %bb3
+ %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; [#uses=3]
+ br i1 true, label %bb.nph, label %bb3
 
 return: ; preds = %bb4.return_crit_edge.split, %entry
- ret void
+ ret void
 }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll b/llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll
index 3836030f3c0eef..6541b961c11c2e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -opaque-pointers=0 < %s -loop-reduce -S | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv6m-arm-none-eabi"
 
 ; "[LSR] Narrow search space by filtering non-optimal formulae with the
 ; same ScaledReg and Scale."
 ;
-; Due to a bug in ARMTargetLowering::isLegalAddressingMode LSR got
+; Due to a bug in ARMTargetLowering::isLegalAddressingMode LSR got
 ; 4*reg({0,+,-1}) and -4*reg({0,+,-1}) had the same cost for the Thumb1 target.
 ; Another issue was that LSR got that -1*reg was free for the Thumb1 target.
 
 ; Test case 01: -1*reg is not free for the Thumb1 target.
-;
-; CHECK-LABEL: @negativeOneCase
-; CHECK-NOT: mul
-; CHECK: ret i8
 define i8* @negativeOneCase(i8* returned %a, i8* nocapture readonly %b, i32 %n) nounwind {
+; CHECK-LABEL: define i8* @negativeOneCase
+; CHECK-SAME: (i8* returned [[A:%.*]], i8* nocapture readonly [[B:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 -1
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ [[ADD_PTR]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[P_0]], i32 1
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[P_0]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[SCEVGEP5]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND2_PREHEADER:%.*]], label [[WHILE_COND]]
+; CHECK: while.cond2.preheader:
+; CHECK-NEXT: br label [[WHILE_COND2:%.*]]
+; CHECK: while.cond2:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[WHILE_BODY5:%.*]] ], [ 0, [[WHILE_COND2_PREHEADER]] ]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[B]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, i8* [[INCDEC_PTR]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[N]], [[LSR_IV]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[WHILE_END8:%.*]], label [[WHILE_BODY5]]
+; CHECK: while.body5:
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[SCEVGEP1]], align 1
+; CHECK-NEXT: store i8 [[TMP1]], i8* [[SCEVGEP3]], align 1
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
+; CHECK-NEXT: br label [[WHILE_COND2]]
+; CHECK: while.end8:
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, i8* [[INCDEC_PTR]], i32 [[N]]
+; CHECK-NEXT: store i8 0, i8* [[SCEVGEP4]], align 1
+; CHECK-NEXT: ret i8* [[A]]
+;
 entry:
   %add.ptr = getelementptr inbounds i8, i8* %a, i32 -1
   br label %while.cond
@@ -58,11 +85,64 @@ while.end8: ; preds = %while.cond2
 
 ; Test case 02: 4*reg({0,+,-1}) and -4*reg({0,+,-1}) are not supported for
 ; the Thumb1 target.
-;
-; CHECK-LABEL: @negativeFourCase
-; CHECK-NOT: mul
-; CHECK: ret void
 define void @negativeFourCase(i8* %ptr1, i32* %ptr2) nounwind {
+; CHECK-LABEL: define void @negativeFourCase
+; CHECK-SAME: (i8* [[PTR1:%.*]], i32* [[PTR2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND6_PREHEADER_US_I_I:%.*]]
+; CHECK: for.cond6.preheader.us.i.i:
+; CHECK-NEXT: [[ADDR_0108_US_I_I:%.*]] = phi i8* [ [[SCEVGEP_I_I:%.*]], [[IF_END48_US_I_I:%.*]] ], [ [[PTR1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INC49_US_I_I:%.*]] = phi i32 [ [[INC50_US_I_I:%.*]], [[IF_END48_US_I_I]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[C1_0104_US_I_I:%.*]] = phi i32* [ [[C0_0103_US_I_I:%.*]], [[IF_END48_US_I_I]] ], [ [[PTR2]], [[ENTRY]] ]
+; CHECK-NEXT: [[C0_0103_US_I_I]] = phi i32* [ [[C1_0104_US_I_I]], [[IF_END48_US_I_I]] ], [ [[PTR2]], [[ENTRY]] ]
+; CHECK-NEXT: [[C0_0103_US_I_I1:%.*]] = bitcast i32* [[C0_0103_US_I_I]] to i8*
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[C1_0104_US_I_I]], i32 -1
+; CHECK-NEXT: [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
+; CHECK-NEXT: [[C1_0104_US_I_I7:%.*]] = bitcast i32* [[C1_0104_US_I_I]] to i8*
+; CHECK-NEXT: br label [[FOR_BODY8_US_I_I:%.*]]
+; CHECK: if.end48.us.i.i:
+; CHECK-NEXT: [[SCEVGEP_I_I]] = getelementptr i8, i8* [[ADDR_0108_US_I_I]], i32 256
+; CHECK-NEXT: [[INC50_US_I_I]] = add nuw nsw i32 [[INC49_US_I_I]], 1
+; CHECK-NEXT: [[EXITCOND110_I_I:%.*]] = icmp eq i32 [[INC50_US_I_I]], 256
+; CHECK-NEXT: br i1 [[EXITCOND110_I_I]], label [[EXIT_I:%.*]], label [[FOR_COND6_PREHEADER_US_I_I]]
+; CHECK: for.body8.us.i.i:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_INC_US_I_I:%.*]] ], [ 0, [[FOR_COND6_PREHEADER_US_I_I]] ]
+; CHECK-NEXT: [[ADDR_198_US_I_I:%.*]] = phi i8* [ [[ADDR_0108_US_I_I]], [[FOR_COND6_PREHEADER_US_I_I]] ], [ [[INCDEC_PTR_US_I_I:%.*]], [[FOR_INC_US_I_I]] ]
+; CHECK-NEXT: [[INC_196_US_I_I:%.*]] = phi i32 [ 0, [[FOR_COND6_PREHEADER_US_I_I]] ], [ [[INC_2_US_I_I:%.*]], [[FOR_INC_US_I_I]] ]
+; CHECK-NEXT: [[INCDEC_PTR_US_I_I]] = getelementptr inbounds i8, i8* [[ADDR_198_US_I_I]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ADDR_198_US_I_I]], align 1
+; CHECK-NEXT: [[CMP9_US_I_I:%.*]] = icmp eq i8 [[TMP0]], -1
+; CHECK-NEXT: br i1 [[CMP9_US_I_I]], label [[IF_END37_US_I_I:%.*]], label [[IF_ELSE_US_I_I:%.*]]
+; CHECK: if.else.us.i.i:
+; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr i8, i8* [[C1_0104_US_I_I7]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP1011:%.*]] = bitcast i8* [[SCEVGEP10]] to i32*
+; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i32, i32* [[SCEVGEP1011]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP12]], align 4
+; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, i8* [[C1_0104_US_I_I7]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP89:%.*]] = bitcast i8* [[SCEVGEP8]] to i32*
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[SCEVGEP89]], align 4
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[SCEVGEP34]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP56:%.*]] = bitcast i8* [[SCEVGEP5]] to i32*
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SCEVGEP56]], align 4
+; CHECK-NEXT: br label [[IF_END37_US_I_I]]
+; CHECK: if.end37.us.i.i:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP3]], [[IF_ELSE_US_I_I]] ], [ 0, [[FOR_BODY8_US_I_I]] ]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[C0_0103_US_I_I1]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i8* [[SCEVGEP]] to i32*
+; CHECK-NEXT: store i32 [[TMP4]], i32* [[SCEVGEP2]], align 4
+; CHECK-NEXT: [[INC_US_I_I:%.*]] = add nsw i32 [[INC_196_US_I_I]], 1
+; CHECK-NEXT: [[CMP38_US_I_I:%.*]] = icmp sgt i32 [[INC_196_US_I_I]], 6
+; CHECK-NEXT: br i1 [[CMP38_US_I_I]], label [[IF_THEN40_US_I_I:%.*]], label [[FOR_INC_US_I_I]]
+; CHECK: if.then40.us.i.i:
+; CHECK-NEXT: br label [[FOR_INC_US_I_I]]
+; CHECK: for.inc.us.i.i:
+; CHECK-NEXT: [[INC_2_US_I_I]] = phi i32 [ 0, [[IF_THEN40_US_I_I]] ], [ [[INC_US_I_I]], [[IF_END37_US_I_I]] ]
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i32 [[LSR_IV]], 4
+; CHECK-NEXT: [[EXITCOND_I_I:%.*]] = icmp eq i32 1024, [[LSR_IV_NEXT]]
+; CHECK-NEXT: br i1 [[EXITCOND_I_I]], label [[IF_END48_US_I_I]], label [[FOR_BODY8_US_I_I]]
+; CHECK: exit.i:
+; CHECK-NEXT: ret void
+;
 entry:
   br label %for.cond6.preheader.us.i.i
diff --git a/llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll b/llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll
index 2e8c1772ae10a6..ac47e4ee47dfb8 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; PR41445: This test checks the case when LSR split critical edge
 ; and phi node has other pending fixup operands
@@ -12,6 +13,103 @@ target triple = "x86_64-unknown-linux-gnu"
 ; All the other PHI inputs besides %tmp1 go to a new phi node.
 ; This test checks that LSR is still able to rewrite %tmp2, %tmp3, %tmp4.
 define i32 @foo(i32* %A, i32 %t) {
+; CHECK-LABEL: define i32 @foo
+; CHECK-SAME: (i32* [[A:%.*]], i32 [[T:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP_32:%.*]]
+; CHECK: loop.exit.loopexitsplitsplitsplit:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], -1
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]]
+; CHECK: ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge:
+; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_38:%.*]] ]
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]]
+; CHECK: loop.exit.loopexitsplitsplit:
+; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ]
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]]
+; CHECK: ifmerge.42.loop.exit.loopexitsplit_crit_edge:
+; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_42:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT]]
+; CHECK: loop.exit.loopexitsplit:
+; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ]
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]]
+; CHECK: then.34.loop.exit.loopexit_crit_edge:
+; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV]], [[THEN_34:%.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT]]
+; CHECK: loop.exit.loopexit:
+; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH:%.*]] = phi i64 [ [[TMP2]], [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLIT]] ]
+; CHECK-NEXT: br label [[LOOP_EXIT:%.*]]
+; CHECK: loop.exit:
+; CHECK-NEXT: [[INDVARS_IV_LCSSA:%.*]] = phi i64 [ 48, [[THEN_8:%.*]] ], [ 49, [[THEN_8_1:%.*]] ], [ [[INDVARS_IV_LCSSA_PH]], [[LOOP_EXIT_LOOPEXIT]] ]
+; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV_LCSSA]] to i32
+; CHECK-NEXT: br label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+; CHECK: loop.32:
+; CHECK-NEXT: [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ]
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i32, i32* [[SCEVGEP7]], i64 -1
+; CHECK-NEXT: [[GEPLOAD:%.*]] = load i32, i32* [[SCEVGEP8]], align 4
+; CHECK-NEXT: [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34:%.*]]
+; CHECK: then.34:
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[SCEVGEP5]], i64 -2
+; CHECK-NEXT: [[GEPLOAD18:%.*]] = load i32, i32* [[SCEVGEP6]], align 4
+; CHECK-NEXT: [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]]
+; CHECK: ifmerge.34:
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[GEPLOAD20:%.*]] = load i32, i32* [[SCEVGEP4]], align 4
+; CHECK-NEXT: [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]]
+; CHECK-NEXT: [[CMP_39:%.*]] = icmp slt i32 [[GEPLOAD]], [[T]]
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]]
+; CHECK: ifmerge.38:
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[SCEVGEP2]], i64 1
+; CHECK-NEXT: [[GEPLOAD24:%.*]] = load i32, i32* [[SCEVGEP3]], align 4
+; CHECK-NEXT: [[CMP_42:%.*]] = icmp sgt i32 [[GEPLOAD24]], [[T]]
+; CHECK-NEXT: [[CMP_43:%.*]] = icmp slt i32 [[GEPLOAD20]], [[T]]
+; CHECK-NEXT: [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]]
+; CHECK-NEXT: br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]]
+; CHECK: ifmerge.42:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 2
+; CHECK-NEXT: [[GEPLOAD28:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
+; CHECK-NEXT: [[CMP_46:%.*]] = icmp sgt i32 [[GEPLOAD28]], [[T]]
+; CHECK-NEXT: [[CMP_47:%.*]] = icmp slt i32 [[GEPLOAD24]], [[T]]
+; CHECK-NEXT: [[OR_COND56:%.*]] = and i1 [[CMP_46]], [[CMP_47]]
+; CHECK-NEXT: br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]]
+; CHECK: ifmerge.46:
+; CHECK-NEXT: [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
+; CHECK-NEXT: [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12
+; CHECK-NEXT: br i1 [[CONDLOOP_32]], label [[LOOP_32]], label [[LOOP_25:%.*]]
+; CHECK: loop.25:
+; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 49
+; CHECK-NEXT: [[GEPLOAD32:%.*]] = load i32, i32* [[ARRAYIDX31]], align 4
+; CHECK-NEXT: [[CMP_8:%.*]] = icmp sgt i32 [[GEPLOAD32]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_8]], label [[THEN_8]], label [[IFMERGE_8]]
+; CHECK: then.8:
+; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
+; CHECK-NEXT: [[GEPLOAD34:%.*]] = load i32, i32* [[ARRAYIDX33]], align 4
+; CHECK-NEXT: [[CMP_15:%.*]] = icmp slt i32 [[GEPLOAD34]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_15]], label [[LOOP_EXIT]], label [[IFMERGE_8]]
+; CHECK: ifmerge.8:
+; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 50
+; CHECK-NEXT: [[GEPLOAD32_1:%.*]] = load i32, i32* [[ARRAYIDX31_1]], align 4
+; CHECK-NEXT: [[CMP_8_1:%.*]] = icmp sgt i32 [[GEPLOAD32_1]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_8_1]], label [[THEN_8_1]], label [[FOR_END]]
+; CHECK: then.8.1:
+; CHECK-NEXT: [[ARRAYIDX33_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 49
+; CHECK-NEXT: [[GEPLOAD34_1:%.*]] = load i32, i32* [[ARRAYIDX33_1]], align 4
+; CHECK-NEXT: [[CMP_15_1:%.*]] = icmp slt i32 [[GEPLOAD34_1]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_15_1]], label [[LOOP_EXIT]], label [[FOR_END]]
+;
 entry:
   br label %loop.32
@@ -25,8 +123,6 @@ for.end: ; preds = %then.8.1, %ifmerge.
   ret i32 %i.0.lcssa
 
 ; shl instruction will be dead eliminated when all it's uses will be rewritten.
-; CHECK-LABEL: loop.32:
-; CHECK-NOT: shl
 loop.32: ; preds = %ifmerge.46, %entry
   %i1.i64.0 = phi i64 [ 0, %entry ], [ %nextivloop.32, %ifmerge.46 ]
   %tmp1 = shl i64 %i1.i64.0, 2
   %arrayIdx = getelementptr inbounds i32, i32* %A, i64 %tmp2
   %gepload = load i32, i32* %arrayIdx, align 4
   %cmp.34 = icmp sgt i32 %gepload, %t
   br i1 %cmp.34, label %then.34, label %ifmerge.34
 
-; CHECK-LABEL: then.34:
 then.34: ; preds = %loop.32
   %arrayIdx17 = getelementptr inbounds i32, i32* %A, i64 %tmp1
   %gepload18 = load i32, i32* %arrayIdx17, align 4