Skip to content

Commit

Permalink
[LSR] Regenerate test checks (NFC)
Browse files Browse the repository at this point in the history
  • Loading branch information
nikic committed Jul 12, 2023
1 parent 5041442 commit 7a78756
Show file tree
Hide file tree
Showing 5 changed files with 554 additions and 150 deletions.
231 changes: 187 additions & 44 deletions llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -opaque-pointers=0 -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9

; @simple is the most basic chain of address induction variables. Chaining
; saves at least one register and avoids complex addressing and setup
; code.
;
; A9: @simple
; no expensive address computation in the preheader
; A9: lsl
; A9-NOT: lsl
; A9: %loop
; no complex address modes
; A9-NOT: lsl
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
; A9-LABEL: simple:
; A9: @ %bb.0: @ %entry
; A9-NEXT: .save {r4, r5, r6, lr}
; A9-NEXT: push {r4, r5, r6, lr}
; A9-NEXT: mov r3, r0
; A9-NEXT: lsls r2, r2, #2
; A9-NEXT: movs r0, #0
; A9-NEXT: .LBB0_1: @ %loop
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
; A9-NEXT: add.w lr, r3, r2
; A9-NEXT: ldr.w r12, [r3, r2]
; A9-NEXT: ldr r3, [r3]
; A9-NEXT: add.w r4, lr, r2
; A9-NEXT: ldr.w r6, [lr, r2]
; A9-NEXT: add r0, r3
; A9-NEXT: adds r3, r4, r2
; A9-NEXT: add r0, r12
; A9-NEXT: ldr r5, [r4, r2]
; A9-NEXT: add r0, r6
; A9-NEXT: add r3, r2
; A9-NEXT: add r0, r5
; A9-NEXT: cmp r3, r1
; A9-NEXT: bne .LBB0_1
; A9-NEXT: @ %bb.2: @ %exit
; A9-NEXT: pop {r4, r5, r6, pc}
entry:
br label %loop
loop:
Expand All @@ -37,15 +58,34 @@ exit:

; @user is not currently chained because the IV is live across memory ops.
;
; A9: @user
; stride multiples computed in the preheader
; A9: lsl
; A9: lsl
; A9: %loop
; complex address modes
; A9: lsl
; A9: lsl
define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
; A9-LABEL: user:
; A9: @ %bb.0: @ %entry
; A9-NEXT: .save {r4, r5, r6, r7, lr}
; A9-NEXT: push {r4, r5, r6, r7, lr}
; A9-NEXT: add.w r3, r2, r2, lsl #1
; A9-NEXT: lsl.w r12, r2, #4
; A9-NEXT: lsl.w lr, r3, #2
; A9-NEXT: movs r3, #0
; A9-NEXT: .LBB1_1: @ %loop
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
; A9-NEXT: ldr r4, [r0]
; A9-NEXT: ldr.w r5, [r0, r2, lsl #3]
; A9-NEXT: ldr.w r6, [r0, r2, lsl #2]
; A9-NEXT: add r3, r4
; A9-NEXT: ldr.w r7, [r0, lr]
; A9-NEXT: add r3, r6
; A9-NEXT: add r3, r5
; A9-NEXT: add r3, r7
; A9-NEXT: str r3, [r0]
; A9-NEXT: add r0, r12
; A9-NEXT: cmp r0, r1
; A9-NEXT: bne .LBB1_1
; A9-NEXT: @ %bb.2: @ %exit
; A9-NEXT: mov r0, r3
; A9-NEXT: pop {r4, r5, r6, r7, pc}
entry:
br label %loop
loop:
Expand Down Expand Up @@ -75,16 +115,43 @@ exit:
; used to do, and exactly what we don't want to do. LSR's new IV
; chaining feature should now undo the damage.
;
; A9: extrastride:
; no spills
; A9-NOT: str
; only one stride multiple in the preheader
; A9: lsl
; A9-NOT: {{str r|lsl}}
; A9: %for.body{{$}}
; no complex address modes or reloads
; A9-NOT: {{ldr .*[sp]|lsl}}
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
; A9-LABEL: extrastride:
; A9: @ %bb.0: @ %entry
; A9-NEXT: .save {r4, r5, r6, r7, lr}
; A9-NEXT: push {r4, r5, r6, r7, lr}
; A9-NEXT: ldr.w r12, [sp, #24]
; A9-NEXT: cmp.w r12, #0
; A9-NEXT: beq .LBB2_3
; A9-NEXT: @ %bb.1: @ %for.body.lr.ph
; A9-NEXT: ldr r4, [sp, #20]
; A9-NEXT: add.w lr, r3, r1
; A9-NEXT: lsls r3, r4, #2
; A9-NEXT: .LBB2_2: @ %for.body
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
; A9-NEXT: adds r5, r0, r1
; A9-NEXT: ldr r4, [r0, r1]
; A9-NEXT: ldr r0, [r0]
; A9-NEXT: subs.w r12, r12, #1
; A9-NEXT: ldr r6, [r5, r1]
; A9-NEXT: add r5, r1
; A9-NEXT: add r0, r4
; A9-NEXT: ldr r7, [r5, r1]
; A9-NEXT: add r5, r1
; A9-NEXT: add r0, r6
; A9-NEXT: ldr r4, [r5, r1]
; A9-NEXT: add r0, r7
; A9-NEXT: add r0, r4
; A9-NEXT: str r0, [r2]
; A9-NEXT: add.w r0, r5, r1
; A9-NEXT: add r2, r3
; A9-NEXT: add r0, lr
; A9-NEXT: bne .LBB2_2
; A9-NEXT: .LBB2_3: @ %for.end
; A9-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%cmp8 = icmp eq i32 %z, 0
br i1 %cmp8, label %for.end, label %for.body.lr.ph
Expand Down Expand Up @@ -136,10 +203,38 @@ for.end: ; preds = %for.body, %entry
; }
; where 's' can be folded into the addressing mode.
; Consequently, we should *not* form any chains.
;
; A9: foldedidx:
; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
; A9-LABEL: foldedidx:
; A9: @ %bb.0: @ %entry
; A9-NEXT: .save {r4, r5, r6, lr}
; A9-NEXT: push {r4, r5, r6, lr}
; A9-NEXT: mov.w lr, #0
; A9-NEXT: .LBB3_1: @ %for.body
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
; A9-NEXT: ldrb.w r12, [r0, lr]
; A9-NEXT: add.w r4, r1, lr
; A9-NEXT: ldrb.w r3, [r1, lr]
; A9-NEXT: add r3, r12
; A9-NEXT: strb.w r3, [r2, lr]
; A9-NEXT: add.w r3, r0, lr
; A9-NEXT: ldrb.w r12, [r3, #1]
; A9-NEXT: ldrb r5, [r4, #1]
; A9-NEXT: add r12, r5
; A9-NEXT: add.w r5, r2, lr
; A9-NEXT: strb.w r12, [r5, #1]
; A9-NEXT: add.w lr, lr, #4
; A9-NEXT: cmp.w lr, #400
; A9-NEXT: ldrb.w r12, [r3, #2]
; A9-NEXT: ldrb r6, [r4, #2]
; A9-NEXT: add r6, r12
; A9-NEXT: strb r6, [r5, #2]
; A9-NEXT: ldrb r3, [r3, #3]
; A9-NEXT: ldrb r6, [r4, #3]
; A9-NEXT: add r3, r6
; A9-NEXT: strb r3, [r5, #3]
; A9-NEXT: bne .LBB3_1
; A9-NEXT: @ %bb.2: @ %for.end
; A9-NEXT: pop {r4, r5, r6, pc}
entry:
br label %for.body

Expand Down Expand Up @@ -200,14 +295,45 @@ for.end: ; preds = %for.body
;
; Loads and stores should use post-increment addressing, with no add or add.w instructions.
; Most importantly, there should be no spills or reloads!
;
; A9: testNeon:
; A9: %.lr.ph
; A9-NOT: lsl.w
; A9-NOT: {{ldr|str|adds|add r}}
; A9-NOT: add.w r
; A9: bne
define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
; A9-LABEL: testNeon:
; A9: @ %bb.0:
; A9-NEXT: .save {r4, r5, r7, lr}
; A9-NEXT: push {r4, r5, r7, lr}
; A9-NEXT: vmov.i32 q8, #0x0
; A9-NEXT: cmp r2, #1
; A9-NEXT: blt .LBB4_4
; A9-NEXT: @ %bb.1: @ %.lr.ph
; A9-NEXT: movs r5, #0
; A9-NEXT: movw r4, #64464
; A9-NEXT: sub.w r12, r5, r2, lsl #6
; A9-NEXT: sub.w lr, r1, r1, lsl #4
; A9-NEXT: movt r4, #65535
; A9-NEXT: mov r5, r3
; A9-NEXT: .LBB4_2: @ =>This Inner Loop Header: Depth=1
; A9-NEXT: vld1.64 {d18}, [r0], r1
; A9-NEXT: subs r2, #1
; A9-NEXT: vld1.64 {d19}, [r0], r1
; A9-NEXT: vst1.8 {d18, d19}, [r5]!
; A9-NEXT: vld1.64 {d20}, [r0], r1
; A9-NEXT: vld1.64 {d21}, [r0], r1
; A9-NEXT: vst1.8 {d20, d21}, [r5]!
; A9-NEXT: vld1.64 {d22}, [r0], r1
; A9-NEXT: vadd.i8 q9, q9, q10
; A9-NEXT: vld1.64 {d23}, [r0], r1
; A9-NEXT: vst1.8 {d22, d23}, [r5]!
; A9-NEXT: vld1.64 {d20}, [r0], r1
; A9-NEXT: vadd.i8 q9, q9, q11
; A9-NEXT: vld1.64 {d21}, [r0], lr
; A9-NEXT: vadd.i8 q9, q9, q10
; A9-NEXT: vadd.i8 q8, q8, q9
; A9-NEXT: vst1.8 {d20, d21}, [r5], r4
; A9-NEXT: bne .LBB4_2
; A9-NEXT: @ %bb.3: @ %._crit_edge
; A9-NEXT: add.w r3, r3, r12, lsl #4
; A9-NEXT: .LBB4_4:
; A9-NEXT: vst1.32 {d16, d17}, [r3]
; A9-NEXT: pop {r4, r5, r7, pc}
%1 = icmp sgt i32 %limit, 0
br i1 %1, label %.lr.ph, label %45

Expand Down Expand Up @@ -284,24 +410,41 @@ declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32) nounwind readonly
; Handle chains in which the same offset is used for both loads and
; stores to the same array.
; rdar://11410078.
;
; A9: @testReuse
; A9: %for.body
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE:[r[0-9]+]]], [[INC:r[0-9]]]
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]]
; A9: bne
define void @testReuse(i8* %src, i32 %stride) nounwind ssp {
; A9-LABEL: testReuse:
; A9: @ %bb.0: @ %entry
; A9-NEXT: sub.w r12, r0, r1, lsl #2
; A9-NEXT: sub.w r0, r1, r1, lsl #2
; A9-NEXT: lsls r2, r0, #1
; A9-NEXT: movs r3, #0
; A9-NEXT: .LBB5_1: @ %for.body
; A9-NEXT: @ =>This Inner Loop Header: Depth=1
; A9-NEXT: add.w r0, r12, r3
; A9-NEXT: adds r3, #8
; A9-NEXT: vld1.8 {d16}, [r0], r1
; A9-NEXT: cmp r3, #32
; A9-NEXT: vld1.8 {d17}, [r0], r1
; A9-NEXT: vhadd.u8 d16, d16, d17
; A9-NEXT: vld1.8 {d18}, [r0], r1
; A9-NEXT: vhadd.u8 d17, d17, d18
; A9-NEXT: vld1.8 {d19}, [r0], r1
; A9-NEXT: vhadd.u8 d18, d18, d19
; A9-NEXT: vld1.8 {d20}, [r0], r1
; A9-NEXT: vhadd.u8 d19, d19, d20
; A9-NEXT: vld1.8 {d21}, [r0], r1
; A9-NEXT: vhadd.u8 d20, d20, d21
; A9-NEXT: vld1.8 {d22}, [r0], r1
; A9-NEXT: vhadd.u8 d21, d21, d22
; A9-NEXT: vld1.8 {d23}, [r0], r2
; A9-NEXT: vst1.8 {d16}, [r0], r1
; A9-NEXT: vst1.8 {d17}, [r0], r1
; A9-NEXT: vst1.8 {d18}, [r0], r1
; A9-NEXT: vst1.8 {d19}, [r0], r1
; A9-NEXT: vst1.8 {d20}, [r0], r1
; A9-NEXT: vst1.8 {d21}, [r0]
; A9-NEXT: bne .LBB5_1
; A9-NEXT: @ %bb.2: @ %for.end
; A9-NEXT: bx lr
entry:
%mul = shl nsw i32 %stride, 2
%idx.neg = sub i32 0, %mul
Expand Down
Loading

0 comments on commit 7a78756

Please sign in to comment.