Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ARM] Switch to soft promoting half types. #80440

Merged
merged 1 commit on Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9055,7 +9055,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);

if (getTypeAction(*DAG.getContext(), EltVT) ==
- TargetLowering::TypePromoteFloat) {
+ TargetLowering::TypeSoftPromoteHalf) {
// INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
// but the type system will try to do that if we don't intervene.
// Reinterpret any such vector-element insertion as one with the
Expand All @@ -9065,7 +9065,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,

EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
assert(getTypeAction(*DAG.getContext(), IEltVT) !=
- TargetLowering::TypePromoteFloat);
+ TargetLowering::TypeSoftPromoteHalf);

SDValue VecIn = Op.getOperand(0);
EVT VecVT = VecIn.getValueType();
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,10 @@ class VectorType;
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const override;

bool softPromoteHalfType() const override { return true; }

bool useFPRegsForHalfType() const override { return true; }

protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
Expand Down
1,098 changes: 490 additions & 608 deletions llvm/test/CodeGen/ARM/aes-erratum-fix.ll

Large diffs are not rendered by default.

155 changes: 95 additions & 60 deletions llvm/test/CodeGen/ARM/arm-half-promote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,78 +2,113 @@

define arm_aapcs_vfpcc { <8 x half>, <8 x half> } @f1() {
; CHECK-LABEL: _f1
; CHECK: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: vmov.i32 d8, #0x0
; CHECK-NEXT: vmov.i32 d0, #0x0
; CHECK-NEXT: vmov.i32 d1, #0x0
; CHECK-NEXT: vmov.i32 d2, #0x0
; CHECK-NEXT: vmov.i32 d3, #0x0
; CHECK-NEXT: vmov.i32 d4, #0x0
; CHECK-NEXT: vmov.i32 d5, #0x0
; CHECK-NEXT: vmov.i32 d6, #0x0
; CHECK-NEXT: vmov.i32 d7, #0x0
; CHECK-NEXT: vmov.f32 s1, s16
; CHECK-NEXT: vmov.f32 s3, s16
; CHECK-NEXT: vmov.f32 s5, s16
; CHECK-NEXT: vmov.f32 s7, s16
; CHECK-NEXT: vmov.f32 s9, s16
; CHECK-NEXT: vmov.f32 s11, s16
; CHECK-NEXT: vmov.f32 s13, s16
; CHECK-NEXT: vmov.f32 s15, s16
; CHECK-NEXT: vpop {d8}
; CHECK: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.i32 q8, #0x0
; CHECK-NEXT: vmov.u16 r0, d16[0]
; CHECK-NEXT: vmov d4, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[1]
; CHECK-NEXT: vmov d8, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[2]
; CHECK-NEXT: vmov d5, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[3]
; CHECK-NEXT: vmov d9, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[0]
; CHECK-NEXT: vmov d6, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[1]
; CHECK-NEXT: vmov d10, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[2]
; CHECK-NEXT: vmov d7, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[3]
; CHECK-NEXT: vmov d11, r0, r0
; CHECK: vmov.f32 s0, s8
; CHECK: vmov.f32 s1, s16
; CHECK: vmov.f32 s2, s10
; CHECK: vmov.f32 s3, s18
; CHECK: vmov.f32 s4, s12
; CHECK: vmov.f32 s5, s20
; CHECK: vmov.f32 s6, s14
; CHECK: vmov.f32 s7, s22
; CHECK: vmov.f32 s9, s16
; CHECK: vmov.f32 s11, s18
; CHECK: vmov.f32 s13, s20
; CHECK: vmov.f32 s15, s22
; CHECK: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr

ret { <8 x half>, <8 x half> } zeroinitializer
}

define swiftcc { <8 x half>, <8 x half> } @f2() {
; CHECK-LABEL: _f2
; CHECK: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: vmov.i32 d8, #0x0
; CHECK-NEXT: vmov.i32 d0, #0x0
; CHECK-NEXT: vmov.i32 d1, #0x0
; CHECK-NEXT: vmov.i32 d2, #0x0
; CHECK-NEXT: vmov.i32 d3, #0x0
; CHECK-NEXT: vmov.i32 d4, #0x0
; CHECK-NEXT: vmov.i32 d5, #0x0
; CHECK-NEXT: vmov.i32 d6, #0x0
; CHECK-NEXT: vmov.i32 d7, #0x0
; CHECK-NEXT: vmov.f32 s1, s16
; CHECK-NEXT: vmov.f32 s3, s16
; CHECK-NEXT: vmov.f32 s5, s16
; CHECK-NEXT: vmov.f32 s7, s16
; CHECK-NEXT: vmov.f32 s9, s16
; CHECK-NEXT: vmov.f32 s11, s16
; CHECK-NEXT: vmov.f32 s13, s16
; CHECK-NEXT: vmov.f32 s15, s16
; CHECK-NEXT: vpop {d8}
; CHECK: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.i32 q8, #0x0
; CHECK-NEXT: vmov.u16 r0, d16[0]
; CHECK-NEXT: vmov d4, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[1]
; CHECK-NEXT: vmov d8, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[2]
; CHECK-NEXT: vmov d5, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[3]
; CHECK-NEXT: vmov d9, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[0]
; CHECK-NEXT: vmov d6, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[1]
; CHECK-NEXT: vmov d10, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[2]
; CHECK-NEXT: vmov d7, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[3]
; CHECK-NEXT: vmov d11, r0, r0
; CHECK: vmov.f32 s0, s8
; CHECK: vmov.f32 s1, s16
; CHECK: vmov.f32 s2, s10
; CHECK: vmov.f32 s3, s18
; CHECK: vmov.f32 s4, s12
; CHECK: vmov.f32 s5, s20
; CHECK: vmov.f32 s6, s14
; CHECK: vmov.f32 s7, s22
; CHECK: vmov.f32 s9, s16
; CHECK: vmov.f32 s11, s18
; CHECK: vmov.f32 s13, s20
; CHECK: vmov.f32 s15, s22
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr

ret { <8 x half>, <8 x half> } zeroinitializer
}

define fastcc { <8 x half>, <8 x half> } @f3() {
; CHECK-LABEL: _f3
; CHECK: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: vmov.i32 d8, #0x0
; CHECK-NEXT: vmov.i32 d0, #0x0
; CHECK-NEXT: vmov.i32 d1, #0x0
; CHECK-NEXT: vmov.i32 d2, #0x0
; CHECK-NEXT: vmov.i32 d3, #0x0
; CHECK-NEXT: vmov.i32 d4, #0x0
; CHECK-NEXT: vmov.i32 d5, #0x0
; CHECK-NEXT: vmov.i32 d6, #0x0
; CHECK-NEXT: vmov.i32 d7, #0x0
; CHECK-NEXT: vmov.f32 s1, s16
; CHECK-NEXT: vmov.f32 s3, s16
; CHECK-NEXT: vmov.f32 s5, s16
; CHECK-NEXT: vmov.f32 s7, s16
; CHECK-NEXT: vmov.f32 s9, s16
; CHECK-NEXT: vmov.f32 s11, s16
; CHECK-NEXT: vmov.f32 s13, s16
; CHECK-NEXT: vmov.f32 s15, s16
; CHECK-NEXT: vpop {d8}
; CHECK: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.i32 q8, #0x0
; CHECK-NEXT: vmov.u16 r0, d16[0]
; CHECK-NEXT: vmov d4, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[1]
; CHECK-NEXT: vmov d8, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[2]
; CHECK-NEXT: vmov d5, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[3]
; CHECK-NEXT: vmov d9, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[0]
; CHECK-NEXT: vmov d6, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[1]
; CHECK-NEXT: vmov d10, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[2]
; CHECK-NEXT: vmov d7, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[3]
; CHECK-NEXT: vmov d11, r0, r0
; CHECK: vmov.f32 s0, s8
; CHECK: vmov.f32 s1, s16
; CHECK: vmov.f32 s2, s10
; CHECK: vmov.f32 s3, s18
; CHECK: vmov.f32 s4, s12
; CHECK: vmov.f32 s5, s20
; CHECK: vmov.f32 s6, s14
; CHECK: vmov.f32 s7, s22
; CHECK: vmov.f32 s9, s16
; CHECK: vmov.f32 s11, s18
; CHECK: vmov.f32 s13, s20
; CHECK: vmov.f32 s15, s22
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr

ret { <8 x half>, <8 x half> } zeroinitializer
Expand Down
40 changes: 0 additions & 40 deletions llvm/test/CodeGen/ARM/fp16-args.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,46 +46,6 @@ entry:
}

define <4 x half> @foo_vec(<4 x half> %a) {
; SOFT-LABEL: foo_vec:
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: vmov s0, r3
; SOFT-NEXT: vmov s2, r1
; SOFT-NEXT: vcvtb.f32.f16 s0, s0
; SOFT-NEXT: vmov s4, r0
; SOFT-NEXT: vcvtb.f32.f16 s2, s2
; SOFT-NEXT: vmov s6, r2
; SOFT-NEXT: vcvtb.f32.f16 s4, s4
; SOFT-NEXT: vcvtb.f32.f16 s6, s6
; SOFT-NEXT: vadd.f32 s0, s0, s0
; SOFT-NEXT: vadd.f32 s2, s2, s2
; SOFT-NEXT: vcvtb.f16.f32 s0, s0
; SOFT-NEXT: vadd.f32 s4, s4, s4
; SOFT-NEXT: vcvtb.f16.f32 s2, s2
; SOFT-NEXT: vadd.f32 s6, s6, s6
; SOFT-NEXT: vcvtb.f16.f32 s4, s4
; SOFT-NEXT: vcvtb.f16.f32 s6, s6
; SOFT-NEXT: vmov r0, s4
; SOFT-NEXT: vmov r1, s2
; SOFT-NEXT: vmov r2, s6
; SOFT-NEXT: vmov r3, s0
; SOFT-NEXT: bx lr
;
; HARD-LABEL: foo_vec:
; HARD: @ %bb.0: @ %entry
; HARD-NEXT: vcvtb.f32.f16 s4, s3
; HARD-NEXT: vcvtb.f32.f16 s2, s2
; HARD-NEXT: vcvtb.f32.f16 s6, s1
; HARD-NEXT: vcvtb.f32.f16 s0, s0
; HARD-NEXT: vadd.f32 s2, s2, s2
; HARD-NEXT: vadd.f32 s0, s0, s0
; HARD-NEXT: vcvtb.f16.f32 s2, s2
; HARD-NEXT: vadd.f32 s4, s4, s4
; HARD-NEXT: vcvtb.f16.f32 s0, s0
; HARD-NEXT: vadd.f32 s6, s6, s6
; HARD-NEXT: vcvtb.f16.f32 s3, s4
; HARD-NEXT: vcvtb.f16.f32 s1, s6
; HARD-NEXT: bx lr
;
; FULL-SOFT-LE-LABEL: foo_vec:
; FULL-SOFT-LE: @ %bb.0: @ %entry
; FULL-SOFT-LE-NEXT: vmov d16, r0, r1
Expand Down
Loading
Loading