-
Notifications
You must be signed in to change notification settings - Fork 12.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[RISCV] Convert AVLs with vlenb to VLMAX where possible (#97800)
Given an AVL that's computed from vlenb, if it's equal to VLMAX then we can replace it with the VLMAX sentinel value. The main motivation is to be able to express an EVL of VLMAX in VP intrinsics whilst emitting vsetvli a0, zero, so that we can replace llvm.riscv.masked.strided.{load,store} with their VP counterparts. This is done in RISCVVectorPeephole (previously RISCVFoldMasks, renamed to account for the fact that it no longer just folds masks) instead of SelectionDAG since there are multiple places where VP nodes are lowered that would have needed to be handled. This also avoids doing it in RISCVInsertVSETVLI as it's much harder to look up the value of the AVL, and in RISCVVectorPeephole we can take advantage of DeadMachineInstrElim to remove any leftover PseudoReadVLENBs.
- Loading branch information
Showing
12 changed files
with
170 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s | ||
|
||
; Mask (i1) case: vp.and on <vscale x 1 x i1> with an all-true mask and an EVL of | ||
; vscale, i.e. the full element count of the type. The CHECK lines expect the | ||
; VLMAX form of vsetvli (AVL operand "zero") followed by a single vmand.mm. | ||
; NOTE(review): the "_srli" suffix presumably refers to how the AVL is derived | ||
; from vlenb after lowering; that lowering is not visible in this test. | ||
define <vscale x 1 x i1> @sew1_srli(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b) { | ||
; CHECK-LABEL: sew1_srli: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma | ||
; CHECK-NEXT: vmand.mm v0, v0, v8 | ||
; CHECK-NEXT: ret | ||
%vlmax = call i32 @llvm.vscale() | ||
%x = call <vscale x 1 x i1> @llvm.vp.and.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> splat (i1 true), i32 %vlmax) | ||
ret <vscale x 1 x i1> %x | ||
} | ||
|
||
; 64-bit element case: vp.add on <vscale x 1 x i64> with an all-true mask and an | ||
; EVL of vscale, i.e. the full element count of the type. The CHECK lines expect | ||
; the VLMAX form of vsetvli (AVL operand "zero", e64/m1) and a plain vadd.vv. | ||
; NOTE(review): the "_srli" suffix presumably refers to how the AVL is derived | ||
; from vlenb after lowering; that lowering is not visible in this test. | ||
define <vscale x 1 x i64> @sew64_srli(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b) { | ||
; CHECK-LABEL: sew64_srli: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma | ||
; CHECK-NEXT: vadd.vv v8, v8, v9 | ||
; CHECK-NEXT: ret | ||
%vlmax = call i32 @llvm.vscale() | ||
%x = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 true), i32 %vlmax) | ||
ret <vscale x 1 x i64> %x | ||
} | ||
|
||
; vp.add on <vscale x 8 x i64> with an all-true mask. The EVL is vscale << 3 | ||
; (8 * vscale), which is the full element count of the type. The CHECK lines | ||
; expect the VLMAX form of vsetvli (AVL operand "zero", e64/m8) and a vadd.vv. | ||
define <vscale x 8 x i64> @sew64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) { | ||
; CHECK-LABEL: sew64: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma | ||
; CHECK-NEXT: vadd.vv v8, v8, v16 | ||
; CHECK-NEXT: ret | ||
%vscale = call i32 @llvm.vscale() | ||
%vlmax = shl i32 %vscale, 3 | ||
%x = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 true), i32 %vlmax) | ||
ret <vscale x 8 x i64> %x | ||
} | ||
|
||
; vp.add on <vscale x 16 x i32> with an all-true mask. The EVL is vscale << 4 | ||
; (16 * vscale), which is the full element count of the type. The CHECK lines | ||
; expect the VLMAX form of vsetvli (AVL operand "zero", e32/m8) and a vadd.vv. | ||
; NOTE(review): the "_sll" suffix presumably refers to how the AVL is derived | ||
; from vlenb after lowering; that lowering is not visible in this test. | ||
define <vscale x 16 x i32> @sew32_sll(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b) { | ||
; CHECK-LABEL: sew32_sll: | ||
; CHECK: # %bb.0: | ||
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma | ||
; CHECK-NEXT: vadd.vv v8, v8, v16 | ||
; CHECK-NEXT: ret | ||
%vscale = call i32 @llvm.vscale() | ||
%vlmax = shl i32 %vscale, 4 | ||
%x = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 true), i32 %vlmax) | ||
ret <vscale x 16 x i32> %x | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.