Subzero. ARM32. Implements vector select. Also piggy-backs necro-comments from cl 1878943009. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076 R=kschimpf@google.com Review URL: https://codereview.chromium.org/1886263004 .
diff --git a/src/DartARM32/assembler_arm.h b/src/DartARM32/assembler_arm.h index 4d205c7..c6e53df 100644 --- a/src/DartARM32/assembler_arm.h +++ b/src/DartARM32/assembler_arm.h
@@ -1406,9 +1406,13 @@ // ARM32::AssemblerARM32::uxt() (uxtb and uxth) // ARM32::AssemblerARM32::vpop() // ARM32::AssemblerARM32::vpush() - // ARM32::AssemblerARM32:rbit() + // ARM32::AssemblerARM32::rbit() + // ARM32::AssemblerARM32::vbslq() // ARM32::AssemblerARM32::veord() // ARM32::AssemblerARM32::vld1qr() + // ARM32::AssemblerARM32::vshlqc() + // ARM32::AssemblerARM32::vshrqic() + // ARM32::AssemblerARM32::vshrquc() // ARM32::AssemblerARM32::vst1qr() // ARM32::AssemblerARM32::vmorqi() // ARM32::AssemblerARM32::vmovqc()
diff --git a/src/IceAssemblerARM32.cpp b/src/IceAssemblerARM32.cpp index d21bd1b..71e5c6a 100644 --- a/src/IceAssemblerARM32.cpp +++ b/src/IceAssemblerARM32.cpp
@@ -2410,6 +2410,18 @@ emitSIMDqqq(VandqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vandq); } +void AssemblerARM32::vbslq(const Operand *OpQd, const Operand *OpQm, + const Operand *OpQn) { + // VBSL (register) - ARM section A8.8.290, encoding A1: + // vbsl <Qd>, <Qn>, <Qm> + // + // 111100110D01nnn0ddd00001N1M1mmm0 where Dddd=OpQd, Nnnn=OpQm, and Mmmm=OpQn. + constexpr const char *Vbslq = "vbslq"; + constexpr IValueT VbslqOpcode = B24 | B20 | B8 | B4; + constexpr Type ElmtTy = IceType_i8; // emits sz=0 + emitSIMDqqq(VbslqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vbslq); +} + void AssemblerARM32::vcmpd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond Cond) { constexpr const char *Vcmpd = "vcmpd";
diff --git a/src/IceAssemblerARM32.h b/src/IceAssemblerARM32.h index b1ca928..ae3b93e 100644 --- a/src/IceAssemblerARM32.h +++ b/src/IceAssemblerARM32.h
@@ -343,6 +343,8 @@ void vandq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn); + void vbslq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn); + void vcmpd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond cond); // Second argument of compare is zero (+0.0).
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp index e2498fe..7c3e288 100644 --- a/src/IceInstARM32.cpp +++ b/src/IceInstARM32.cpp
@@ -704,6 +704,24 @@ assert(!Asm->needsTextFixup()); } +template <> void InstARM32Vbsl::emitIAS(const Cfg *Func) const { + auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); + const Variable *Dest = getDest(); + switch (Dest->getType()) { + default: + llvm::report_fatal_error("Vbsl not defined on type " + + typeStdString(Dest->getType())); + case IceType_v4i1: + case IceType_v8i1: + case IceType_v16i1: + case IceType_v16i8: + case IceType_v8i16: + case IceType_v4i32: + Asm->vbslq(Dest, getSrc(0), getSrc(1)); + } + assert(!Asm->needsTextFixup()); +} + template <> void InstARM32Vdiv::emitIAS(const Cfg *Func) const { auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); const Variable *Dest = getDest(); @@ -1496,6 +1514,7 @@ // FP template <> const char *InstARM32Vadd::Opcode = "vadd"; template <> const char *InstARM32Vand::Opcode = "vand"; +template <> const char *InstARM32Vbsl::Opcode = "vbsl"; template <> const char *InstARM32Vdiv::Opcode = "vdiv"; template <> const char *InstARM32Veor::Opcode = "veor"; template <> const char *InstARM32Vmla::Opcode = "vmla";
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h index 3c65037..4a052c4 100644 --- a/src/IceInstARM32.h +++ b/src/IceInstARM32.h
@@ -427,6 +427,7 @@ Vabs, Vadd, Vand, + Vbsl, Vcmp, Vcvt, Vdiv, @@ -992,6 +993,7 @@ using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>; using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>; using InstARM32Vand = InstARM32ThreeAddrFP<InstARM32::Vand>; +using InstARM32Vbsl = InstARM32ThreeAddrFP<InstARM32::Vbsl>; using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>; using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>; using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp index b8fa3b6..3de321e 100644 --- a/src/IceTargetLoweringARM32.cpp +++ b/src/IceTargetLoweringARM32.cpp
@@ -861,24 +861,6 @@ } return; } - case Inst::Select: { - Variable *Dest = Instr->getDest(); - const auto DestTy = Dest->getType(); - if (isVectorType(DestTy)) { - auto *SelectInstr = llvm::cast<InstSelect>(Instr); - scalarizeInstruction(Dest, - [this](Variable *Dest, Variable *Src0, - Variable *Src1, Variable *Src2) { - return Context.insert<InstSelect>(Dest, Src0, Src1, - Src2); - }, - llvm::cast<Variable>(SelectInstr->getSrc(0)), - llvm::cast<Variable>(SelectInstr->getSrc(1)), - llvm::cast<Variable>(SelectInstr->getSrc(2))); - SelectInstr->setDeleted(); - } - return; - } } } @@ -5727,12 +5709,39 @@ Operand *SrcF = Instr->getFalseOperand(); Operand *Condition = Instr->getCondition(); - if (isVectorType(DestTy)) { - UnimplementedLoweringError(this, Instr); + if (!isVectorType(DestTy)) { + lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT), + legalizeUndef(SrcF)); return; } - lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT), legalizeUndef(SrcF)); + Type TType = DestTy; + switch (DestTy) { + default: + llvm::report_fatal_error("Unexpected type for vector select."); + case IceType_v4i1: + TType = IceType_v4i32; + break; + case IceType_v8i1: + TType = IceType_v8i16; + break; + case IceType_v16i1: + TType = IceType_v16i8; + break; + case IceType_v4f32: + TType = IceType_v4i32; + break; + case IceType_v4i32: + case IceType_v8i16: + case IceType_v16i8: + break; + } + auto *T = makeReg(TType); + lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); + auto *SrcTR = legalizeToReg(SrcT); + auto *SrcFR = legalizeToReg(SrcF); + _vbsl(T, SrcTR, SrcFR)->setDestRedefined(); + _mov(Dest, T); } void TargetARM32::lowerStore(const InstStore *Instr) {
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h index 9d2b760..2cfa945 100644 --- a/src/IceTargetLoweringARM32.h +++ b/src/IceTargetLoweringARM32.h
@@ -854,6 +854,9 @@ void _vand(Variable *Dest, Variable *Src0, Variable *Src1) { Context.insert<InstARM32Vand>(Dest, Src0, Src1); } + InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) { + return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1); + } void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant, CondARM32::Cond Pred = CondARM32::AL) { Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
diff --git a/tests_lit/assembler/arm32/select-vec.ll b/tests_lit/assembler/arm32/select-vec.ll index 8d6165e..415f936 100644 --- a/tests_lit/assembler/arm32/select-vec.ll +++ b/tests_lit/assembler/arm32/select-vec.ll
@@ -1,152 +1,84 @@ ; Test that we handle select on vectors. -; TODO(eholk): This test will need to be updated once comparison is no longer -; scalarized. - ; REQUIRES: allow_dump ; Compile using standalone assembler. ; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \ ; RUN: | FileCheck %s --check-prefix=ASM +; Show bytes in assembled standalone code. +; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \ +; RUN: --args -O2 --reg-use=s20 | FileCheck %s --check-prefix=DIS + +; Compile using integrated assembler. +; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \ +; RUN: --reg-use=s20 \ +; RUN: | FileCheck %s --check-prefix=IASM + +; Show bytes in assembled integrated code. +; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \ +; RUN: --args -O2 --reg-use=s20 | FileCheck %s --check-prefix=DIS + define internal <4 x float> @select4float(<4 x i1> %s, <4 x float> %a, <4 x float> %b) { ; ASM-LABEL:select4float: ; DIS-LABEL:00000000 <select4float>: +; IASM-LABEL:select4float: entry: %res = select <4 x i1> %s, <4 x float> %a, <4 x float> %b -; ASM: # q3 = def.pseudo -; ASM-NEXT: vmov.s8 r0, d0[0] -; ASM-NEXT: vmov.f32 s16, s4 -; ASM-NEXT: vmov.f32 s17, s8 -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: vmovne.f32 s17, s16 -; ASM-NEXT: vmov.f32 s12, s17 -; ASM-NEXT: vmov.s8 r0, d0[4] -; ASM-NEXT: vmov.f32 s16, s5 -; ASM-NEXT: vmov.f32 s17, s9 -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: vmovne.f32 s17, s16 -; ASM-NEXT: vmov.f32 s13, s17 -; ASM-NEXT: vmov.s8 r0, d1[0] -; ASM-NEXT: vmov.f32 s16, s6 -; ASM-NEXT: vmov.f32 s17, s10 -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: vmovne.f32 s17, s16 -; ASM-NEXT: vmov.f32 s14, s17 -; ASM-NEXT: vmov.s8 r0, d1[4] -; ASM-NEXT: vmov.f32 s4, s7 -; ASM-NEXT: vmov.f32 s8, s11 -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: vmovne.f32 s8, s4 -; ASM-NEXT: vmov.f32 s15, s8 -; ASM-NEXT: vmov.f32 q0, q3 -; ASM-NEXT: vpop {s16, s17} -; ASM-NEXT: # s16 = def.pseudo -; ASM-NEXT: # s17 = def.pseudo -; ASM-NEXT: bx lr +; 
ASM: vshl.u32 [[M:.*]], {{.*}}, #31 +; ASM-NEXT: vshr.s32 [[M:.*]], {{.*}}, #31 +; ASM-NEXT: vbsl.i32 [[M]], {{.*}} +; DIS: 0: f2bf0550 +; DIS-NEXT: 4: f2a10050 +; DIS-NEXT: 8: f3120154 +; IASM-NOT: vshl +; IASM-NOT: vshr +; IASM-NOT: vbsl ret <4 x float> %res } define internal <4 x i32> @select4i32(<4 x i1> %s, <4 x i32> %a, <4 x i32> %b) { ; ASM-LABEL:select4i32: -; DIS-LABEL:00000000 <select4i32>: +; DIS-LABEL:00000010 <select4i32>: +; IASM-LABEL:select4i32: entry: %res = select <4 x i1> %s, <4 x i32> %a, <4 x i32> %b -; ASM: # q3 = def.pseudo -; ASM-NEXT: vmov.s8 r0, d0[0] -; ASM-NEXT: vmov.32 r1, d2[0] -; ASM-NEXT: vmov.32 r2, d4[0] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.32 d6[0], r2 -; ASM-NEXT: vmov.s8 r0, d0[4] -; ASM-NEXT: vmov.32 r1, d2[1] -; ASM-NEXT: vmov.32 r2, d4[1] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.32 d6[1], r2 -; ASM-NEXT: vmov.s8 r0, d1[0] -; ASM-NEXT: vmov.32 r1, d3[0] -; ASM-NEXT: vmov.32 r2, d5[0] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.32 d7[0], r2 -; ASM-NEXT: vmov.s8 r0, d1[4] -; ASM-NEXT: vmov.32 r1, d3[1] -; ASM-NEXT: vmov.32 r2, d5[1] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.32 d7[1], r2 -; ASM-NEXT: vmov.i32 q0, q3 -; ASM-NEXT: bx lr +; ASM: vshl.u32 [[M:.*]], {{.*}}, #31 +; ASM-NEXT: vshr.s32 [[M:.*]], {{.*}}, #31 +; ASM-NEXT: vbsl.i32 [[M]], {{.*}} +; DIS: 10: f2bf0550 +; DIS-NEXT: 14: f2a10050 +; DIS-NEXT: 18: f3120154 +; IASM-NOT: vshl +; IASM-NOT: vshr +; IASM-NOT: vbsl ret <4 x i32> %res } define internal <8 x i16> @select8i16(<8 x i1> %s, <8 x i16> %a, <8 x i16> %b) { ; ASM-LABEL:select8i16: -; DIS-LABEL:00000000 <select8i16>: +; DIS-LABEL:00000020 <select8i16>: +; IASM-LABEL:select8i16: entry: %res = select <8 x i1> %s, <8 x i16> %a, <8 x i16> %b -; ASM: # q3 = def.pseudo -; ASM-NEXT: vmov.s8 r0, d0[0] -; ASM-NEXT: vmov.s16 r1, d2[0] -; ASM-NEXT: vmov.s16 r2, d4[0] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 
-; ASM-NEXT: vmov.16 d6[0], r2 -; ASM-NEXT: vmov.s8 r0, d0[2] -; ASM-NEXT: vmov.s16 r1, d2[1] -; ASM-NEXT: vmov.s16 r2, d4[1] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.16 d6[1], r2 -; ASM-NEXT: vmov.s8 r0, d0[4] -; ASM-NEXT: vmov.s16 r1, d2[2] -; ASM-NEXT: vmov.s16 r2, d4[2] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.16 d6[2], r2 -; ASM-NEXT: vmov.s8 r0, d0[6] -; ASM-NEXT: vmov.s16 r1, d2[3] -; ASM-NEXT: vmov.s16 r2, d4[3] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.16 d6[3], r2 -; ASM-NEXT: vmov.s8 r0, d1[0] -; ASM-NEXT: vmov.s16 r1, d3[0] -; ASM-NEXT: vmov.s16 r2, d5[0] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.16 d7[0], r2 -; ASM-NEXT: vmov.s8 r0, d1[2] -; ASM-NEXT: vmov.s16 r1, d3[1] -; ASM-NEXT: vmov.s16 r2, d5[1] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.16 d7[1], r2 -; ASM-NEXT: vmov.s8 r0, d1[4] -; ASM-NEXT: vmov.s16 r1, d3[2] -; ASM-NEXT: vmov.s16 r2, d5[2] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.16 d7[2], r2 -; ASM-NEXT: vmov.s8 r0, d1[6] -; ASM-NEXT: vmov.s16 r1, d3[3] -; ASM-NEXT: vmov.s16 r2, d5[3] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.16 d7[3], r2 -; ASM-NEXT: vmov.i16 q0, q3 -; ASM-NEXT: bx lr +; ASM: vshl.u16 [[M:.*]], {{.*}}, #15 +; ASM-NEXT: vshr.s16 [[M:.*]], {{.*}}, #15 +; ASM-NEXT: vbsl.i16 [[M]], {{.*}} +; DIS: 20: f29f0550 +; DIS-NEXT: 24: f2910050 +; DIS-NEXT: 28: f3120154 +; IASM-NOT: vshl +; IASM-NOT: vshr +; IASM-NOT: vbsl ret <8 x i16> %res } @@ -154,110 +86,21 @@ define internal <16 x i8> @select16i8(<16 x i1> %s, <16 x i8> %a, <16 x i8> %b) { ; ASM-LABEL:select16i8: -; DIS-LABEL:00000000 <select16i8>: +; DIS-LABEL:00000030 <select16i8>: +; IASM-LABEL:select16i8: entry: %res = select <16 x i1> %s, <16 x i8> %a, <16 x i8> %b -; ASM: # q3 = def.pseudo -; ASM-NEXT: vmov.s8 r0, d0[0] -; ASM-NEXT: vmov.s8 r1, d2[0] -; ASM-NEXT: vmov.s8 r2, d4[0] -; ASM-NEXT: 
tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d6[0], r2 -; ASM-NEXT: vmov.s8 r0, d0[1] -; ASM-NEXT: vmov.s8 r1, d2[1] -; ASM-NEXT: vmov.s8 r2, d4[1] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d6[1], r2 -; ASM-NEXT: vmov.s8 r0, d0[2] -; ASM-NEXT: vmov.s8 r1, d2[2] -; ASM-NEXT: vmov.s8 r2, d4[2] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d6[2], r2 -; ASM-NEXT: vmov.s8 r0, d0[3] -; ASM-NEXT: vmov.s8 r1, d2[3] -; ASM-NEXT: vmov.s8 r2, d4[3] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d6[3], r2 -; ASM-NEXT: vmov.s8 r0, d0[4] -; ASM-NEXT: vmov.s8 r1, d2[4] -; ASM-NEXT: vmov.s8 r2, d4[4] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d6[4], r2 -; ASM-NEXT: vmov.s8 r0, d0[5] -; ASM-NEXT: vmov.s8 r1, d2[5] -; ASM-NEXT: vmov.s8 r2, d4[5] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d6[5], r2 -; ASM-NEXT: vmov.s8 r0, d0[6] -; ASM-NEXT: vmov.s8 r1, d2[6] -; ASM-NEXT: vmov.s8 r2, d4[6] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d6[6], r2 -; ASM-NEXT: vmov.s8 r0, d0[7] -; ASM-NEXT: vmov.s8 r1, d2[7] -; ASM-NEXT: vmov.s8 r2, d4[7] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d6[7], r2 -; ASM-NEXT: vmov.s8 r0, d1[0] -; ASM-NEXT: vmov.s8 r1, d3[0] -; ASM-NEXT: vmov.s8 r2, d5[0] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d7[0], r2 -; ASM-NEXT: vmov.s8 r0, d1[1] -; ASM-NEXT: vmov.s8 r1, d3[1] -; ASM-NEXT: vmov.s8 r2, d5[1] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d7[1], r2 -; ASM-NEXT: vmov.s8 r0, d1[2] -; ASM-NEXT: vmov.s8 r1, d3[2] -; ASM-NEXT: vmov.s8 r2, d5[2] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d7[2], r2 -; ASM-NEXT: vmov.s8 r0, d1[3] -; ASM-NEXT: vmov.s8 r1, d3[3] -; ASM-NEXT: vmov.s8 r2, d5[3] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d7[3], r2 -; ASM-NEXT: vmov.s8 r0, 
d1[4] -; ASM-NEXT: vmov.s8 r1, d3[4] -; ASM-NEXT: vmov.s8 r2, d5[4] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d7[4], r2 -; ASM-NEXT: vmov.s8 r0, d1[5] -; ASM-NEXT: vmov.s8 r1, d3[5] -; ASM-NEXT: vmov.s8 r2, d5[5] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d7[5], r2 -; ASM-NEXT: vmov.s8 r0, d1[6] -; ASM-NEXT: vmov.s8 r1, d3[6] -; ASM-NEXT: vmov.s8 r2, d5[6] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d7[6], r2 -; ASM-NEXT: vmov.s8 r0, d1[7] -; ASM-NEXT: vmov.s8 r1, d3[7] -; ASM-NEXT: vmov.s8 r2, d5[7] -; ASM-NEXT: tst r0, #1 -; ASM-NEXT: movne r2, r1 -; ASM-NEXT: vmov.8 d7[7], r2 -; ASM-NEXT: vmov.i8 q0, q3 -; ASM-NEXT: bx lr +; ASM: vshl.u8 [[M:.*]], {{.*}}, #7 +; ASM-NEXT: vshr.s8 [[M:.*]], {{.*}}, #7 +; ASM-NEXT: vbsl.i8 [[M]], {{.*}} +; DIS: 30: f28f0550 +; DIS-NEXT: 34: f2890050 +; DIS-NEXT: 38: f3120154 +; IASM-NOT: vshl +; IASM-NOT: vshr +; IASM-NOT: vbsl ret <16 x i8> %res }
diff --git a/tests_lit/assembler/arm32/vcvt.f32.s32.ll b/tests_lit/assembler/arm32/vcvt.f32.s32.ll index 92edf99..8481e87 100644 --- a/tests_lit/assembler/arm32/vcvt.f32.s32.ll +++ b/tests_lit/assembler/arm32/vcvt.f32.s32.ll
@@ -46,7 +46,7 @@ ; ASM: vcvt.f32.s32 q0, q0 ; DIS: 40: f3bb0640 -; IASM-NOT: vcvt.f32.s32 +; IASM-NOT: vcvt ret <4 x float> %v }
diff --git a/tests_lit/assembler/arm32/vcvt.f32.u32.ll b/tests_lit/assembler/arm32/vcvt.f32.u32.ll index dee7f3b..56fd7c8 100644 --- a/tests_lit/assembler/arm32/vcvt.f32.u32.ll +++ b/tests_lit/assembler/arm32/vcvt.f32.u32.ll
@@ -46,7 +46,7 @@ ; ASM: vcvt.f32.u32 q0, q0 ; DIS: 40: f3bb06c0 -; IASM-NOT: vcvt.f32.u32 +; IASM-NOT: vcvt ret <4 x float> %v }
diff --git a/tests_lit/assembler/arm32/vcvt.s32.f32.ll b/tests_lit/assembler/arm32/vcvt.s32.f32.ll index c38e752..656ba04 100644 --- a/tests_lit/assembler/arm32/vcvt.s32.f32.ll +++ b/tests_lit/assembler/arm32/vcvt.s32.f32.ll
@@ -46,7 +46,7 @@ ; ASM: vcvt.s32.f32 q0, q0 ; DIS: 40: f3bb0740 -; IASM-NOT: vcvt.s32.f32 +; IASM-NOT: vcvt ret <4 x i32> %v }
diff --git a/tests_lit/assembler/arm32/vcvt.u32.f32.ll b/tests_lit/assembler/arm32/vcvt.u32.f32.ll index 6b08765..2fae0f5 100644 --- a/tests_lit/assembler/arm32/vcvt.u32.f32.ll +++ b/tests_lit/assembler/arm32/vcvt.u32.f32.ll
@@ -45,7 +45,7 @@ ; ASM: vcvt.u32.f32 q0, q0 ; DIS: 40: f3bb07c0 -; IASM-NOT: vcvt.u32.f32 +; IASM-NOT: vcvt ret <4 x i32> %v }
diff --git a/tests_lit/assembler/arm32/vec-sh-imm.ll b/tests_lit/assembler/arm32/vec-sh-imm.ll index 0436824..c2c5ebc 100644 --- a/tests_lit/assembler/arm32/vec-sh-imm.ll +++ b/tests_lit/assembler/arm32/vec-sh-imm.ll
@@ -34,8 +34,8 @@ ; ASM-NEXT: vshr.s32 {{.*}}, #31 ; DIS: 0: f2bf0550 ; DIS-NEXT: 4: f2a10050 -; IASM-NOT: vshl.u32 {{.*}}, #31 -; IASM-NOT: vshr.s32 {{.*}}, #31 +; IASM-NOT: vshl +; IASM-NOT: vshr } define internal <8 x i16> @SextV8I1(<8 x i16> %a) { @@ -46,12 +46,12 @@ %trunc = trunc <8 x i16> %a to <8 x i1> %sext = sext <8 x i1> %trunc to <8 x i16> ret <8 x i16> %sext -; ASM: vshl.u16 {{.*}}, #15 -; ASM-NEXT: vshr.s16 {{.*}}, #15 +; ASM: vshl.u16 {{.*}}, #15 +; ASM-NEXT: vshr.s16 {{.*}}, #15 ; DIS: 10: f29f0550 ; DIS-NEXT: 14: f2910050 -; IASM-NOT: vshl.u16 {{.*}}, #15 -; IASM-NOT: vshr.s16 {{.*}}, #15 +; IASM-NOT: vshl +; IASM-NOT: vshr } define internal <16 x i8> @SextV16I1(<16 x i8> %a) { @@ -62,10 +62,10 @@ %trunc = trunc <16 x i8> %a to <16 x i1> %sext = sext <16 x i1> %trunc to <16 x i8> ret <16 x i8> %sext -; ASM: vshl.u8 {{.*}}, #7 -; ASM-NEXT: vshr.s8 {{.*}}, #7 +; ASM: vshl.u8 {{.*}}, #7 +; ASM-NEXT: vshr.s8 {{.*}}, #7 ; DIS: 20: f28f0550 ; DIS-NEXT: 24: f2890050 -; IASM-NOT: vshl.u8 {{.*}}, #7 -; IASM-NOT: vshr.s8 {{.*}}, #7 +; IASM-NOT: vshl +; IASM-NOT: vshr }