ARM32 vorr lowering
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1639403004 .
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index be8488c..9ca1085 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -673,6 +673,11 @@
}
}
+template <> void InstARM32Vorr::emitIAS(const Cfg *Func) const {
+ // TODO(kschimpf): add integrated assembler support for this instruction (vorr).
+ emitUsingTextFixup(Func);
+}
+
template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
@@ -1040,6 +1045,7 @@
template <> const char *InstARM32Vmla::Opcode = "vmla";
template <> const char *InstARM32Vmls::Opcode = "vmls";
template <> const char *InstARM32Vmul::Opcode = "vmul";
+template <> const char *InstARM32Vorr::Opcode = "vorr";
template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 2d51e89..413f0f5 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -431,6 +431,7 @@
Vmls,
Vmrs,
Vmul,
+ Vorr,
Vsqrt,
Vsub
};
@@ -925,6 +926,7 @@
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
+using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index f836fe5..4b2dc98 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -2808,6 +2808,7 @@
case InstArithmetic::Fsub:
case InstArithmetic::Sub:
case InstArithmetic::And:
+ case InstArithmetic::Or:
break;
}
}
@@ -2968,8 +2969,13 @@
}
case InstArithmetic::Or: {
Variable *Src0R = Srcs.src0R(this);
- Operand *Src1RF = Srcs.src1RF(this);
- _orr(T, Src0R, Src1RF);
+ if (isVectorType(DestTy)) {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vorr(T, Src0R, Src1R);
+ } else {
+ Operand *Src1RF = Srcs.src1RF(this);
+ _orr(T, Src0R, Src1RF);
+ }
_mov(Dest, T);
return;
}
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 8779beb..b8c1040 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -802,6 +802,9 @@
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
}
+ void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
+ Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
+ }
void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
diff --git a/tests_lit/assembler/arm32/and-vec.ll b/tests_lit/assembler/arm32/and-vec.ll
index 3cf635a..e4d9625 100644
--- a/tests_lit/assembler/arm32/and-vec.ll
+++ b/tests_lit/assembler/arm32/and-vec.ll
@@ -1,4 +1,4 @@
-; Show that we know how to translate vsub vector instructions.
+; Show that we know how to translate vand vector instructions.
; REQUIRES: allow_dump
diff --git a/tests_lit/assembler/arm32/or-vec.ll b/tests_lit/assembler/arm32/or-vec.ll
new file mode 100644
index 0000000..dd9a51f
--- /dev/null
+++ b/tests_lit/assembler/arm32/or-vec.ll
@@ -0,0 +1,115 @@
+; Show that we know how to translate vorr vector instructions.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN: | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN: --args -O2 \
+; RUN: | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN: | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN: --args -O2 \
+; RUN: | FileCheck %s --check-prefix=DIS
+
+define internal <4 x i32> @testVor4i32(<4 x i32> %v1, <4 x i32> %v2) {
+; ASM-LABEL: testVor4i32:
+; DIS-LABEL: 00000000 <testVor4i32>:
+; IASM-LABEL: testVor4i32:
+
+entry:
+ %res = or <4 x i32> %v1, %v2
+
+; ASM: vorr.i32 q0, q0, q1
+; DIS: 0: f2200152
+; IASM: vorr.i32
+
+ ret <4 x i32> %res
+}
+
+define internal <8 x i16> @testVor8i16(<8 x i16> %v1, <8 x i16> %v2) {
+; ASM-LABEL: testVor8i16:
+; DIS-LABEL: 00000010 <testVor8i16>:
+; IASM-LABEL: testVor8i16:
+
+entry:
+ %res = or <8 x i16> %v1, %v2
+
+; ASM: vorr.i16 q0, q0, q1
+; DIS: 10: f2200152
+; IASM: vorr.i16
+
+ ret <8 x i16> %res
+}
+
+define internal <16 x i8> @testVor16i8(<16 x i8> %v1, <16 x i8> %v2) {
+; ASM-LABEL: testVor16i8:
+; DIS-LABEL: 00000020 <testVor16i8>:
+; IASM-LABEL: testVor16i8:
+
+entry:
+ %res = or <16 x i8> %v1, %v2
+
+; ASM: vorr.i8 q0, q0, q1
+; DIS: 20: f2200152
+; IASM: vorr.i8
+
+ ret <16 x i8> %res
+}
+
+;;
+;; The following tests make sure that logical OR works on predicate vectors.
+;;
+
+define internal <4 x i1> @testVor4i1(<4 x i1> %v1, <4 x i1> %v2) {
+; ASM-LABEL: testVor4i1:
+; DIS-LABEL: 00000030 <testVor4i1>:
+; IASM-LABEL: testVor4i1:
+
+entry:
+ %res = or <4 x i1> %v1, %v2
+
+; ASM: vorr.i32 q0, q0, q1
+; DIS: 30: f2200152
+; IASM: vorr.i32
+
+ ret <4 x i1> %res
+}
+
+define internal <8 x i1> @testVor8i1(<8 x i1> %v1, <8 x i1> %v2) {
+; ASM-LABEL: testVor8i1:
+; DIS-LABEL: 00000040 <testVor8i1>:
+; IASM-LABEL: testVor8i1:
+
+entry:
+ %res = or <8 x i1> %v1, %v2
+
+; ASM: vorr.i16 q0, q0, q1
+; DIS: 40: f2200152
+; IASM: vorr.i16
+
+ ret <8 x i1> %res
+}
+
+define internal <16 x i1> @testVor16i1(<16 x i1> %v1, <16 x i1> %v2) {
+; ASM-LABEL: testVor16i1:
+; DIS-LABEL: 00000050 <testVor16i1>:
+; IASM-LABEL: testVor16i1:
+
+entry:
+ %res = or <16 x i1> %v1, %v2
+
+; ASM: vorr.i8 q0, q0, q1
+; DIS: 50: f2200152
+; IASM: vorr.i8
+
+ ret <16 x i1> %res
+}