Add insert/extract element to the integrated ARM assembler.
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4334
R=jpp@chromium.org
Review URL: https://codereview.chromium.org/1679023008 .
diff --git a/src/DartARM32/assembler_arm.cc b/src/DartARM32/assembler_arm.cc
index 106ecb4..8933b26 100644
--- a/src/DartARM32/assembler_arm.cc
+++ b/src/DartARM32/assembler_arm.cc
@@ -683,7 +683,8 @@
Emit(encoding);
}
-
+#if 0
+// Moved to ARM32::AssemblerARM32::vmovqir().
void Assembler::vmovdr(DRegister dn, int i, Register rt, Condition cond) {
ASSERT(TargetCPUFeatures::vfp_supported());
ASSERT((i == 0) || (i == 1));
@@ -701,7 +702,6 @@
Emit(encoding);
}
-#if 0
// Moved to ARM32::AssemblerARM32::vmovdrr().
void Assembler::vmovdrr(DRegister dm, Register rt, Register rt2,
Condition cond) {
diff --git a/src/DartARM32/assembler_arm.h b/src/DartARM32/assembler_arm.h
index d483053..9c6d9b1 100644
--- a/src/DartARM32/assembler_arm.h
+++ b/src/DartARM32/assembler_arm.h
@@ -630,9 +630,8 @@
void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL);
// Moved to ARM32::AssemblerARM32::vmovrrd().
void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL);
-#endif
+ // Moved to ARM32::AssemblerARM32::vmovqir().
void vmovdr(DRegister dd, int i, Register rt, Condition cond = AL);
-#if 0
// Moved to ARM32::AssemblerARM32::vmovss().
void vmovs(SRegister sd, SRegister sm, Condition cond = AL);
// Moved to ARM32::AssemblerARM32::vmovdd().
@@ -1409,6 +1408,7 @@
// ARM32::AssemblerARM32::veord()
// ARM32::AssemblerARM32::vld1qr()
// ARM32::AssemblerARM32::vst1qr()
+ // ARM32::AssemblerARM32::vmovrqi()
#endif
DISALLOW_ALLOCATION();
diff --git a/src/IceAssemblerARM32.cpp b/src/IceAssemblerARM32.cpp
index ba328c8..a9ef88e 100644
--- a/src/IceAssemblerARM32.cpp
+++ b/src/IceAssemblerARM32.cpp
@@ -158,7 +158,7 @@
return 3;
default:
llvm::report_fatal_error("SIMD op: Don't understand element type " +
- std::string(typeString(ElmtTy)));
+ typeIceString(ElmtTy));
}
}
@@ -213,7 +213,17 @@
return RegARM32::getEncodedQReg(Var->getRegNum());
}
-IValueT mapQRegToDReg(IValueT EncodedQReg) { return EncodedQReg << 1; }
+IValueT mapQRegToDReg(IValueT EncodedQReg) {
+ IValueT DReg = EncodedQReg << 1;
+ assert(DReg < RegARM32::getNumDRegs());
+ return DReg;
+}
+
+IValueT mapQRegToSReg(IValueT EncodedQReg) {
+ IValueT SReg = EncodedQReg << 2;
+ assert(SReg < RegARM32::getNumSRegs());
+ return SReg;
+}
IValueT getYInRegXXXXY(IValueT RegXXXXY) { return RegXXXXY & 0x1; }
@@ -1010,6 +1020,60 @@
emitInst(Encoding);
}
+void AssemblerARM32::emitInsertExtractInt(CondARM32::Cond Cond,
+ const Operand *OpQn, uint32_t Index,
+ const Operand *OpRt, bool IsExtract,
+ const char *InstName) {
+ const IValueT Rt = encodeGPRegister(OpRt, "Rt", InstName);
+ IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", InstName));
+ assert(Rt != RegARM32::Encoded_Reg_pc);
+ assert(Rt != RegARM32::Encoded_Reg_sp);
+ assert(CondARM32::isDefined(Cond));
+ const uint32_t BitSize = typeWidthInBytes(OpRt->getType()) * CHAR_BIT;
+ IValueT Opcode1 = 0;
+ IValueT Opcode2 = 0;
+ switch (BitSize) {
+ default:
+ llvm::report_fatal_error(std::string(InstName) +
+ ": Unable to process type " +
+ typeIceString(OpRt->getType()));
+ case 8:
+ assert(Index < 16);
+ Dn = Dn | mask(Index, 3, 1);
+ Opcode1 = B1 | mask(Index, 2, 1);
+ Opcode2 = mask(Index, 0, 2);
+ break;
+ case 16:
+ assert(Index < 8);
+ Dn = Dn | mask(Index, 2, 1);
+ Opcode1 = mask(Index, 1, 1);
+ Opcode2 = (mask(Index, 0, 1) << 1) | B0;
+ break;
+ case 32:
+ assert(Index < 4);
+ Dn = Dn | mask(Index, 1, 1);
+ Opcode1 = mask(Index, 0, 1);
+ break;
+ }
+ const IValueT Encoding = B27 | B26 | B25 | B11 | B9 | B8 | B4 |
+ (encodeCondition(Cond) << kConditionShift) |
+ (Opcode1 << 21) |
+ (getXXXXInRegYXXXX(Dn) << kRnShift) | (Rt << 12) |
+ (encodeBool(IsExtract) << 20) |
+ (getYInRegYXXXX(Dn) << 7) | (Opcode2 << 5);
+ emitInst(Encoding);
+}
+
+void AssemblerARM32::emitMoveSS(CondARM32::Cond Cond, IValueT Sd, IValueT Sm) {
+ // VMOV (register) - ARM section A8.8.340, encoding A2:
+ // vmov<c>.f32 <Sd>, <Sm>
+ //
+ // cccc11101D110000dddd101001M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
+ constexpr IValueT VmovssOpcode = B23 | B21 | B20 | B6;
+ constexpr IValueT S0 = 0;
+ emitVFPsss(Cond, VmovssOpcode, Sd, S0, Sm);
+}
+
void AssemblerARM32::emitMulOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd,
IValueT Rn, IValueT Rm, IValueT Rs,
bool SetFlags) {
@@ -2654,6 +2718,33 @@
emitInst(Encoding);
}
+void AssemblerARM32::vmovqir(const Operand *OpQn, uint32_t Index,
+                             const Operand *OpRt, CondARM32::Cond Cond) {
+  // VMOV (ARM core register to scalar) - ARM section A8.8.341, encoding A1:
+  //   vmov<c>.<size> <Dn[x]>, <Rt>
+  constexpr const char *Vmovqir = "vmovqir"; // Label used in diagnostics.
+  constexpr bool IsExtract = true;
+  emitInsertExtractInt(Cond, OpQn, Index, OpRt, !IsExtract, Vmovqir);
+}
+
+void AssemblerARM32::vmovqis(const Operand *OpQd, uint32_t Index,
+ const Operand *OpSm, CondARM32::Cond Cond) {
+ constexpr const char *Vmovqis = "vmovqis";
+ assert(Index < 4);
+ IValueT Sd = mapQRegToSReg(encodeQRegister(OpQd, "Qd", Vmovqis)) + Index;
+ IValueT Sm = encodeSRegister(OpSm, "Sm", Vmovqis);
+ emitMoveSS(Cond, Sd, Sm);
+}
+
+void AssemblerARM32::vmovrqi(const Operand *OpRt, const Operand *OpQn,
+                             uint32_t Index, CondARM32::Cond Cond) {
+  // VMOV (scalar to ARM core register) - ARM section A8.8.342, encoding A1:
+  //   vmov<c>.<dt> <Rt>, <Dn[x]>
+  constexpr const char *Vmovrqi = "vmovrqi"; // Label used in diagnostics.
+  constexpr bool IsExtract = true;
+  emitInsertExtractInt(Cond, OpQn, Index, OpRt, IsExtract, Vmovrqi);
+}
+
void AssemblerARM32::vmovrrd(const Operand *OpRt, const Operand *OpRt2,
const Operand *OpDm, CondARM32::Cond Cond) {
// VMOV (between two ARM core registers and a doubleword extension register).
@@ -2716,16 +2807,20 @@
void AssemblerARM32::vmovss(const Operand *OpSd, const Variable *OpSm,
CondARM32::Cond Cond) {
- // VMOV (register) - ARM section A8.8.340, encoding A2:
- // vmov<c>.f32 <Sd>, <Sm>
- //
- // cccc11101D110000dddd101001M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
constexpr const char *Vmovss = "Vmovss";
IValueT Sd = encodeSRegister(OpSd, "Sd", Vmovss);
IValueT Sm = encodeSRegister(OpSm, "Sm", Vmovss);
- constexpr IValueT VmovssOpcode = B23 | B21 | B20 | B6;
- constexpr IValueT S0 = 0;
- emitVFPsss(Cond, VmovssOpcode, Sd, S0, Sm);
+ emitMoveSS(Cond, Sd, Sm);
+}
+
+void AssemblerARM32::vmovsqi(const Operand *OpSd, const Operand *OpQm,
+ uint32_t Index, CondARM32::Cond Cond) {
+ constexpr const char *Vmovsqi = "vmovsqi";
+ const IValueT Sd = encodeSRegister(OpSd, "Sd", Vmovsqi);
+ assert(Index < 4);
+ const IValueT Sm =
+ mapQRegToSReg(encodeQRegister(OpQm, "Qm", Vmovsqi)) + Index;
+ emitMoveSS(Cond, Sd, Sm);
}
void AssemblerARM32::vmovsr(const Operand *OpSn, const Operand *OpRt,
diff --git a/src/IceAssemblerARM32.h b/src/IceAssemblerARM32.h
index 3e6d5a3..3ae857b 100644
--- a/src/IceAssemblerARM32.h
+++ b/src/IceAssemblerARM32.h
@@ -417,24 +417,48 @@
vld1qr(ElmtSize, OpQd, OpRn, TInfo);
}
+ // Dn = FpImm
void vmovd(const Operand *OpDn, const OperandARM32FlexFpImm *OpFpImm,
CondARM32::Cond Cond);
+ // Dd = Dm
void vmovdd(const Operand *OpDd, const Variable *OpDm, CondARM32::Cond Cond);
+ // Dm = Rt:Rt2
void vmovdrr(const Operand *OpDm, const Operand *OpRt, const Operand *OpRt2,
CondARM32::Cond Cond);
+ // Qd[Index] = Rt
+ void vmovqir(const Operand *OpQd, uint32_t Index, const Operand *OpRt,
+ CondARM32::Cond Cond);
+
+  // Qd[Index] = Sm, where Index < 4.
+  void vmovqis(const Operand *OpQd, uint32_t Index, const Operand *OpSm,
+               CondARM32::Cond Cond);
+
+  // Rt = Qn[Index], for integer vector elements.
+  void vmovrqi(const Operand *OpRt, const Operand *OpQn, uint32_t Index,
+               CondARM32::Cond Cond);
+
+ // Rt:Rt2 = Dm
void vmovrrd(const Operand *OpRt, const Operand *OpRt2, const Operand *OpDm,
CondARM32::Cond Cond);
+ // Rt = Sn
void vmovrs(const Operand *OpRt, const Operand *OpSn, CondARM32::Cond Cond);
+ // Sn = FpImm
void vmovs(const Operand *OpSn, const OperandARM32FlexFpImm *OpFpImm,
CondARM32::Cond Cond);
- void vmovss(const Operand *OpDd, const Variable *OpDm, CondARM32::Cond Cond);
+ // Sd = Sm
+ void vmovss(const Operand *OpSd, const Variable *OpSm, CondARM32::Cond Cond);
+ // Sd = Qm[Index]
+ void vmovsqi(const Operand *OpSd, const Operand *OpQm, uint32_t Index,
+ CondARM32::Cond Cond);
+
+ // Sn = Rt
void vmovsr(const Operand *OpSn, const Operand *OpRt, CondARM32::Cond Cond);
void vmlad(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
@@ -641,6 +665,17 @@
void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
IValueT Rm);
+ // cccc1110iiiennnntttt1011Njj10000 where cccc=Cond, tttt=Rt, Nnnnn=Dn (the
+ // half of Qn holding the element), iii=Opcode1, jj=Opcode2, Opcode1Opcode2
+ // encodes Index and the element size, and e=IsExtract.
+ void emitInsertExtractInt(CondARM32::Cond Cond, const Operand *OpQn,
+ uint32_t Index, const Operand *OpRt, bool IsExtract,
+ const char *InstName);
+
+ // cccc11101D110000dddd101001M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
+ // Assigns Sd the value of Sm.
+ void emitMoveSS(CondARM32::Cond Cond, IValueT Sd, IValueT Sm);
+
// Pattern ccccxxxxxxxfnnnnddddssss1001mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
// mmmm=Rm, ssss=Rs, f=SetFlags and xxxxxxx=Opcode.
void emitMulOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 4538aa9..4a0fc4c 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -1067,6 +1067,8 @@
}
}
+namespace {
+
// These next two functions find the D register that maps to the half of the Q
// register that this instruction is accessing.
Register getDRegister(const Variable *Src, uint32_t Index) {
@@ -1124,6 +1126,8 @@
return static_cast<Register>(RegARM32::RegTable[SrcReg].Aliases[Index + 3]);
}
+} // end of anonymous namespace
+
void InstARM32Extract::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
const Type DestTy = getDest()->getType();
@@ -1162,6 +1166,23 @@
}
}
+void InstARM32Extract::emitIAS(const Cfg *Func) const {
+ const Operand *Dest = getDest();
+ const Type DestTy = Dest->getType();
+ const Operand *Src = getSrc(0);
+ assert(isVectorType(Src->getType()));
+ assert(DestTy == typeElementType(Src->getType()));
+ auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ if (isIntegerType(DestTy)) {
+ Asm->vmovrqi(Dest, Src, Index, getPredicate());
+ assert(!Asm->needsTextFixup());
+ return;
+ }
+ assert(isFloatingType(DestTy));
+ Asm->vmovsqi(Dest, Src, Index, getPredicate());
+ assert(!Asm->needsTextFixup());
+}
+
void InstARM32Insert::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
const Variable *Dest = getDest();
@@ -1193,6 +1214,24 @@
}
}
+void InstARM32Insert::emitIAS(const Cfg *Func) const {
+  const Variable *Dest = getDest();
+  const Operand *Src = getSrc(0);
+  const Type SrcTy = Src->getType();
+  assert(isVectorType(Dest->getType()));
+  assert(typeElementType(Dest->getType()) == SrcTy);
+  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+  if (isIntegerType(SrcTy)) {
+    // Integer element comes from a core register: Dest[Index] = Src.
+    Asm->vmovqir(Dest, Index, Src, getPredicate());
+    assert(!Asm->needsTextFixup());
+    return;
+  }
+  assert(isFloatingType(SrcTy)); // Otherwise Src is moved from an S register.
+  Asm->vmovqis(Dest, Index, Src, getPredicate());
+  assert(!Asm->needsTextFixup());
+}
+
template <InstARM32::InstKindARM32 K>
void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 40518c1..96a95c5 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -1368,6 +1368,7 @@
InstARM32Extract(Func, Dest, Src0, Index, Predicate);
}
void emit(const Cfg *Func) const override;
+ void emitIAS(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Extract); }
private:
@@ -1396,6 +1397,7 @@
InstARM32Insert(Func, Dest, Src0, Index, Predicate);
}
void emit(const Cfg *Func) const override;
+ void emitIAS(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Insert); }
private:
diff --git a/tests_lit/assembler/arm32/insert-extract.ll b/tests_lit/assembler/arm32/insert-extract.ll
index c96c995..5bbb18b 100644
--- a/tests_lit/assembler/arm32/insert-extract.ll
+++ b/tests_lit/assembler/arm32/insert-extract.ll
@@ -27,9 +27,9 @@
%1 = extractelement <4 x i32> %src, i32 1
-; ASM: vmov.32 r0, d0[1]
+; ASM: vmov.32 r0, d0[1]
; DIS: 10: ee300b10
-
+; IASM-NOT: vmov.32 r0, d0[1]
ret i32 %1
}
@@ -40,8 +40,9 @@
%1 = extractelement <4 x i32> %src, i32 2
-; ASM: vmov.32 r0, d1[0]
+; ASM: vmov.32 r0, d1[0]
; DIS: 40: ee110b10
+; IASM-NOT: vmov.32 r0, d1[0]
ret i32 %1
}
@@ -53,8 +54,10 @@
%1 = extractelement <8 x i16> %src, i32 3
-; ASM: vmov.s16 r0, d0[3]
+; ASM: vmov.s16 r0, d0[3]
; DIS: 70: ee300b70
+; IASM-NOT: vmov.s16 r0, d0[3]
+
%2 = sext i16 %1 to i32
ret i32 %2
}
@@ -66,8 +69,9 @@
%1 = extractelement <8 x i16> %src, i32 4
-; ASM: vmov.s16 r0, d1[0]
+; ASM: vmov.s16 r0, d1[0]
; DIS: a0: ee110b30
+; IASM-NOT: vmov.s16 r0, d1[0]
%2 = sext i16 %1 to i32
ret i32 %2
@@ -80,8 +84,9 @@
%1 = extractelement <16 x i8> %src, i32 7
-; ASM: vmov.s8 r0, d0[7]
+; ASM: vmov.s8 r0, d0[7]
; DIS: d0: ee700b70
+; IASM-NOT: vmov.s8 r0, d0[7]
%2 = sext i8 %1 to i32
ret i32 %2
@@ -94,8 +99,9 @@
%1 = extractelement <16 x i8> %src, i32 8
-; ASM: vmov.s8 r0, d1[0]
+; ASM: vmov.s8 r0, d1[0]
; DIS: 100: ee510b10
+; IASM-NOT: vmov.s8 r0, d1[0]
%2 = sext i8 %1 to i32
ret i32 %2
@@ -108,8 +114,9 @@
%1 = extractelement <4 x float> %src, i32 1
-; ASM: vmov.f32 s0, s1
+; ASM: vmov.f32 s0, s1
; DIS: 130: eeb00a60
+; IASM-NOT: vmov.f32 s0, s1
ret float %1
}
@@ -121,8 +128,9 @@
%1 = extractelement <4 x float> %src, i32 2
-; ASM: vmov.f32 s0, s2
+; ASM: vmov.f32 s0, s2
; DIS: 160: eeb00a41
+; IASM-NOT: vmov.f32 s0, s2
ret float %1
}
@@ -134,8 +142,9 @@
%1 = insertelement <4 x i32> %src, i32 %s, i32 1
-; ASM: vmov.32 d0[1], r0
+; ASM: vmov.32 d0[1], r0
; DIS: 198: ee200b10
+; IASM-NOT: vmov.32 d0[1], r0
ret <4 x i32> %1
}
@@ -147,8 +156,9 @@
%1 = insertelement <4 x i32> %src, i32 %s, i32 2
-; ASM: vmov.32 d1[0], r0
+; ASM: vmov.32 d1[0], r0
; DIS: 1c8: ee010b10
+; IASM-NOT: vmov.32 d1[0], r0
ret <4 x i32> %1
}
@@ -161,8 +171,10 @@
%s2 = trunc i32 %s to i16
%1 = insertelement <8 x i16> %src, i16 %s2, i32 3
-; ASM: vmov.16 d0[3], r0
+; ASM: vmov.16 d0[3], r0
; DIS: 200: ee200b70
+; IASM-NOT: vmov.16 d0[3], r0
+
ret <8 x i16> %1
}
@@ -174,8 +186,10 @@
%s2 = trunc i32 %s to i16
%1 = insertelement <8 x i16> %src, i16 %s2, i32 4
-; ASM: vmov.16 d1[0], r0
+; ASM: vmov.16 d1[0], r0
; DIS: 240: ee010b30
+; IASM-NOT: vmov.16 d1[0], r0
+
ret <8 x i16> %1
}
@@ -187,8 +201,9 @@
%s2 = trunc i32 %s to i8
%1 = insertelement <16 x i8> %src, i8 %s2, i32 7
-; ASM: vmov.8 d0[7], r0
+; ASM: vmov.8 d0[7], r0
; DIS: 280: ee600b70
+; IASM-NOT: vmov.8 d0[7], r0
ret <16 x i8> %1
}
@@ -201,8 +216,9 @@
%s2 = trunc i32 %s to i8
%1 = insertelement <16 x i8> %src, i8 %s2, i32 8
-; ASM: vmov.8 d1[0], r0
+; ASM: vmov.8 d1[0], r0
; DIS: 2c0: ee410b10
+; IASM-NOT: vmov.8 d1[0], r0
ret <16 x i8> %1
}
@@ -214,8 +230,9 @@
%1 = insertelement <4 x float> %src, float %s, i32 1
-; ASM: vmov.f32 s1, s4
+; ASM: vmov.f32 s1, s4
; DIS: 2f8: eef00a42
+; IASM-NOT: vmov.f32 s1, s4
ret <4 x float> %1
}
@@ -227,8 +244,9 @@
%1 = insertelement <4 x float> %src, float %s, i32 2
-; ASM: vmov.f32 s2, s4
+; ASM: vmov.f32 s2, s4
; DIS: 328: eeb01a42
+; IASM-NOT: vmov.f32 s2, s4
ret <4 x float> %1
}