Implement vector intrinsics for ARM32.
MultiplyAddPairs is implemented using VMULL+VPADD.
MultiplyHighSigned/Unsigned is implemented using VMULL+VSHRN.
SubVectorLoad/Store is implemented using VLDR/VLD1/VSTR/VST1.
VectorPackSigned/Unsigned is implemented using two VQMOVN.
Bug b/37496078
Bug b/37496856
Bug b/37496321
Bug b/37496082
Change-Id: I141fd901d53da24ce780f503dc7ad17b94fc6ba8
Reviewed-on: https://chromium-review.googlesource.com/693049
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Reviewed-on: https://swiftshader-review.googlesource.com/12709
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/third_party/subzero/src/IceAssemblerARM32.cpp b/third_party/subzero/src/IceAssemblerARM32.cpp
index 2f1fa3c..a8e9021 100644
--- a/third_party/subzero/src/IceAssemblerARM32.cpp
+++ b/third_party/subzero/src/IceAssemblerARM32.cpp
@@ -606,6 +606,25 @@
"=pc not allowed when CC=1");
}
+enum SIMDShiftType { ST_Vshl, ST_Vshr };
+
+IValueT encodeSIMDShiftImm6(SIMDShiftType Shift, Type ElmtTy,
+ const IValueT Imm) {
+ assert(Imm > 0);
+ const SizeT MaxShift = getScalarIntBitWidth(ElmtTy);
+ assert(Imm < 2 * MaxShift);
+ assert(ElmtTy == IceType_i8 || ElmtTy == IceType_i16 ||
+ ElmtTy == IceType_i32);
+ const IValueT VshlImm = Imm - MaxShift;
+ const IValueT VshrImm = 2 * MaxShift - Imm;
+ return ((Shift == ST_Vshl) ? VshlImm : VshrImm) & (2 * MaxShift - 1);
+}
+
+IValueT encodeSIMDShiftImm6(SIMDShiftType Shift, Type ElmtTy,
+ const ConstantInteger32 *Imm6) {
+ const IValueT Imm = Imm6->getValue();
+ return encodeSIMDShiftImm6(Shift, ElmtTy, Imm);
+}
} // end of anonymous namespace
namespace Ice {
@@ -2838,6 +2857,31 @@
emitInst(Encoding);
}
+void AssemblerARM32::vldrq(const Operand *OpQd, const Operand *OpAddress,
+ CondARM32::Cond Cond, const TargetInfo &TInfo) {
+ // This is a pseudo-instruction which loads 64-bit data into a quadword
+ // vector register. It is implemented by loading into the lower doubleword.
+
+ // VLDR - ARM section A8.8.333, encoding A1.
+ // vldr<c> <Dd>, [<Rn>{, #+/-<imm>}]
+ //
+ // cccc1101UD01nnnndddd1011iiiiiiii where cccc=Cond, nnnn=Rn, Ddddd=Rd,
+ // iiiiiiii=abs(Imm >> 2), and U=1 if Imm>=0.
+ constexpr const char *Vldrd = "vldrd";
+ IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vldrd));
+ assert(CondARM32::isDefined(Cond));
+ IValueT Address;
+ EncodedOperand AddressEncoding =
+ encodeAddress(OpAddress, Address, TInfo, RotatedImm8Div4Address);
+ (void)AddressEncoding;
+ assert(AddressEncoding == EncodedAsImmRegOffset);
+ IValueT Encoding = B27 | B26 | B24 | B20 | B11 | B9 | B8 |
+ (encodeCondition(Cond) << kConditionShift) |
+ (getYInRegYXXXX(Dd) << 22) |
+ (getXXXXInRegYXXXX(Dd) << 12) | Address;
+ emitInst(Encoding);
+}
+
void AssemblerARM32::vldrs(const Operand *OpSd, const Operand *OpAddress,
CondARM32::Cond Cond, const TargetInfo &TInfo) {
// VLDR - ARM section A8.8.333, encoding A2.
@@ -2893,6 +2937,38 @@
emitInst(Encoding);
}
+void AssemblerARM32::emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn,
+ IValueT Rm, size_t ElmtSize, IValueT Align,
+ const char *InstName) {
+ assert(Utils::IsAbsoluteUint(2, Align));
+ IValueT EncodedElmtSize;
+ switch (ElmtSize) {
+ default: {
+ std::string Buffer;
+ llvm::raw_string_ostream StrBuf(Buffer);
+ StrBuf << InstName << ": found invalid vector element size " << ElmtSize;
+ llvm::report_fatal_error(StrBuf.str());
+ }
+ case 8:
+ EncodedElmtSize = 0;
+ break;
+ case 16:
+ EncodedElmtSize = 1;
+ break;
+ case 32:
+ EncodedElmtSize = 2;
+ break;
+ case 64:
+ EncodedElmtSize = 3;
+ }
+ const IValueT Encoding =
+ Opcode | (encodeCondition(CondARM32::kNone) << kConditionShift) |
+ (getYInRegYXXXX(Dd) << 22) | (Rn << kRnShift) |
+ (getXXXXInRegYXXXX(Dd) << kRdShift) | (EncodedElmtSize << 10) |
+ (Align << 4) | Rm;
+ emitInst(Encoding);
+}
+
void AssemblerARM32::vld1qr(size_t ElmtSize, const Operand *OpQd,
const Operand *OpAddress, const TargetInfo &TInfo) {
// VLD1 (multiple single elements) - ARM section A8.8.320, encoding A1:
@@ -2915,6 +2991,36 @@
emitVMem1Op(Opcode, Dd, Rn, Rm, DRegListSize2, ElmtSize, Align, Vld1qr);
}
+void AssemblerARM32::vld1(size_t ElmtSize, const Operand *OpQd,
+ const Operand *OpAddress, const TargetInfo &TInfo) {
+ // This is a pseudo-instruction for loading a single element of a quadword
+ // vector. For 64-bit the lower doubleword vector is loaded.
+
+ if (ElmtSize == 64) {
+ return vldrq(OpQd, OpAddress, Ice::CondARM32::AL, TInfo);
+ }
+
+ // VLD1 (single elements to one lane) - ARMv7-A/R section A8.6.308, encoding
+ // A1:
+ // VLD1<c>.<size> <list>, [<Rn>{@<align>}], <Rm>
+ //
+ // 111101001D10nnnnddddss00aaaammmm where Dddd=Qd, nnnn=Rn,
+ // aaaa=0 (use default alignment), size=ElmtSize, and ss is the
+ // encoding of ElmtSize.
+ constexpr const char *Vld1qr = "vld1qr";
+ const IValueT Qd = encodeQRegister(OpQd, "Qd", Vld1qr);
+ const IValueT Dd = mapQRegToDReg(Qd);
+ IValueT Address;
+ if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
+ EncodedAsImmRegOffset)
+ llvm::report_fatal_error(std::string(Vld1qr) + ": malform memory address");
+ const IValueT Rn = mask(Address, kRnShift, 4);
+ constexpr IValueT Rm = RegARM32::Reg_pc;
+ constexpr IValueT Opcode = B26 | B23 | B21;
+ constexpr IValueT Align = 0; // use default alignment.
+ emitVMem1Op(Opcode, Dd, Rn, Rm, ElmtSize, Align, Vld1qr);
+}
+
bool AssemblerARM32::vmovqc(const Operand *OpQd, const ConstantInteger32 *Imm) {
// VMOV (immediate) - ARM section A8.8.320, encoding A1:
// VMOV.<dt> <Qd>, #<Imm>
@@ -3226,6 +3332,92 @@
emitSIMDqqq(VmulqiOpcode, ElmtTy, OpQd, OpQn, OpQm, Vmulqi);
}
+void AssemblerARM32::vmulh(Type ElmtTy, const Operand *OpQd,
+ const Operand *OpQn, const Operand *OpQm,
+ bool Unsigned) {
+ // Pseudo-instruction for multiplying the corresponding elements in the lower
+ // halves of two quadword vectors, and returning the high halves.
+
+ // VMULL (integer and polynomial) - ARMv7-A/R section A8.6.337, encoding A1:
+ // VMULL<c>.<dt> <Qd>, <Dn>, <Dm>
+ //
+ // 1111001U1Dssnnnndddd11o0N0M0mmmm
+ assert(isScalarIntegerType(ElmtTy) &&
+ "vmull expects vector with integer element type");
+ assert(ElmtTy != IceType_i64 && "vmull on i64 vector not allowed");
+ constexpr const char *Vmull = "vmull";
+
+ constexpr IValueT ElmtShift = 20;
+ const IValueT ElmtSize = encodeElmtType(ElmtTy);
+ assert(Utils::IsUint(2, ElmtSize));
+
+ const IValueT VmullOpcode =
+ B25 | (Unsigned ? B24 : 0) | B23 | (B20) | B11 | B10;
+
+ const IValueT Qd = encodeQRegister(OpQd, "Qd", Vmull);
+ const IValueT Qn = encodeQRegister(OpQn, "Qn", Vmull);
+ const IValueT Qm = encodeQRegister(OpQm, "Qm", Vmull);
+
+ const IValueT Dd = mapQRegToDReg(Qd);
+ const IValueT Dn = mapQRegToDReg(Qn);
+ const IValueT Dm = mapQRegToDReg(Qm);
+
+ constexpr bool UseQRegs = false;
+ constexpr bool IsFloatTy = false;
+ emitSIMDBase(VmullOpcode | (ElmtSize << ElmtShift), Dd, Dn, Dm, UseQRegs,
+ IsFloatTy);
+
+ // Shift and narrow to obtain high halves.
+ constexpr IValueT VshrnOpcode = B25 | B23 | B11 | B4;
+ const IValueT Imm6 = encodeSIMDShiftImm6(ST_Vshr, IceType_i16, 16);
+ constexpr IValueT ImmShift = 16;
+
+ emitSIMDBase(VshrnOpcode | (Imm6 << ImmShift), Dd, 0, Dd, UseQRegs,
+ IsFloatTy);
+}
+
+void AssemblerARM32::vmlap(Type ElmtTy, const Operand *OpQd,
+ const Operand *OpQn, const Operand *OpQm) {
+ // Pseudo-instruction for multiplying the corresponding elements in the lower
+ // halves of two quadword vectors, and pairwise-adding the results.
+
+ // VMULL (integer and polynomial) - ARM section A8.8.350, encoding A1:
+ // vmull<c>.<dt> <Qd>, <Dn>, <Dm>
+ //
+ // 1111001U1Dssnnnndddd11o0N0M0mmmm
+ assert(isScalarIntegerType(ElmtTy) &&
+ "vmull expects vector with integer element type");
+ assert(ElmtTy != IceType_i64 && "vmull on i64 vector not allowed");
+ constexpr const char *Vmull = "vmull";
+
+ constexpr IValueT ElmtShift = 20;
+ const IValueT ElmtSize = encodeElmtType(ElmtTy);
+ assert(Utils::IsUint(2, ElmtSize));
+
+ bool Unsigned = false;
+ const IValueT VmullOpcode =
+ B25 | (Unsigned ? B24 : 0) | B23 | (B20) | B11 | B10;
+
+ const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vmull));
+ const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", Vmull));
+ const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", Vmull));
+
+ constexpr bool UseQRegs = false;
+ constexpr bool IsFloatTy = false;
+ emitSIMDBase(VmullOpcode | (ElmtSize << ElmtShift), Dd, Dn, Dm, UseQRegs,
+ IsFloatTy);
+
+ // VPADD - ARM section A8.8.280, encoding A1:
+ // vpadd.<dt> <Dd>, <Dm>, <Dn>
+ //
+ // 111100100Dssnnnndddd1011NQM1mmmm where Ddddd=<Dd>, Mmmmm=<Dm>, and
+ // Nnnnn=<Dn> and ss is the encoding of <dt>.
+ assert(ElmtTy != IceType_i64 && "vpadd doesn't allow i64!");
+ const IValueT VpaddOpcode =
+ B25 | B11 | B9 | B8 | B4 | ((encodeElmtType(ElmtTy) + 1) << 20);
+ emitSIMDBase(VpaddOpcode, Dd, Dd, Dd + 1, UseQRegs, IsFloatTy);
+}
+
void AssemblerARM32::vmulqf(const Operand *OpQd, const Operand *OpQn,
const Operand *OpQm) {
// VMUL (floating-point) - ARM section A8.8.351, encoding A1:
@@ -3314,6 +3506,31 @@
emitInst(Encoding);
}
+void AssemblerARM32::vstrq(const Operand *OpQd, const Operand *OpAddress,
+ CondARM32::Cond Cond, const TargetInfo &TInfo) {
+ // This is a pseudo-instruction which stores 64-bit data into a quadword
+ // vector register. It is implemented by storing into the lower doubleword.
+
+ // VSTR - ARM section A8.8.413, encoding A1:
+ // vstr<c> <Dd>, [<Rn>{, #+/-<Imm>}]
+ //
+ // cccc1101UD00nnnndddd1011iiiiiiii where cccc=Cond, nnnn=Rn, Ddddd=Rd,
+ // iiiiiiii=abs(Imm >> 2), and U=1 if Imm>=0.
+ constexpr const char *Vstrd = "vstrd";
+ IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Dd", Vstrd));
+ assert(CondARM32::isDefined(Cond));
+ IValueT Address;
+ IValueT AddressEncoding =
+ encodeAddress(OpAddress, Address, TInfo, RotatedImm8Div4Address);
+ (void)AddressEncoding;
+ assert(AddressEncoding == EncodedAsImmRegOffset);
+ IValueT Encoding = B27 | B26 | B24 | B11 | B9 | B8 |
+ (encodeCondition(Cond) << kConditionShift) |
+ (getYInRegYXXXX(Dd) << 22) |
+ (getXXXXInRegYXXXX(Dd) << 12) | Address;
+ emitInst(Encoding);
+}
+
void AssemblerARM32::vstrs(const Operand *OpSd, const Operand *OpAddress,
CondARM32::Cond Cond, const TargetInfo &TInfo) {
// VSTR - ARM section A8.8.413, encoding A2:
@@ -3357,6 +3574,37 @@
emitVMem1Op(Opcode, Dd, Rn, Rm, DRegListSize2, ElmtSize, Align, Vst1qr);
}
+void AssemblerARM32::vst1(size_t ElmtSize, const Operand *OpQd,
+ const Operand *OpAddress, const TargetInfo &TInfo) {
+
+ // This is a pseudo-instruction for storing a single element of a quadword
+ // vector. For 64-bit the lower doubleword vector is stored.
+
+ if (ElmtSize == 64) {
+ return vstrq(OpQd, OpAddress, Ice::CondARM32::AL, TInfo);
+ }
+
+ // VST1 (single element from one lane) - ARMv7-A/R section A8.6.392, encoding
+ // A1:
+ // VST1<c>.<size> <list>, [<Rn>{@<align>}], <Rm>
+ //
+ // 111101001D00nnnnddd0ss00aaaammmm where Dddd=Qd, nnnn=Rn,
+ // aaaa=0 (use default alignment), size=ElmtSize, and ss is the
+ // encoding of ElmtSize.
+ constexpr const char *Vst1qr = "vst1qr";
+ const IValueT Qd = encodeQRegister(OpQd, "Qd", Vst1qr);
+ const IValueT Dd = mapQRegToDReg(Qd);
+ IValueT Address;
+ if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
+ EncodedAsImmRegOffset)
+ llvm::report_fatal_error(std::string(Vst1qr) + ": malform memory address");
+ const IValueT Rn = mask(Address, kRnShift, 4);
+ constexpr IValueT Rm = RegARM32::Reg_pc;
+ constexpr IValueT Opcode = B26 | B23;
+ constexpr IValueT Align = 0; // use default alignment.
+ emitVMem1Op(Opcode, Dd, Rn, Rm, ElmtSize, Align, Vst1qr);
+}
+
void AssemblerARM32::vsubs(const Operand *OpSd, const Operand *OpSn,
const Operand *OpSm, CondARM32::Cond Cond) {
// VSUB (floating-point) - ARM section A8.8.415, encoding A2:
@@ -3451,6 +3699,60 @@
emitSIMDqqq(VsubqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vsubqi);
}
+void AssemblerARM32::vqmovn2(Type DestElmtTy, const Operand *OpQd,
+ const Operand *OpQm, const Operand *OpQn,
+ bool Unsigned, bool Saturating) {
+ // Pseudo-instruction for packing two quadword vectors into one quadword
+ // vector, narrowing each element using saturation or truncation.
+
+ // VQMOVN - ARMv7-A/R section A8.6.361, encoding A1:
+ // V{Q}MOVN{U}N<c>.<type><size> <Dd>, <Qm>
+ //
+ // 111100111D11ss10dddd0010opM0mmmm where Ddddd=OpQd, op=10, Mmmmm=OpQm,
+ // ss is 00 (16-bit), 01 (32-bit), or 10 (64-bit).
+
+ assert(DestElmtTy != IceType_i64 &&
+ "vmovn doesn't allow i64 destination vector elements!");
+
+ constexpr const char *Vqmovn = "vqmovn";
+ constexpr bool UseQRegs = false;
+ constexpr bool IsFloatTy = false;
+ const IValueT Qd = encodeQRegister(OpQd, "Qd", Vqmovn);
+ const IValueT Qm = encodeQRegister(OpQm, "Qm", Vqmovn);
+ const IValueT Qn = encodeQRegister(OpQn, "Qn", Vqmovn);
+ const IValueT Dd = mapQRegToDReg(Qd);
+ const IValueT Dm = mapQRegToDReg(Qm);
+ const IValueT Dn = mapQRegToDReg(Qn);
+
+ IValueT VqmovnOpcode = B25 | B24 | B23 | B21 | B20 | B17 | B9 |
+ (Saturating ? (Unsigned ? B6 : B7) : 0);
+
+ constexpr IValueT ElmtShift = 18;
+ VqmovnOpcode |= (encodeElmtType(DestElmtTy) << ElmtShift);
+
+ if (Qm != Qd) {
+ // Narrow first source operand to lower half of destination.
+ emitSIMDBase(VqmovnOpcode, Dd + 0, 0, Dm, UseQRegs, IsFloatTy);
+ // Narrow second source operand to upper half of destination.
+ emitSIMDBase(VqmovnOpcode, Dd + 1, 0, Dn, UseQRegs, IsFloatTy);
+ } else if (Qn != Qd) {
+ // Narrow second source operand to upper half of destination.
+ emitSIMDBase(VqmovnOpcode, Dd + 1, 0, Dn, UseQRegs, IsFloatTy);
+ // Narrow first source operand to lower half of destination.
+ emitSIMDBase(VqmovnOpcode, Dd + 0, 0, Dm, UseQRegs, IsFloatTy);
+
+ } else {
+ // Narrow first source operand to lower half of destination.
+ emitSIMDBase(VqmovnOpcode, Dd, 0, Dm, UseQRegs, IsFloatTy);
+
+ // VMOV Dd, Dm
+ // 111100100D10mmmmdddd0001MQM1mmmm
+ const IValueT VmovOpcode = B25 | B21 | B8 | B4;
+
+ emitSIMDBase(VmovOpcode, Dd + 1, Dd, Dd, UseQRegs, IsFloatTy);
+ }
+}
+
void AssemblerARM32::vsubqf(const Operand *OpQd, const Operand *OpQn,
const Operand *OpQm) {
// VSUB (floating-point) - ARM section A8.8.415, Encoding A1:
@@ -3523,22 +3825,6 @@
emitSIMDqqq(VshlOpcode, ElmtTy, OpQd, OpQn, OpQm, Vshl);
}
-namespace {
-enum SIMDShiftType { ST_Vshl, ST_Vshr };
-IValueT encodeSIMDShiftImm6(SIMDShiftType Shift, Type ElmtTy,
- const ConstantInteger32 *Imm6) {
- const IValueT Imm = Imm6->getValue();
- assert(Imm > 0);
- const SizeT MaxShift = getScalarIntBitWidth(ElmtTy);
- assert(Imm < MaxShift);
- assert(ElmtTy == IceType_i8 || ElmtTy == IceType_i16 ||
- ElmtTy == IceType_i32);
- const IValueT VshlImm = Imm - MaxShift;
- const IValueT VshrImm = 2 * MaxShift - Imm;
- return ((Shift == ST_Vshl) ? VshlImm : VshrImm) & (2 * MaxShift - 1);
-}
-} // end of anonymous namespace
-
void AssemblerARM32::vshlqc(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm,
const ConstantInteger32 *Imm6) {
diff --git a/third_party/subzero/src/IceAssemblerARM32.h b/third_party/subzero/src/IceAssemblerARM32.h
index a7e8481..1f80043 100644
--- a/third_party/subzero/src/IceAssemblerARM32.h
+++ b/third_party/subzero/src/IceAssemblerARM32.h
@@ -440,16 +440,34 @@
vldrs(OpSd, OpAddress, Cond, TInfo);
}
+ void vldrq(const Operand *OpQd, const Operand *OpAddress,
+ CondARM32::Cond Cond, const TargetInfo &TInfo);
+
+ void vldrq(const Operand *OpQd, const Operand *OpAddress,
+ CondARM32::Cond Cond, const TargetLowering *Lowering) {
+ const TargetInfo TInfo(Lowering);
+ vldrq(OpQd, OpAddress, Cond, TInfo);
+ }
+
// ElmtSize = #bits in vector element.
void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
const TargetInfo &TInfo);
+ void vld1(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
+ const TargetInfo &TInfo);
+
void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
const TargetLowering *Lowering) {
const TargetInfo TInfo(Lowering);
vld1qr(ElmtSize, OpQd, OpRn, TInfo);
}
+ void vld1(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
+ const TargetLowering *Lowering) {
+ const TargetInfo TInfo(Lowering);
+ vld1(ElmtSize, OpQd, OpRn, TInfo);
+ }
+
// Qn[i] = Imm for all i in vector. Returns true iff Imm can be defined as an
// Imm8 using AdvSIMDExpandImm().
bool vmovqc(const Operand *OpQd, const ConstantInteger32 *Imm);
@@ -520,6 +538,14 @@
void vmulqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
const Operand *OpQm);
+ // Integer vector multiply high.
+ void vmulh(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
+ const Operand *OpQm, bool Unsigned);
+
+ // Integer vector multiply add pairwise.
+ void vmlap(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
+ const Operand *OpQm);
+
// Float vector multiply.
void vmulqf(const Operand *OpQd, const Operand *OpQn, const Operand *OpQm);
@@ -574,6 +600,15 @@
vstrs(OpSd, OpAddress, Cond, TInfo);
}
+ void vstrq(const Operand *OpQd, const Operand *OpAddress,
+ CondARM32::Cond Cond, const TargetInfo &TInfo);
+
+ void vstrq(const Operand *OpQd, const Operand *OpAddress,
+ CondARM32::Cond Cond, const TargetLowering *Lowering) {
+ const TargetInfo TInfo(Lowering);
+ vstrq(OpQd, OpAddress, Cond, TInfo);
+ }
+
// ElmtSize = #bits in vector element.
void vst1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
const TargetInfo &TInfo);
@@ -584,6 +619,15 @@
vst1qr(ElmtSize, OpQd, OpRn, TInfo);
}
+ void vst1(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
+ const TargetInfo &TInfo);
+
+ void vst1(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
+ const TargetLowering *Lowering) {
+ const TargetInfo TInfo(Lowering);
+ vst1(ElmtSize, OpQd, OpRn, TInfo);
+ }
+
void vsubd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond);
@@ -603,6 +647,10 @@
void vqaddqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
+ // Integer vector packing with optional saturation.
+ void vqmovn2(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+ const Operand *OpQn, bool Unsigned, bool Saturating);
+
// Float vector subtract
void vsubqf(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
@@ -732,6 +780,11 @@
DRegListSize NumDRegs, size_t ElmtSize, IValueT Align,
const char *InstName);
+ // Pattern 111100000D00nnnnddddss00aaaammmm | Opcode where Ddddd=Dd, nnnn=Rn,
+ // mmmm=Rm, ElmtSize in {8, 16, 32, 64} and defines ss, and aa=Align.
+ void emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn, IValueT Rm,
+ size_t ElmtSize, IValueT Align, const char *InstName);
+
// Pattern cccc011100x1dddd1111mmmm0001nnn where cccc=Cond,
// x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm.
void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
diff --git a/third_party/subzero/src/IceInstARM32.cpp b/third_party/subzero/src/IceInstARM32.cpp
index 043f4a6..2f12b85 100644
--- a/third_party/subzero/src/IceInstARM32.cpp
+++ b/third_party/subzero/src/IceInstARM32.cpp
@@ -1087,6 +1087,87 @@
assert(!Asm->needsTextFixup());
}
+template <> void InstARM32Vqmovn2::emitIAS(const Cfg *Func) const {
+ auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ const Operand *Src0 = getSrc(0);
+ const Operand *Src1 = getSrc(1);
+ Type SrcTy = Src0->getType();
+ Type DestTy = Dest->getType();
+ bool Unsigned = true;
+ bool Saturating = true;
+ switch (SrcTy) {
+ default:
+ llvm::report_fatal_error("Vqmovn2 not defined on type " +
+ typeStdString(SrcTy));
+ case IceType_v8i16:
+ case IceType_v4i32:
+ switch (Sign) {
+ case InstARM32::FS_None:
+ Unsigned = true;
+ Saturating = false;
+ Asm->vqmovn2(typeElementType(DestTy), Dest, Src0, Src1, Unsigned,
+ Saturating);
+ break;
+ case InstARM32::FS_Unsigned:
+ Unsigned = true;
+ Saturating = true;
+ Asm->vqmovn2(typeElementType(DestTy), Dest, Src0, Src1, Unsigned,
+ Saturating);
+ break;
+ case InstARM32::FS_Signed:
+ Unsigned = false;
+ Saturating = true;
+ Asm->vqmovn2(typeElementType(DestTy), Dest, Src0, Src1, Unsigned,
+ Saturating);
+ break;
+ }
+ break;
+ }
+ assert(!Asm->needsTextFixup());
+}
+
+template <> void InstARM32Vmulh::emitIAS(const Cfg *Func) const {
+ auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ const Operand *Src0 = getSrc(0);
+ Type SrcTy = Src0->getType();
+ bool Unsigned = true;
+ switch (SrcTy) {
+ default:
+ llvm::report_fatal_error("Vmulh not defined on type " +
+ typeStdString(SrcTy));
+ case IceType_v8i16:
+ switch (Sign) {
+ case InstARM32::FS_None: // defaults to unsigned.
+ case InstARM32::FS_Unsigned:
+ Unsigned = true;
+ Asm->vmulh(typeElementType(SrcTy), Dest, getSrc(0), getSrc(1), Unsigned);
+ break;
+ case InstARM32::FS_Signed:
+ Unsigned = false;
+ Asm->vmulh(typeElementType(SrcTy), Dest, getSrc(0), getSrc(1), Unsigned);
+ break;
+ }
+ break;
+ }
+ assert(!Asm->needsTextFixup());
+}
+
+template <> void InstARM32Vmlap::emitIAS(const Cfg *Func) const {
+ auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ const Operand *Src0 = getSrc(0);
+ const Operand *Src1 = getSrc(1);
+ Type SrcTy = Src0->getType();
+ switch (SrcTy) {
+ default:
+ llvm::report_fatal_error("Vmlap not defined on type " +
+ typeStdString(SrcTy));
+ case IceType_v8i16:
+ Asm->vmlap(typeElementType(SrcTy), Dest, Src0, Src1);
+ break;
+ }
+ assert(!Asm->needsTextFixup());
+}
+
template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
@@ -1336,6 +1417,14 @@
addSource(Mem);
}
+InstARM32Vstr1::InstARM32Vstr1(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
+ CondARM32::Cond Predicate, SizeT Size)
+ : InstARM32Pred(Func, InstARM32::Vstr1, 2, nullptr, Predicate) {
+ addSource(Value);
+ addSource(Mem);
+ this->Size = Size;
+}
+
InstARM32Trap::InstARM32Trap(Cfg *Func)
: InstARM32(Func, InstARM32::Trap, 0, nullptr) {}
@@ -1654,6 +1743,8 @@
// Mov-like ops
template <> const char *InstARM32Ldr::Opcode = "ldr";
template <> const char *InstARM32Ldrex::Opcode = "ldrex";
+template <> const char *InstARM32Vldr1d::Opcode = "vldr1d";
+template <> const char *InstARM32Vldr1q::Opcode = "vldr1q";
// Three-addr ops
template <> const char *InstARM32Adc::Opcode = "adc";
template <> const char *InstARM32Add::Opcode = "add";
@@ -1693,6 +1784,12 @@
const char *InstARM32ThreeAddrFP<InstARM32::Vqadd>::Opcode = "vqadd";
template <>
const char *InstARM32ThreeAddrFP<InstARM32::Vqsub>::Opcode = "vqsub";
+template <>
+const char *InstARM32ThreeAddrFP<InstARM32::Vqmovn2>::Opcode = "vqmovn2";
+template <>
+const char *InstARM32ThreeAddrFP<InstARM32::Vmulh>::Opcode = "vmulh";
+template <>
+const char *InstARM32ThreeAddrFP<InstARM32::Vmlap>::Opcode = "vmlap";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls";
@@ -2154,6 +2251,62 @@
getSrc(0)->emit(Func);
}
+template <> void InstARM32Vldr1d::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 1);
+ assert(getDest()->hasReg());
+ Variable *Dest = getDest();
+ Type Ty = Dest->getType();
+ const bool IsVector = isVectorType(Ty);
+ const bool IsScalarFloat = isScalarFloatingType(Ty);
+ const char *ActualOpcode =
+ IsVector ? "vld1" : (IsScalarFloat ? "vldr" : "ldr");
+ const char *WidthString = IsVector ? "" : getWidthString(Ty);
+ Str << "\t" << ActualOpcode;
+ const bool IsVInst = IsVector || IsScalarFloat;
+ if (IsVInst) {
+ Str << getPredicate() << WidthString;
+ } else {
+ Str << WidthString << getPredicate();
+ }
+ if (IsVector)
+ Str << "." << getVecElmtBitsize(Ty);
+ Str << "\t";
+ getDest()->emit(Func);
+ Str << ", ";
+ getSrc(0)->emit(Func);
+}
+
+template <> void InstARM32Vldr1q::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 1);
+ assert(getDest()->hasReg());
+ Variable *Dest = getDest();
+ Type Ty = Dest->getType();
+ const bool IsVector = isVectorType(Ty);
+ const bool IsScalarFloat = isScalarFloatingType(Ty);
+ const char *ActualOpcode =
+ IsVector ? "vld1" : (IsScalarFloat ? "vldr" : "ldr");
+ const char *WidthString = IsVector ? "" : getWidthString(Ty);
+ Str << "\t" << ActualOpcode;
+ const bool IsVInst = IsVector || IsScalarFloat;
+ if (IsVInst) {
+ Str << getPredicate() << WidthString;
+ } else {
+ Str << WidthString << getPredicate();
+ }
+ if (IsVector)
+ Str << "." << getVecElmtBitsize(Ty);
+ Str << "\t";
+ getDest()->emit(Func);
+ Str << ", ";
+ getSrc(0)->emit(Func);
+}
+
template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
@@ -2187,6 +2340,20 @@
}
}
+template <> void InstARM32Vldr1d::emitIAS(const Cfg *Func) const {
+ assert(getSrcSize() == 1);
+ auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ Variable *Dest = getDest();
+ Asm->vld1(32, Dest, getSrc(0), Func->getTarget());
+}
+
+template <> void InstARM32Vldr1q::emitIAS(const Cfg *Func) const {
+ assert(getSrcSize() == 1);
+ auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ Variable *Dest = getDest();
+ Asm->vld1(64, Dest, getSrc(0), Func->getTarget());
+}
+
template <> void InstARM32Ldrex::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
@@ -2593,6 +2760,51 @@
getSrc(0)->dump(Func);
}
+void InstARM32Vstr1::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 2);
+ Type Ty = getSrc(0)->getType();
+ const bool IsVectorStore = isVectorType(Ty);
+ const bool IsScalarFloat = isScalarFloatingType(Ty);
+ const char *Opcode =
+ IsVectorStore ? "vst1" : (IsScalarFloat ? "vstr" : "str");
+ Str << "\t" << Opcode;
+ const bool IsVInst = IsVectorStore || IsScalarFloat;
+ if (IsVInst) {
+ Str << getPredicate() << getWidthString(Ty);
+ } else {
+ Str << getWidthString(Ty) << getPredicate();
+ }
+ if (IsVectorStore)
+ Str << "." << getVecElmtBitsize(Ty);
+ Str << "\t";
+ getSrc(0)->emit(Func);
+ Str << ", ";
+ getSrc(1)->emit(Func);
+}
+
+void InstARM32Vstr1::emitIAS(const Cfg *Func) const {
+ assert(getSrcSize() == 2);
+ auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ const Operand *Src0 = getSrc(0);
+ const Operand *Src1 = getSrc(1);
+ Asm->vst1(Size, Src0, Src1, Func->getTarget());
+}
+
+void InstARM32Vstr1::dump(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrDump();
+ Type Ty = getSrc(0)->getType();
+ dumpOpcodePred(Str, "str", Ty);
+ Str << " ";
+ getSrc(1)->dump(Func);
+ Str << ", ";
+ getSrc(0)->dump(Func);
+}
+
void InstARM32Trap::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
@@ -3166,10 +3378,14 @@
template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
+template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqmovn2>;
+template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vmulh>;
+template class InstARM32ThreeAddrFP<InstARM32::Vmlap>;
template class InstARM32LoadBase<InstARM32::Ldr>;
template class InstARM32LoadBase<InstARM32::Ldrex>;
-
+template class InstARM32LoadBase<InstARM32::Vldr1d>;
+template class InstARM32LoadBase<InstARM32::Vldr1q>;
template class InstARM32TwoAddrGPR<InstARM32::Movt>;
template class InstARM32UnaryopGPR<InstARM32::Movw, false>;
diff --git a/third_party/subzero/src/IceInstARM32.h b/third_party/subzero/src/IceInstARM32.h
index e1344dc..593d96d 100644
--- a/third_party/subzero/src/IceInstARM32.h
+++ b/third_party/subzero/src/IceInstARM32.h
@@ -435,18 +435,24 @@
Vcvt,
Vdiv,
Veor,
+ Vldr1d,
+ Vldr1q,
Vmla,
+ Vmlap,
Vmls,
Vmrs,
Vmul,
+ Vmulh,
Vmvn,
Vneg,
Vorr,
Vqadd,
+ Vqmovn2,
Vqsub,
Vshl,
Vshr,
Vsqrt,
+ Vstr1,
Vsub
};
@@ -1020,11 +1026,16 @@
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vqadd = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
using InstARM32Vqsub = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
+using InstARM32Vqmovn2 = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqmovn2>;
+using InstARM32Vmulh = InstARM32ThreeAddrSignAwareFP<InstARM32::Vmulh>;
+using InstARM32Vmlap = InstARM32ThreeAddrFP<InstARM32::Vmlap>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
+using InstARM32Vldr1d = InstARM32LoadBase<InstARM32::Vldr1d>;
+using InstARM32Vldr1q = InstARM32LoadBase<InstARM32::Vldr1q>;
/// MovT leaves the bottom bits alone so dest is also a source. This helps
/// indicate that a previous MovW setting dest is not dead code.
using InstARM32Movt = InstARM32TwoAddrGPR<InstARM32::Movt>;
@@ -1336,6 +1347,33 @@
OperandARM32Mem *Mem, CondARM32::Cond Predicate);
};
+/// Sub-vector store instruction. It's important for liveness that there is no
+/// Dest operand (OperandARM32Mem instead of Dest Variable).
+class InstARM32Vstr1 final : public InstARM32Pred {
+ InstARM32Vstr1() = delete;
+ InstARM32Vstr1(const InstARM32Vstr1 &) = delete;
+ InstARM32Vstr1 &operator=(const InstARM32Vstr1 &) = delete;
+
+public:
+ /// Value must be a register.
+ static InstARM32Vstr1 *create(Cfg *Func, Variable *Value,
+ OperandARM32Mem *Mem, CondARM32::Cond Predicate,
+ SizeT Size) {
+ return new (Func->allocate<InstARM32Vstr1>())
+ InstARM32Vstr1(Func, Value, Mem, Predicate, Size);
+ }
+ void emit(const Cfg *Func) const override;
+ void emitIAS(const Cfg *Func) const override;
+ void dump(const Cfg *Func) const override;
+ static bool classof(const Inst *Instr) { return isClassof(Instr, Vstr1); }
+
+private:
+ InstARM32Vstr1(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
+ CondARM32::Cond Predicate, SizeT Size);
+
+ SizeT Size;
+};
+
class InstARM32Trap : public InstARM32 {
InstARM32Trap() = delete;
InstARM32Trap(const InstARM32Trap &) = delete;
@@ -1630,6 +1668,8 @@
template <> void InstARM32Ldr::emit(const Cfg *Func) const;
template <> void InstARM32Movw::emit(const Cfg *Func) const;
template <> void InstARM32Movt::emit(const Cfg *Func) const;
+template <> void InstARM32Vldr1d::emit(const Cfg *Func) const;
+template <> void InstARM32Vldr1q::emit(const Cfg *Func) const;
} // end of namespace ARM32
} // end of namespace Ice
diff --git a/third_party/subzero/src/IceTargetLoweringARM32.cpp b/third_party/subzero/src/IceTargetLoweringARM32.cpp
index 65dca3a..9856f7a 100644
--- a/third_party/subzero/src/IceTargetLoweringARM32.cpp
+++ b/third_party/subzero/src/IceTargetLoweringARM32.cpp
@@ -5331,23 +5331,75 @@
return;
}
case Intrinsics::LoadSubVector: {
- UnimplementedLoweringError(this, Instr);
+ assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
+ "LoadSubVector second argument must be a constant");
+ Variable *Dest = Instr->getDest();
+ Type Ty = Dest->getType();
+ auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
+ Operand *Addr = Instr->getArg(0);
+ OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
+ doMockBoundsCheck(Src);
+
+ if (Dest->isRematerializable()) {
+ Context.insert<InstFakeDef>(Dest);
+ return;
+ }
+
+ auto *T = makeReg(Ty);
+ switch (SubVectorSize->getValue()) {
+ case 4:
+ _vldr1d(T, Src);
+ break;
+ case 8:
+ _vldr1q(T, Src);
+ break;
+ default:
+ Func->setError("Unexpected size for LoadSubVector");
+ return;
+ }
+ _mov(Dest, T); // FIXME: confirm whether this mov is needed, or if Dest could be loaded directly.
return;
}
case Intrinsics::StoreSubVector: {
- UnimplementedLoweringError(this, Instr);
+ assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
+ "StoreSubVector third argument must be a constant");
+ auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
+ Variable *Value = legalizeToReg(Instr->getArg(0));
+ Operand *Addr = Instr->getArg(1);
+ OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
+ doMockBoundsCheck(NewAddr);
+
+ Value = legalizeToReg(Value); // NOTE(review): redundant — getArg(0) was already legalized to a register above.
+
+ switch (SubVectorSize->getValue()) {
+ case 4:
+ _vstr1d(Value, NewAddr);
+ break;
+ case 8:
+ _vstr1q(Value, NewAddr);
+ break;
+ default:
+ Func->setError("Unexpected size for StoreSubVector");
+ return;
+ }
return;
}
case Intrinsics::MultiplyAddPairs: {
- UnimplementedLoweringError(this, Instr);
+ Variable *Src0 = legalizeToReg(Instr->getArg(0));
+ Variable *Src1 = legalizeToReg(Instr->getArg(1));
+ Variable *T = makeReg(DestTy);
+ _vmlap(T, Src0, Src1);
+ _mov(Dest, T);
return;
}
- case Intrinsics::MultiplyHighSigned: {
- UnimplementedLoweringError(this, Instr);
- return;
- }
+ case Intrinsics::MultiplyHighSigned:
case Intrinsics::MultiplyHighUnsigned: {
- UnimplementedLoweringError(this, Instr);
+ bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
+ Variable *Src0 = legalizeToReg(Instr->getArg(0));
+ Variable *Src1 = legalizeToReg(Instr->getArg(1));
+ Variable *T = makeReg(DestTy);
+ _vmulh(T, Src0, Src1, Unsigned);
+ _mov(Dest, T);
return;
}
case Intrinsics::Nearbyint: {
@@ -5372,12 +5424,15 @@
_mov(Dest, T);
return;
}
- case Intrinsics::VectorPackSigned: {
- UnimplementedLoweringError(this, Instr);
- return;
- }
+ case Intrinsics::VectorPackSigned:
case Intrinsics::VectorPackUnsigned: {
- UnimplementedLoweringError(this, Instr);
+ bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
+ bool Saturating = true;
+ Variable *Src0 = legalizeToReg(Instr->getArg(0));
+ Variable *Src1 = legalizeToReg(Instr->getArg(1));
+ Variable *T = makeReg(DestTy);
+ _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
+ _mov(Dest, T);
return;
}
default: // UnknownIntrinsic
diff --git a/third_party/subzero/src/IceTargetLoweringARM32.h b/third_party/subzero/src/IceTargetLoweringARM32.h
index be848ed..a82337a 100644
--- a/third_party/subzero/src/IceTargetLoweringARM32.h
+++ b/third_party/subzero/src/IceTargetLoweringARM32.h
@@ -888,18 +888,33 @@
void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Veor>(Dest, Src0, Src1);
}
+ void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
+ }
+ void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
+ }
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vmrs>(Pred);
}
void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
}
+ void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
+ Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
+ }
void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
}
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
}
+ void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
+ Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
+ ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
+ }
void _vmvn(Variable *Dest, Variable *Src0) {
Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
}
@@ -914,6 +929,13 @@
Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
}
+ void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
+ bool Saturating) {
+ Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
+ ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
+ : InstARM32::FS_Signed)
+ : InstARM32::FS_None);
+ }
void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
@@ -933,6 +955,14 @@
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
}
+ void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
+ }
+ void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
+ }
void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
}