Subzero: ARM32: lowering of vector insert and extract.
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1655313002 .
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 6b9c5a0..e9b4f61 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -20,7 +20,6 @@
#include "IceCfgNode.h"
#include "IceInst.h"
#include "IceOperand.h"
-#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.h"
namespace Ice {
@@ -28,6 +27,8 @@
namespace {
+using Register = RegARM32::AllRegisters;
+
// maximum number of registers allowed in vpush/vpop.
static constexpr SizeT VpushVpopMaxConsecRegs = 16;
@@ -1043,6 +1044,132 @@
}
}
+// Finds the D register that maps to the half of the Q register holding Src
+// that contains element Index: the D alias with the lower encoding for the
+// first half of the elements, the one with the higher encoding otherwise.
+Register getDRegister(const Variable *Src, uint32_t Index) {
+  assert(Src->hasReg());
+  const auto SrcReg = static_cast<Register>(Src->getRegNum());
+
+  const RegARM32::RegTableType &SrcEntry = RegARM32::RegTable[SrcReg];
+  assert(SrcEntry.IsVec128);
+
+  const uint32_t NumElements = typeNumElements(Src->getType());
+  assert(Index < NumElements);
+
+  // This code assumes the Aliases list goes Q_n, D_2n, D_2n+1. We have a Q
+  // register that's made up of two D registers; the asserts below help to
+  // check that the alias list still has that shape, i.e. that entries 1 and 2
+  // exist and have consecutive encodings.
+  //
+  // TODO(jpp): find a way to do this that doesn't rely on ordering of the
+  // alias list.
+  assert(SrcEntry.NumAliases >= 3);
+  assert(RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding + 1 ==
+         RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding);
+
+  // Elements in the first half of the vector are read through the first D
+  // alias, the remaining ones through the second.
+  const bool IsLowHalf = Index < NumElements / 2;
+  return static_cast<Register>(SrcEntry.Aliases[IsLowHalf ? 1 : 2]);
+}
+
+// Translates an element index within a vector of NumElements elements into the
+// index relative to the half of the vector that contains it: indices in the
+// first half are returned unchanged, the others are rebased by NumElements / 2.
+constexpr uint32_t getDIndex(uint32_t NumElements, uint32_t Index) {
+  return Index - ((Index < NumElements / 2) ? 0u : NumElements / 2);
+}
+
+// For floating point values, we can insertelement or extractelement by moving
+// directly from an S register. This function finds the S register that
+// aliases element Index of the Q register holding Src.
+Register getSRegister(const Variable *Src, uint32_t Index) {
+  assert(Src->hasReg());
+  const auto SrcReg = static_cast<Register>(Src->getRegNum());
+  const RegARM32::RegTableType &SrcEntry = RegARM32::RegTable[SrcReg];
+
+  // For floating point values, we need to be allocated to Q0 - Q7, so we can
+  // directly access the value we want as one of the S registers. The upper
+  // bound alone would also accept any register numbered below q8, so check
+  // that we really have a 128-bit vector register as well.
+  assert(Src->getType() == IceType_v4f32);
+  assert(SrcEntry.IsVec128);
+  assert(SrcReg < RegARM32::Reg_q8);
+
+  // This part assumes the register alias list goes q0, d0, d1, s0, s1, s2, s3,
+  // i.e. seven entries with the S registers starting at position 3.
+  //
+  // TODO(jpp): find a way to do this that doesn't rely on ordering of the alias
+  // list.
+  constexpr uint32_t FirstSAlias = 3;
+  assert(Index < 4);
+  assert(SrcEntry.NumAliases >= 7);
+
+  return static_cast<Register>(SrcEntry.Aliases[Index + FirstSAlias]);
+}
+
+// Writes the textual assembly for an element extract to the context's emit
+// stream. Integer destinations are read with "vmov.<dt> <dest>, Dn[x]" from
+// the D register covering the element; floating point destinations are read
+// with "vmov.f32 <dest>, Sn" from the S register aliasing the element.
+void InstARM32Extract::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  const Variable *Dest = getDest();
+  const Type DestTy = Dest->getType();
+  const auto *Src = llvm::cast<Variable>(getSrc(0));
+
+  if (isIntegerType(DestTy)) {
+    // Destinations narrower than 32 bits use the ".s<size>" data type, all
+    // others the plain ".<size>" form.
+    const uint32_t BitSize = typeWidthInBytes(DestTy) * CHAR_BIT;
+    const char *DataTypePrefix = (BitSize < 32) ? ".s" : ".";
+    Str << "\t"
+        << "vmov" << getPredicate() << DataTypePrefix << BitSize << "\t";
+    Dest->emit(Func);
+    Str << ", ";
+
+    // Name the D register half, then the element's index within that half.
+    const size_t VectorSize = typeNumElements(Src->getType());
+    const Register SrcReg = getDRegister(Src, Index);
+    Str << RegARM32::RegTable[SrcReg].Name << "["
+        << getDIndex(VectorSize, Index) << "]";
+    return;
+  }
+
+  if (isFloatingType(DestTy)) {
+    const Register SrcReg = getSRegister(Src, Index);
+    Str << "\t"
+        << "vmov" << getPredicate() << ".f32"
+        << "\t";
+    Dest->emit(Func);
+    Str << ", " << RegARM32::RegTable[SrcReg].Name;
+    return;
+  }
+
+  assert(false && "Invalid extract type");
+}
+
+// Writes the textual assembly for an element insert to the context's emit
+// stream. Integer vectors are written with "vmov.<size> Dn[x], <src>" into the
+// D register covering the element; floating point vectors are written with
+// "vmov.f32 Sn, <src>" into the S register aliasing the element.
+void InstARM32Insert::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  const Variable *Dest = getDest();
+  const Type DestTy = Dest->getType();
+
+  const auto *Src = llvm::cast<Variable>(getSrc(0));
+
+  if (isIntegerType(DestTy)) {
+    Str << "\t"
+        << "vmov" << getPredicate();
+    const size_t BitSize = typeWidthInBytes(typeElementType(DestTy)) * CHAR_BIT;
+    Str << "." << BitSize << "\t";
+
+    // Index is the element's position in the whole vector; LaneIndex is its
+    // position within the D register half that holds it. Keeping the two under
+    // distinct names avoids shadowing the member.
+    const uint32_t VectorSize = typeNumElements(DestTy);
+    const Register DestReg = getDRegister(Dest, Index);
+    const uint32_t LaneIndex = getDIndex(VectorSize, Index);
+    Str << RegARM32::RegTable[DestReg].Name;
+    Str << "[" << LaneIndex << "], ";
+    Src->emit(Func);
+  } else if (isFloatingType(DestTy)) {
+    Str << "\t"
+        << "vmov" << getPredicate() << ".f32"
+        << "\t";
+    const Register DestReg = getSRegister(Dest, Index);
+    Str << RegARM32::RegTable[DestReg].Name << ", ";
+    Src->emit(Func);
+  } else {
+    assert(false && "Invalid insert type");
+  }
+}
+
template <InstARM32::InstKindARM32 K>
void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h
index 2be562a..56ca8af 100644
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -23,6 +23,7 @@
#include "IceInst.h"
#include "IceInstARM32.def"
#include "IceOperand.h"
+#include "IceRegistersARM32.h"
namespace Ice {
namespace ARM32 {
@@ -389,6 +390,8 @@
Cmp,
Dmb,
Eor,
+ Extract,
+ Insert,
Label,
Ldr,
Ldrex,
@@ -1349,6 +1352,62 @@
Variable *DestHi = nullptr;
};
+/// Generates vmov Rd, Dn[x] instructions, and their related floating point
+/// versions.
+///
+/// The instruction has a single source (the vector, Src0) and reads the element
+/// at a fixed Index into Dest. Only textual emission is declared here.
+class InstARM32Extract final : public InstARM32Pred {
+ InstARM32Extract() = delete;
+ InstARM32Extract(const InstARM32Extract &) = delete;
+ InstARM32Extract &operator=(const InstARM32Extract &) = delete;
+
+public:
+ /// Constructs a new instruction in storage obtained from Func->allocate().
+ /// Index must be smaller than the number of elements of Src0's type (checked
+ /// by an assert in the constructor).
+ static InstARM32Extract *create(Cfg *Func, Variable *Dest, Variable *Src0,
+ uint32_t Index, CondARM32::Cond Predicate) {
+ return new (Func->allocate<InstARM32Extract>())
+ InstARM32Extract(Func, Dest, Src0, Index, Predicate);
+ }
+ /// Writes the textual form of the instruction.
+ void emit(const Cfg *Func) const override;
+ /// Returns whether Inst is an Extract instruction, as decided by isClassof().
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Extract); }
+
+private:
+ // The literal 1 passed to InstARM32Pred is presumably the number of sources
+ // to reserve (exactly one source is added below) -- TODO confirm.
+ InstARM32Extract(Cfg *Func, Variable *Dest, Variable *Src0, uint32_t Index,
+ CondARM32::Cond Predicate)
+ : InstARM32Pred(Func, InstARM32::Extract, 1, Dest, Predicate),
+ Index(Index) {
+ // Here Index names the constructor parameter, not the member.
+ assert(Index < typeNumElements(Src0->getType()));
+ addSource(Src0);
+ }
+
+ // Position, within the source vector, of the element to extract.
+ const uint32_t Index;
+};
+
+/// Generates vmov Dn[x], Rd instructions, and their related floating point
+/// versions.
+///
+/// The instruction has a single source (the scalar, Src0) and writes it into
+/// the element at a fixed Index of the vector Dest. Only textual emission is
+/// declared here.
+class InstARM32Insert final : public InstARM32Pred {
+ InstARM32Insert() = delete;
+ InstARM32Insert(const InstARM32Insert &) = delete;
+ InstARM32Insert &operator=(const InstARM32Insert &) = delete;
+
+public:
+ /// Constructs a new instruction in storage obtained from Func->allocate().
+ /// Index must be smaller than the number of elements of Dest's type (checked
+ /// by an assert in the constructor).
+ static InstARM32Insert *create(Cfg *Func, Variable *Dest, Variable *Src0,
+ uint32_t Index, CondARM32::Cond Predicate) {
+ return new (Func->allocate<InstARM32Insert>())
+ InstARM32Insert(Func, Dest, Src0, Index, Predicate);
+ }
+ /// Writes the textual form of the instruction.
+ void emit(const Cfg *Func) const override;
+ /// Returns whether Inst is an Insert instruction, as decided by isClassof().
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Insert); }
+
+private:
+ // The literal 1 passed to InstARM32Pred is presumably the number of sources
+ // to reserve (exactly one source is added below) -- TODO confirm.
+ InstARM32Insert(Cfg *Func, Variable *Dest, Variable *Src0, uint32_t Index,
+ CondARM32::Cond Predicate)
+ : InstARM32Pred(Func, InstARM32::Insert, 1, Dest, Predicate),
+ Index(Index) {
+ // Here Index names the constructor parameter, not the member.
+ assert(Index < typeNumElements(Dest->getType()));
+ addSource(Src0);
+ }
+
+ // Position, within the destination vector, of the element to overwrite.
+ const uint32_t Index;
+};
+
class InstARM32Vcmp final : public InstARM32Pred {
InstARM32Vcmp() = delete;
InstARM32Vcmp(const InstARM32Vcmp &) = delete;
diff --git a/src/IceRegistersARM32.h b/src/IceRegistersARM32.h
index 94f45c1..1ce05ab 100644
--- a/src/IceRegistersARM32.h
+++ b/src/IceRegistersARM32.h
@@ -219,8 +219,12 @@
return RegTable[RegNum].Name;
}
-// Extend enum RegClass with ARM32-specific register classes (if any).
-enum RegClassARM32 : uint8_t { RCARM32_NUM = RC_Target };
+// Extend enum RegClass with ARM32-specific register classes. The ARM32 classes
+// are numbered starting at RC_Target.
+enum RegClassARM32 : uint8_t {
+ RCARM32_QtoS = RC_Target, // Denotes Q registers that are aliased by S
+ // registers.
+ // One past the last ARM32-specific class; serves as the exclusive upper
+ // bound when a register class value is validated.
+ RCARM32_NUM
+};
} // end of namespace RegARM32
} // end of namespace ARM32
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 4b70dad..099ceb2 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -296,7 +296,9 @@
llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
+ llvm::SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
+ const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
const auto &Entry = RegARM32::RegTable[i];
IntegerRegisters[i] = Entry.IsInt;
@@ -305,6 +307,9 @@
Float64Registers[i] = Entry.IsFP64;
VectorRegisters[i] = Entry.IsVec128;
RegisterAliases[i].resize(RegARM32::Reg_NUM);
+ // TODO(eholk): It would be better to store a QtoS flag in the
+ // IceRegistersARM32 table than to compare their encodings here.
+ QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
for (int j = 0; j < Entry.NumAliases; ++j) {
assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
RegisterAliases[i].set(Entry.Aliases[j]);
@@ -340,6 +345,7 @@
TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
+ TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;
for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
@@ -3834,7 +3840,28 @@
}
void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
- UnimplementedLoweringError(this, Instr);
+  // Lowers an extractelement with a constant index into a single
+  // _extractelement bracketed by copies into and out of fresh temporaries.
+  Variable *Dest = Instr->getDest();
+  const Type DestTy = Dest->getType();
+
+  Variable *Src0 = legalizeToReg(Instr->getSrc(0));
+  const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Instr->getSrc(1));
+  if (Imm == nullptr) {
+    assert(false && "extractelement requires a constant index");
+    return;
+  }
+
+  const uint32_t Index = Imm->getValue();
+  Variable *T = makeReg(DestTy);
+  Variable *TSrc0 = makeReg(Src0->getType());
+
+  // We need to make sure the source is in a suitable register: a floating
+  // point element is read through an S register, so the vector copy is
+  // restricted to the Q registers that are aliased by S registers.
+  if (isFloatingType(DestTy))
+    TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
+
+  _mov(TSrc0, Src0);
+  _extractelement(T, TSrc0, Index);
+  _mov(Dest, T);
}
namespace {
@@ -4229,7 +4256,28 @@
}
void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
- UnimplementedLoweringError(this, Instr);
+  // Lowers an insertelement with a constant index: copy the source vector
+  // into a temporary, overwrite one element of it, then copy the temporary to
+  // the destination.
+  Variable *Dest = Instr->getDest();
+  const Type DestTy = Dest->getType();
+
+  Variable *Src0 = legalizeToReg(Instr->getSrc(0));
+  Variable *Src1 = legalizeToReg(Instr->getSrc(1));
+  const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Instr->getSrc(2));
+  if (Imm == nullptr) {
+    assert(false && "insertelement requires a constant index");
+    return;
+  }
+
+  const uint32_t Index = Imm->getValue();
+  Variable *T = makeReg(DestTy);
+
+  // A floating point element is written through an S register, so the
+  // temporary is restricted to the Q registers that are aliased by S
+  // registers.
+  if (isFloatingType(DestTy))
+    T->setRegClass(RegARM32::RCARM32_QtoS);
+
+  _mov(T, Src0);
+  _insertelement(T, Src1, Index);
+  // NOTE(review): T already holds the copy of Src0; this presumably flags the
+  // insert above as redefining T rather than defining it afresh -- confirm
+  // against _set_dest_redefined().
+  _set_dest_redefined();
+  _mov(Dest, T);
}
namespace {
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 00f1483..b35649c 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -85,13 +85,18 @@
const llvm::SmallBitVector &
getRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
- assert(RC < RC_Target);
- return TypeToRegisterSet[RC];
+    // Every class is looked up the same way. Besides the classes below
+    // RC_Target, the only one accepted is the ARM32-specific RCARM32_QtoS.
+    assert(RC < RC_Target || static_cast<RegARM32::RegClassARM32>(RC) ==
+                                 RegARM32::RCARM32_QtoS);
+    return TypeToRegisterSet[RC];
}
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
- assert(RC < RC_Target);
+    // ARM32 defines register classes from RC_Target upwards, so the valid range
+    // ends at RCARM32_NUM. The named cast makes the enum conversion explicit.
+    assert(static_cast<RegARM32::RegClassARM32>(RC) < RegARM32::RCARM32_NUM);
return TypeToRegisterSetUnfiltered[RC];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
@@ -413,6 +418,20 @@
}
}
+ // Generates a vmov instruction to extract the given index from a vector
+ // register.
+ //
+ // Inserts an InstARM32Extract through Context.insert: Dest receives element
+ // Index of the vector Src0. Pred is the instruction's predicate and defaults
+ // to CondARM32::AL.
+ void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
+ }
+
+ // Generates a vmov instruction to insert a value into the given index of a
+ // vector register.
+ //
+ // Inserts an InstARM32Insert through Context.insert: Src0 is the value
+ // written into element Index of the vector Dest. Pred is the instruction's
+ // predicate and defaults to CondARM32::AL.
+ void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
+ CondARM32::Cond Pred = CondARM32::AL) {
+ Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
+ }
+
// --------------------------------------------------------------------------
// Begin bool folding machinery.
//