ARM32: Lower more integer intrinsics and test.
Lower stacksave/restore.
Lower ctlz, cttz, bswap, and popcount. Popcount is just
done with a helper call. Cttz can use the clz instruction
after reversing the bits.
For now we can only crosstest stacksave/restore, whose test
happens to be written in C (for the C99 VLAs). I can't seem
to compile the C++ crosstests with arm-cross-g++ (missing
headers), so I will check those later, after resolving the
cross-compilation issue.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=jpp@chromium.org
Review URL: https://codereview.chromium.org/1222943003 .
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 6639da8..10fdfe1 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -1575,7 +1575,7 @@
// Copy arguments that are passed on the stack to the appropriate
// stack locations.
- Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
for (auto &StackArg : StackArgs) {
ConstantInteger32 *Loc =
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
@@ -1662,7 +1662,7 @@
if (ParameterAreaSizeBytes) {
Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
Legal_Reg | Legal_Flex);
- Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
_add(SP, SP, AddAmount);
}
@@ -2032,19 +2032,91 @@
return;
}
case Intrinsics::Bswap: {
- UnimplementedError(Func->getContext()->getFlags());
+ Variable *Dest = Instr->getDest();
+ Operand *Val = Instr->getArg(0);
+ Type Ty = Val->getType();
+ if (Ty == IceType_i64) {
+ Variable *Val_Lo = legalizeToVar(loOperand(Val));
+ Variable *Val_Hi = legalizeToVar(hiOperand(Val));
+ Variable *T_Lo = makeReg(IceType_i32);
+ Variable *T_Hi = makeReg(IceType_i32);
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ _rev(T_Lo, Val_Lo);
+ _rev(T_Hi, Val_Hi);
+ _mov(DestLo, T_Hi);
+ _mov(DestHi, T_Lo);
+ } else {
+ assert(Ty == IceType_i32 || Ty == IceType_i16);
+ Variable *ValR = legalizeToVar(Val);
+ Variable *T = makeReg(Ty);
+ _rev(T, ValR);
+ if (Val->getType() == IceType_i16) {
+ Operand *Sixteen =
+ legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
+ _lsr(T, T, Sixteen);
+ }
+ _mov(Dest, T);
+ }
return;
}
case Intrinsics::Ctpop: {
- UnimplementedError(Func->getContext()->getFlags());
+ Variable *Dest = Instr->getDest();
+ Operand *Val = Instr->getArg(0);
+ InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
+ ? H_call_ctpop_i32
+ : H_call_ctpop_i64,
+ Dest, 1);
+ Call->addArg(Val);
+ lowerCall(Call);
+ // The popcount helpers always return 32-bit values, while the intrinsic's
+ // signature matches some 64-bit platform's native instructions and
+ // expects to fill a 64-bit reg. Thus, clear the upper bits of the dest
+ // just in case the user doesn't do that in the IR or doesn't toss the bits
+ // via truncate.
+ if (Val->getType() == IceType_i64) {
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ _mov(DestHi, Zero);
+ }
return;
}
case Intrinsics::Ctlz: {
- UnimplementedError(Func->getContext()->getFlags());
+ // The "is zero undef" parameter is ignored and we always return
+ // a well-defined value.
+ Operand *Val = Instr->getArg(0);
+ Variable *ValLoR;
+ Variable *ValHiR = nullptr;
+ if (Val->getType() == IceType_i64) {
+ ValLoR = legalizeToVar(loOperand(Val));
+ ValHiR = legalizeToVar(hiOperand(Val));
+ } else {
+ ValLoR = legalizeToVar(Val);
+ }
+ lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
return;
}
case Intrinsics::Cttz: {
- UnimplementedError(Func->getContext()->getFlags());
+ // Essentially like Clz, but reverse the bits first.
+ Operand *Val = Instr->getArg(0);
+ Variable *ValLoR;
+ Variable *ValHiR = nullptr;
+ if (Val->getType() == IceType_i64) {
+ ValLoR = legalizeToVar(loOperand(Val));
+ ValHiR = legalizeToVar(hiOperand(Val));
+ Variable *TLo = makeReg(IceType_i32);
+ Variable *THi = makeReg(IceType_i32);
+ _rbit(TLo, ValLoR);
+ _rbit(THi, ValHiR);
+ ValLoR = THi;
+ ValHiR = TLo;
+ } else {
+ ValLoR = legalizeToVar(Val);
+ Variable *T = makeReg(IceType_i32);
+ _rbit(T, ValLoR);
+ ValLoR = T;
+ }
+ lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
return;
}
case Intrinsics::Fabs: {
@@ -2077,13 +2149,15 @@
return;
}
case Intrinsics::Memset: {
- // The value operand needs to be extended to a stack slot size
- // because the PNaCl ABI requires arguments to be at least 32 bits
- // wide.
+ // The value operand needs to be extended to a stack slot size because the
+ // PNaCl ABI requires arguments to be at least 32 bits wide.
Operand *ValOp = Instr->getArg(1);
assert(ValOp->getType() == IceType_i8);
Variable *ValExt = Func->makeVariable(stackSlotType());
lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
+ // Technically, ARM has its own __aeabi_memset, but we can use plain
+ // memset too. The value and size arguments need to be flipped if we ever
+ // decide to use __aeabi_memset.
InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
Call->addArg(Instr->getArg(0));
Call->addArg(ValExt);
@@ -2111,15 +2185,19 @@
return;
}
case Intrinsics::Stacksave: {
- UnimplementedError(Func->getContext()->getFlags());
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
+ Variable *Dest = Instr->getDest();
+ _mov(Dest, SP);
return;
}
case Intrinsics::Stackrestore: {
- UnimplementedError(Func->getContext()->getFlags());
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
+ Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
+ _mov_nonkillable(SP, Val);
return;
}
case Intrinsics::Trap:
- UnimplementedError(Func->getContext()->getFlags());
+ _trap();
return;
case Intrinsics::UnknownIntrinsic:
Func->setError("Should not be lowering UnknownIntrinsic");
@@ -2128,6 +2206,34 @@
return;
}
+void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
+ Type Ty = Dest->getType(); // Dest's type selects the i32 or i64 lowering.
+ assert(Ty == IceType_i32 || Ty == IceType_i64);
+ Variable *T = makeReg(IceType_i32);
+ _clz(T, ValLoR); // T = clz(ValLoR); this is the whole result for i32.
+ if (Ty == IceType_i64) {
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Operand *Zero =
+ legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
+ Operand *ThirtyTwo =
+ legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
+ _cmp(ValHiR, Zero); // Compare feeding the NE-predicated clz below.
+ Variable *T2 = makeReg(IceType_i32);
+ _add(T2, T, ThirtyTwo); // T2 = 32 + clz(ValLoR), kept when ValHiR == 0.
+ _clz(T2, ValHiR, CondARM32::NE);
+ // T2 is also a source here: the NE-predicated clz writes T2 only when
+ // ValHiR != 0 and otherwise leaves the add's result in place. We use
+ // set_dest_nonkillable to prolong T2's liveness as if it were a source.
+ _set_dest_nonkillable();
+ _mov(DestLo, T2);
+ _mov(DestHi, Ctx->getConstantZero(IceType_i32)); // Upper half is zero.
+ return;
+ }
+ _mov(Dest, T);
+ return;
+}
+
void TargetARM32::lowerLoad(const InstLoad *Load) {
// A Load instruction can be treated the same as an Assign
// instruction, after the source operand is transformed into an
@@ -2186,7 +2292,7 @@
// eliminated. TODO: Are there more places where the fake use
// should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
// have a ret instruction.
- Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Context.insert(InstFakeUse::create(Func, SP));
}