Subzero: implement 64 bit multiply in mips32
Implement 64-bit multiply in MIPS32. In addition, add the LO/HI registers, which are also used for other 64-bit math such as div and rem.
BUG=
R=jpp@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1716483003 .
Patch from Reed Kotler <rkotlerimgtec@gmail.com>.
diff --git a/src/IceInstMIPS32.cpp b/src/IceInstMIPS32.cpp
index 3e9b203..ad33992 100644
--- a/src/IceInstMIPS32.cpp
+++ b/src/IceInstMIPS32.cpp
@@ -56,7 +56,13 @@
template <> const char *InstMIPS32Add::Opcode = "add";
template <> const char *InstMIPS32Addu::Opcode = "addu";
template <> const char *InstMIPS32And::Opcode = "and";
+template <> const char *InstMIPS32Mfhi::Opcode = "mfhi";
+template <> const char *InstMIPS32Mflo::Opcode = "mflo";
+template <> const char *InstMIPS32Mthi::Opcode = "mthi";
+template <> const char *InstMIPS32Mtlo::Opcode = "mtlo";
template <> const char *InstMIPS32Mul::Opcode = "mul";
+template <> const char *InstMIPS32Mult::Opcode = "mult";
+template <> const char *InstMIPS32Multu::Opcode = "multu";
template <> const char *InstMIPS32Or::Opcode = "or";
template <> const char *InstMIPS32Ori::Opcode = "ori";
template <> const char *InstMIPS32Sltu::Opcode = "sltu";
@@ -64,6 +70,42 @@
template <> const char *InstMIPS32Subu::Opcode = "subu";
template <> const char *InstMIPS32Xor::Opcode = "xor";
+template <> void InstMIPS32Mflo::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ emitUnaryopGPRFLoHi(Opcode, this, Func);
+}
+
+template <> void InstMIPS32Mfhi::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ emitUnaryopGPRFLoHi(Opcode, this, Func);
+}
+
+template <> void InstMIPS32Mtlo::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ emitUnaryopGPRTLoHi(Opcode, this, Func);
+}
+
+template <> void InstMIPS32Mthi::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ emitUnaryopGPRTLoHi(Opcode, this, Func);
+}
+
+template <> void InstMIPS32Mult::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ emitThreeAddrLoHi(Opcode, this, Func);
+}
+
+template <> void InstMIPS32Multu::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump())
+ return;
+ emitThreeAddrLoHi(Opcode, this, Func);
+}
+
InstMIPS32Call::InstMIPS32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
: InstMIPS32(Func, InstMIPS32::Call, 1, Dest) {
HasSideEffects = true;
@@ -125,6 +167,23 @@
Str << ", ";
Inst->getSrc(0)->emit(Func);
}
+void InstMIPS32::emitUnaryopGPRFLoHi(const char *Opcode, const InstMIPS32 *Inst,
+ const Cfg *Func) {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ Str << "\t" << Opcode << "\t";
+ Inst->getDest()->emit(Func);
+}
+
+void InstMIPS32::emitUnaryopGPRTLoHi(const char *Opcode, const InstMIPS32 *Inst,
+ const Cfg *Func) {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ Str << "\t" << Opcode << "\t";
+ Inst->getSrc(0)->emit(Func);
+}
void InstMIPS32::emitThreeAddr(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func) {
@@ -140,6 +199,18 @@
Inst->getSrc(1)->emit(Func);
}
+void InstMIPS32::emitThreeAddrLoHi(const char *Opcode, const InstMIPS32 *Inst,
+ const Cfg *Func) {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(Inst->getSrcSize() == 2);
+ Str << "\t" << Opcode << "\t";
+ Inst->getSrc(0)->emit(Func);
+ Str << ", ";
+ Inst->getSrc(1)->emit(Func);
+}
+
void InstMIPS32Ret::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
diff --git a/src/IceInstMIPS32.def b/src/IceInstMIPS32.def
index c7eb47d..99b58e9 100644
--- a/src/IceInstMIPS32.def
+++ b/src/IceInstMIPS32.def
@@ -39,70 +39,74 @@
#define REGMIPS32_GPR_TABLE \
/* val, encode, name, scratch, preserved, stackptr, frameptr, \
isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init */ \
- X(Reg_ZERO, = 0, "zero", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ X(Reg_ZERO, 0, "zero", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_ZERO)) \
- X(Reg_AT, = Reg_ZERO + 1, "at", 0, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_AT, 1, "at", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_AT)) \
- X(Reg_V0, = Reg_ZERO + 2, "v0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_V0, 2, "v0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_V0, Reg_V0V1)) \
- X(Reg_V1, = Reg_ZERO + 3, "v1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_V1, 3, "v1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_V1, Reg_V0V1)) \
- X(Reg_A0, = Reg_ZERO + 4, "a0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_A0, 4, "a0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_A0, Reg_A0A1)) \
- X(Reg_A1, = Reg_ZERO + 5, "a1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_A1, 5, "a1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_A1, Reg_A0A1)) \
- X(Reg_A2, = Reg_ZERO + 6, "a2", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_A2, 6, "a2", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_A2, Reg_A2A3)) \
- X(Reg_A3, = Reg_ZERO + 7, "a3", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_A3, 7, "a3", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_A3, Reg_A2A3)) \
- X(Reg_T0, = Reg_ZERO + 8, "t0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T0, 8, "t0", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T0, Reg_T0T1)) \
- X(Reg_T1, = Reg_ZERO + 9, "t1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T1, 9, "t1", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T1, Reg_T0T1)) \
- X(Reg_T2, = Reg_ZERO + 10, "t2", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T2, 10, "t2", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T2, Reg_T2T3)) \
- X(Reg_T3, = Reg_ZERO + 11, "t3", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T3, 11, "t3", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T3, Reg_T2T3)) \
- X(Reg_T4, = Reg_ZERO + 12, "t4", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T4, 12, "t4", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T4, Reg_T4T5)) \
- X(Reg_T5, = Reg_ZERO + 14, "t5", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T5, 13, "t5", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T5, Reg_T4T5)) \
- X(Reg_T6, = Reg_ZERO + 14, "t6", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T6, 14, "t6", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T6, Reg_T6T7)) \
- X(Reg_T7, = Reg_ZERO + 15, "t7", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T7, 15, "t7", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T7, Reg_T6T7)) \
- X(Reg_S0, = Reg_ZERO + 16, "s0", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_S0, 16, "s0", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S0, Reg_S0S1)) \
- X(Reg_S1, = Reg_ZERO + 17, "s1", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_S1, 17, "s1", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S1, Reg_S0S1)) \
- X(Reg_S2, = Reg_ZERO + 18, "s2", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_S2, 18, "s2", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S2, Reg_S2S3)) \
- X(Reg_S3, = Reg_ZERO + 19, "s3", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_S3, 19, "s3", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S3, Reg_S2S3)) \
- X(Reg_S4, = Reg_ZERO + 20, "s4", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_S4, 20, "s4", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S4, Reg_S4S5)) \
- X(Reg_S5, = Reg_ZERO + 21, "s5", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_S5, 21, "s5", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S5, Reg_S4S5)) \
- X(Reg_S6, = Reg_ZERO + 22, "s6", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_S6, 22, "s6", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S6, Reg_S6S7)) \
- X(Reg_S7, = Reg_ZERO + 23, "s7", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_S7, 23, "s7", 0, 1, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_S7, Reg_S6S7)) \
- X(Reg_T8, = Reg_ZERO + 23, "t8", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T8, 24, "t8", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T8, Reg_T8T9)) \
- X(Reg_T9, = Reg_ZERO + 25, "t9", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
+ X(Reg_T9, 25, "t9", 1, 0, 0, 0, 1, 0, 0, 0, 0, \
ALIASES2(Reg_T9, Reg_T8T9)) \
- X(Reg_K0, = Reg_ZERO + 26, "k0", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ X(Reg_K0, 26, "k0", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_K0)) \
- X(Reg_K1, = Reg_ZERO + 27, "k1", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ X(Reg_K1, 27, "k1", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_K1)) \
- X(Reg_GP, = Reg_ZERO + 28, "gp", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ X(Reg_GP, 28, "gp", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_GP)) \
- X(Reg_SP, = Reg_ZERO + 29, "sp", 0, 0, 1, 0, 0, 0, 0, 0, 0, \
+ X(Reg_SP, 29, "sp", 0, 0, 1, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_SP)) \
- X(Reg_FP, = Reg_ZERO + 30, "fp", 0, 0, 0, 1, 0, 0, 0, 0, 0, \
+ X(Reg_FP, 30, "fp", 0, 0, 0, 1, 0, 0, 0, 0, 0, \
ALIASES1(Reg_FP)) \
- X(Reg_RA, = Reg_ZERO + 31, "ra", 0, 1, 0, 0, 0, 0, 0, 0, 0, \
+ X(Reg_RA, 31, "ra", 0, 1, 0, 0, 0, 0, 0, 0, 0, \
ALIASES1(Reg_RA)) \
+ X(Reg_LO, 0, "lo", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ ALIASES2(Reg_LO, Reg_LOHI)) \
+ X(Reg_HI, 0, "hi", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ ALIASES2(Reg_HI, Reg_LOHI))
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)
// TODO(reed kotler): List FP registers etc.
@@ -142,6 +146,8 @@
ALIASES3(Reg_S6, Reg_S7, Reg_S6S7)) \
X(Reg_T8T9, 26, "t8, t9", 1, 0, 0, 0, 0, 1, 0, 0, 0, \
ALIASES3(Reg_T8, Reg_T9, Reg_T8T9)) \
+ X(Reg_LOHI, 0, "lo, hi", 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+      ALIASES3(Reg_LO, Reg_HI, Reg_LOHI))
//#define X(val, encode, name, scratch, preserved, stackptr, frameptr,
// isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)
@@ -160,9 +166,9 @@
#define REGMIPS32_TABLE_BOUNDS \
/* val, init */ \
X(Reg_GPR_First, = Reg_ZERO) \
- X(Reg_GPR_Last, = Reg_RA) \
+ X(Reg_GPR_Last, = Reg_HI) \
X(Reg_I64PAIR_First, = Reg_V0V1) \
- X(Reg_I64PAIR_Last, = Reg_T8T9) \
+ X(Reg_I64PAIR_Last, = Reg_LOHI) \
//define X(val, init)
// TODO(reed kotler): add condition code tables, etc.
diff --git a/src/IceInstMIPS32.h b/src/IceInstMIPS32.h
index dfa2146..4b492c0 100644
--- a/src/IceInstMIPS32.h
+++ b/src/IceInstMIPS32.h
@@ -124,8 +124,14 @@
Call,
La,
Lui,
+ Mfhi,
+ Mflo,
Mov, // actually a pseudo op for addi rd, rs, 0
+ Mthi,
+ Mtlo,
Mul,
+ Mult,
+ Multu,
Or,
Ori,
Ret,
@@ -149,8 +155,14 @@
/// Shared emit routines for common forms of instructions.
static void emitUnaryopGPR(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func);
+ static void emitUnaryopGPRFLoHi(const char *Opcode, const InstMIPS32 *Inst,
+ const Cfg *Func);
+ static void emitUnaryopGPRTLoHi(const char *Opcode, const InstMIPS32 *Inst,
+ const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstMIPS32 *Inst,
const Cfg *Func);
+ static void emitThreeAddrLoHi(const char *Opcode, const InstMIPS32 *Inst,
+ const Cfg *Func);
protected:
InstMIPS32(Cfg *Func, InstKindMIPS32 Kind, SizeT Maxsrcs, Variable *Dest)
@@ -379,7 +391,13 @@
using InstMIPS32And = InstMIPS32ThreeAddrGPR<InstMIPS32::And>;
using InstMIPS32Lui = InstMIPS32Imm16<InstMIPS32::Lui>;
using InstMIPS32La = InstMIPS32UnaryopGPR<InstMIPS32::La>;
+using InstMIPS32Mfhi = InstMIPS32UnaryopGPR<InstMIPS32::Mfhi>;
+using InstMIPS32Mflo = InstMIPS32UnaryopGPR<InstMIPS32::Mflo>;
+using InstMIPS32Mthi = InstMIPS32UnaryopGPR<InstMIPS32::Mthi>;
+using InstMIPS32Mtlo = InstMIPS32UnaryopGPR<InstMIPS32::Mtlo>;
using InstMIPS32Mul = InstMIPS32ThreeAddrGPR<InstMIPS32::Mul>;
+using InstMIPS32Mult = InstMIPS32ThreeAddrGPR<InstMIPS32::Mult>;
+using InstMIPS32Multu = InstMIPS32ThreeAddrGPR<InstMIPS32::Multu>;
using InstMIPS32Or = InstMIPS32ThreeAddrGPR<InstMIPS32::Or>;
using InstMIPS32Ori = InstMIPS32Imm16<InstMIPS32::Ori>;
using InstMIPS32Sltu = InstMIPS32ThreeAddrGPR<InstMIPS32::Sltu>;
diff --git a/src/IceRegistersMIPS32.h b/src/IceRegistersMIPS32.h
index a707284..4e14387 100644
--- a/src/IceRegistersMIPS32.h
+++ b/src/IceRegistersMIPS32.h
@@ -44,7 +44,7 @@
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
isI64Pair, isFP32, isFP64, isVec128, alias_init) \
\
- Encoded_##val encode,
+ Encoded_##val = encode,
REGMIPS32_GPR_TABLE
#undef X
Encoded_Not_GPR = -1
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index f9ee059..4fcd3ce 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -582,6 +582,7 @@
case InstArithmetic::Or:
case InstArithmetic::Sub:
case InstArithmetic::Xor:
+ case InstArithmetic::Mul:
break;
default:
UnimplementedLoweringError(this, Instr);
@@ -644,6 +645,24 @@
_mov(DestHi, T_Hi);
return;
}
+ case InstArithmetic::Mul: {
+    // TODO(rkotler): Ensure mult/multu are modeled as clobbering LO and HI so
+    // the register allocator never caches a value in LO/HI across this sequence.
+ auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
+ auto *T1 = I32Reg(), *T2 = I32Reg();
+ auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
+ _multu(T_Lo, Src0LoR, Src1LoR);
+ Context.insert<InstFakeDef>(T_Hi, T_Lo);
+ _mflo(T1, T_Lo);
+ _mfhi(T2, T_Hi);
+ _mov(DestLo, T1);
+ _mul(TM1, Src0HiR, Src1LoR);
+ _mul(TM2, Src0LoR, Src1HiR);
+ _addu(TM3, TM1, T2);
+ _addu(TM4, TM3, TM2);
+ _mov(DestHi, TM4);
+ return;
+ }
default:
UnimplementedLoweringError(this, Instr);
return;
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index b4afe42..a06dc80 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -175,10 +175,34 @@
}
}
+ void _mfhi(Variable *Dest, Operand *Src) {
+ Context.insert<InstMIPS32Mfhi>(Dest, Src);
+ }
+
+ void _mflo(Variable *Dest, Operand *Src) {
+ Context.insert<InstMIPS32Mflo>(Dest, Src);
+ }
+
+ void _mthi(Variable *Dest, Operand *Src) {
+ Context.insert<InstMIPS32Mthi>(Dest, Src);
+ }
+
+ void _mtlo(Variable *Dest, Operand *Src) {
+ Context.insert<InstMIPS32Mtlo>(Dest, Src);
+ }
+
void _mul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstMIPS32Mul>(Dest, Src0, Src1);
}
+ void _mult(Variable *Dest, Variable *Src0, Variable *Src1) {
+ Context.insert<InstMIPS32Mult>(Dest, Src0, Src1);
+ }
+
+ void _multu(Variable *Dest, Variable *Src0, Variable *Src1) {
+ Context.insert<InstMIPS32Multu>(Dest, Src0, Src1);
+ }
+
void _or(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstMIPS32Or>(Dest, Src0, Src1);
}
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
index dc2739e..4106d3c 100644
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -357,6 +357,15 @@
; ARM32: umull
; ARM32: add
+; MIPS32-LABEL: mul64BitSigned
+; MIPS32: multu
+; MIPS32: mflo
+; MIPS32: mfhi
+; MIPS32: mul
+; MIPS32: mul
+; MIPS32: addu
+; MIPS32: addu
+
define internal i64 @mul64BitUnsigned(i64 %a, i64 %b) {
entry:
%mul = mul i64 %b, %a
@@ -382,6 +391,15 @@
; ARM32: umull
; ARM32: add
+; MIPS32-LABEL: mul64BitUnsigned
+; MIPS32: multu
+; MIPS32: mflo
+; MIPS32: mfhi
+; MIPS32: mul
+; MIPS32: mul
+; MIPS32: addu
+; MIPS32: addu
+
define internal i64 @div64BitSigned(i64 %a, i64 %b) {
entry:
%div = sdiv i64 %a, %b