Subzero: Fix srem.i8/urem.i8 lowering for x86-64.
The problem was that the lowering sequence might lead to the rem result %ah being directly moved into an 8-bit register other than al/bl/cl/dl/ah/bh/ch/dh. This is not encodable, and attempting to encode it leads to actually moving from %spl instead of %ah.
The machinery to handle this was already available - copy through a temporary with register class RCX86_IsAhRcvr. So this was just a matter of hooking it up for srem/urem.
This CL also requires relaxing some checks in the register allocator for when the -reg-reserve option is used.
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4370
R=jpp@chromium.org
Review URL: https://codereview.chromium.org/1909853002 .
diff --git a/src/IceInstX8664.def b/src/IceInstX8664.def
index 42edba6..8ed221d 100644
--- a/src/IceInstX8664.def
+++ b/src/IceInstX8664.def
@@ -81,13 +81,13 @@
X(Reg_r15l, 15, "r15b", Reg_r15, 0,1,0,0,1, 1,0,0,0,1, 0, 0,0,0,1,0, \
REGLIST3(RegX8664, r15, r15d, r15w)) \
/* High 8-bit registers. None are allowed for register allocation. */ \
- X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
+ X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rax, eax, ax)) \
- X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
+ X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rcx, ecx, cx)) \
- X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
+ X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rdx, edx, dx)) \
- X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
+ X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0, \
REGLIST3(RegX8664, rbx, ebx, bx)) \
/* End of 8-bit register set */
//#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 53aa91b..06631bf 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -91,7 +91,7 @@
if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
MinWeightIndex = i;
}
- assert(MinWeightIndex >= 0);
+ assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
return MinWeightIndex;
}
@@ -686,7 +686,8 @@
// All the weights are now calculated. Find the register with smallest weight.
int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
- if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
+ if (MinWeightIndex < 0 ||
+ Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
if (!Iter.Cur->mustHaveReg()) {
// Iter.Cur doesn't have priority over any other live ranges, so don't
// allocate any register to it, and move it to the Handled state.
@@ -870,7 +871,10 @@
Iter.Cur = Unhandled.back();
Unhandled.pop_back();
dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
- assert(Target->getRegistersForVariable(Iter.Cur).any());
+ if (UseReserve)
+ assert(Target->getAllRegistersForVariable(Iter.Cur).any());
+ else
+ assert(Target->getRegistersForVariable(Iter.Cur).any());
Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
Iter.RegMaskUnfiltered =
RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 0907f87..4927e17 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -2308,6 +2308,14 @@
_mov(T_edx, Ctx->getConstantZero(Ty));
_mov(T, Src0, Eax);
_div(T_edx, Src1, T);
+ if (Ty == IceType_i8) {
+ // Register ah must be moved into one of {al,bl,cl,dl} before it can be
+ // moved into a general 8-bit register.
+ auto *T_AhRcvr = makeReg(Ty);
+ T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
+ _mov(T_AhRcvr, T_edx);
+ T_edx = T_AhRcvr;
+ }
_mov(Dest, T_edx);
} break;
case InstArithmetic::Srem: {
@@ -2377,6 +2385,14 @@
_mov(T, Src0, Eax);
_cbwdq(T_edx, T);
_idiv(T_edx, Src1, T);
+ if (Ty == IceType_i8) {
+ // Register ah must be moved into one of {al,bl,cl,dl} before it can be
+ // moved into a general 8-bit register.
+ auto *T_AhRcvr = makeReg(Ty);
+ T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
+ _mov(T_AhRcvr, T_edx);
+ T_edx = T_AhRcvr;
+ }
_mov(Dest, T_edx);
} break;
case InstArithmetic::Fadd:
diff --git a/tests_lit/llvm2ice_tests/8bit.pnacl.ll b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
index 6d5781b..d11c857 100644
--- a/tests_lit/llvm2ice_tests/8bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
@@ -5,6 +5,15 @@
; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols | FileCheck %s
+; The following tests i8 srem/urem lowering on x86-64, specifically that the %ah
+; result gets copied into %al/%bl/%cl/%dl before moved into its final register.
+; This extra copy is forced by excluding al/bl/cl/dl by default (-reg-exclude),
+; but allowing them to be used if absolutely necessary (-reg-reserve).
+
+; RUN: %p2i --target=x8664 --filetype=obj --disassemble -i %s --args -O2 \
+; RUN: -reg-exclude=al,bl,cl,dl -reg-reserve \
+; RUN: -allow-externally-defined-symbols | FileCheck %s --check-prefix=REM
+
declare void @useInt(i32 %x)
define internal i32 @add8Bit(i32 %a, i32 %b) {
@@ -103,6 +112,9 @@
}
; CHECK-LABEL: urem8Bit
; CHECK: div {{[abcd]l|BYTE PTR}}
+; REM-LABEL: urem8Bit
+; REM: div
+; REM-NEXT: mov {{[abcd]}}l,ah
define internal i32 @urem8BitConst(i32 %a) {
entry:
@@ -113,6 +125,7 @@
}
; CHECK-LABEL: urem8BitConst
; CHECK: div {{[abcd]l|BYTE PTR}}
+; REM-LABEL: urem8BitConst
define internal i32 @sdiv8Bit(i32 %a, i32 %b) {
@@ -146,6 +159,9 @@
}
; CHECK-LABEL: srem8Bit
; CHECK: idiv {{[abcd]l|BYTE PTR}}
+; REM-LABEL: srem8Bit
+; REM: idiv
+; REM-NEXT: mov {{[abcd]}}l,ah
define internal i32 @srem8BitConst(i32 %a) {
entry:
@@ -156,6 +172,7 @@
}
; CHECK-LABEL: srem8BitConst
; CHECK: idiv {{[abcd]l|BYTE PTR}}
+; REM-LABEL: srem8BitConst
define internal i32 @shl8Bit(i32 %a, i32 %b) {
entry: