Subzero: Fix srem.i8/urem.i8 lowering for x86-64. The problem was that the lowering sequence might lead to the rem result %ah being directly moved into an 8-bit register other than al/bl/cl/dl/ah/bh/ch/dh. This is not encodable, and attempting to encode it leads to actually moving from %spl instead of %ah. The machinery to handle this was already available - copy through a temporary with register class RCX86_IsAhRcvr. So this was just a matter of hooking it up for srem/urem. This CL also requires relaxing some checks in the register allocator for when the -reg-reserve option is used. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4370 R=jpp@chromium.org Review URL: https://codereview.chromium.org/1909853002 .

commit: 2655d9627f11b0c97f10eacf1f730bf66e5db05b [log] [tgz]
author: Jim Stichnoth <stichnot@chromium.org> Thu Apr 21 05:38:15 2016 -0700
committer: Jim Stichnoth <stichnot@chromium.org> Thu Apr 21 05:38:15 2016 -0700
tree: f82703b42297c05bda2136221c64f6b6338db3ee
parent: fd07ad08190cbb7c9818e79fab9059eb3ed329d7 [diff]
diff --git a/src/IceInstX8664.def b/src/IceInstX8664.def
index 42edba6..8ed221d 100644
--- a/src/IceInstX8664.def
+++ b/src/IceInstX8664.def

@@ -81,13 +81,13 @@
   X(Reg_r15l, 15, "r15b", Reg_r15, 0,1,0,0,1, 1,0,0,0,1, 0, 0,0,0,1,0,         \
     REGLIST3(RegX8664, r15, r15d, r15w))                                       \
   /* High 8-bit registers.  None are allowed for register allocation. */       \
-  X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1,              \
+  X(Reg_ah, 4, "ah", Reg_rax, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0,              \
     REGLIST3(RegX8664, rax, eax, ax))                                          \
-  X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1,              \
+  X(Reg_ch, 5, "ch", Reg_rcx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0,              \
     REGLIST3(RegX8664, rcx, ecx, cx))                                          \
-  X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1,              \
+  X(Reg_dh, 6, "dh", Reg_rdx, 1,0,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0,              \
     REGLIST3(RegX8664, rdx, edx, dx))                                          \
-  X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1,              \
+  X(Reg_bh, 7, "bh", Reg_rbx, 0,1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,0,              \
     REGLIST3(RegX8664, rbx, ebx, bx))                                          \
   /* End of 8-bit register set */
 //#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,

diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 53aa91b..06631bf 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp

@@ -91,7 +91,7 @@
     if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
       MinWeightIndex = i;
   }
-  assert(MinWeightIndex >= 0);
+  assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
   return MinWeightIndex;
 }
 
@@ -686,7 +686,8 @@
   // All the weights are now calculated. Find the register with smallest weight.
   int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
 
-  if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
+  if (MinWeightIndex < 0 ||
+      Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
     if (!Iter.Cur->mustHaveReg()) {
       // Iter.Cur doesn't have priority over any other live ranges, so don't
       // allocate any register to it, and move it to the Handled state.
@@ -870,7 +871,10 @@
     Iter.Cur = Unhandled.back();
     Unhandled.pop_back();
     dumpLiveRangeTrace("\nConsidering  ", Iter.Cur);
-    assert(Target->getRegistersForVariable(Iter.Cur).any());
+    if (UseReserve)
+      assert(Target->getAllRegistersForVariable(Iter.Cur).any());
+    else
+      assert(Target->getRegistersForVariable(Iter.Cur).any());
     Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
     Iter.RegMaskUnfiltered =
         RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);

diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 0907f87..4927e17 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h

@@ -2308,6 +2308,14 @@
     _mov(T_edx, Ctx->getConstantZero(Ty));
     _mov(T, Src0, Eax);
     _div(T_edx, Src1, T);
+    if (Ty == IceType_i8) {
+      // Register ah must be moved into one of {al,bl,cl,dl} before it can be
+      // moved into a general 8-bit register.
+      auto *T_AhRcvr = makeReg(Ty);
+      T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
+      _mov(T_AhRcvr, T_edx);
+      T_edx = T_AhRcvr;
+    }
     _mov(Dest, T_edx);
   } break;
   case InstArithmetic::Srem: {
@@ -2377,6 +2385,14 @@
     _mov(T, Src0, Eax);
     _cbwdq(T_edx, T);
     _idiv(T_edx, Src1, T);
+    if (Ty == IceType_i8) {
+      // Register ah must be moved into one of {al,bl,cl,dl} before it can be
+      // moved into a general 8-bit register.
+      auto *T_AhRcvr = makeReg(Ty);
+      T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
+      _mov(T_AhRcvr, T_edx);
+      T_edx = T_AhRcvr;
+    }
     _mov(Dest, T_edx);
   } break;
   case InstArithmetic::Fadd:

diff --git a/tests_lit/llvm2ice_tests/8bit.pnacl.ll b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
index 6d5781b..d11c857 100644
--- a/tests_lit/llvm2ice_tests/8bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/8bit.pnacl.ll

@@ -5,6 +5,15 @@
 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \
 ; RUN:   -allow-externally-defined-symbols | FileCheck %s
 
+; The following tests i8 srem/urem lowering on x86-64, specifically that the %ah
+; result gets copied into %al/%bl/%cl/%dl before moved into its final register.
+; This extra copy is forced by excluding al/bl/cl/dl by default (-reg-exclude),
+; but allowing them to be used if absolutely necessary (-reg-reserve).
+
+; RUN: %p2i --target=x8664 --filetype=obj --disassemble -i %s --args -O2 \
+; RUN:   -reg-exclude=al,bl,cl,dl -reg-reserve \
+; RUN:   -allow-externally-defined-symbols | FileCheck %s --check-prefix=REM
+
 declare void @useInt(i32 %x)
 
 define internal i32 @add8Bit(i32 %a, i32 %b) {
@@ -103,6 +112,9 @@
 }
 ; CHECK-LABEL: urem8Bit
 ; CHECK: div {{[abcd]l|BYTE PTR}}
+; REM-LABEL: urem8Bit
+; REM: div
+; REM-NEXT: mov {{[abcd]}}l,ah
 
 define internal i32 @urem8BitConst(i32 %a) {
 entry:
@@ -113,6 +125,7 @@
 }
 ; CHECK-LABEL: urem8BitConst
 ; CHECK: div {{[abcd]l|BYTE PTR}}
+; REM-LABEL: urem8BitConst
 
 
 define internal i32 @sdiv8Bit(i32 %a, i32 %b) {
@@ -146,6 +159,9 @@
 }
 ; CHECK-LABEL: srem8Bit
 ; CHECK: idiv {{[abcd]l|BYTE PTR}}
+; REM-LABEL: srem8Bit
+; REM: idiv
+; REM-NEXT: mov {{[abcd]}}l,ah
 
 define internal i32 @srem8BitConst(i32 %a) {
 entry:
@@ -156,6 +172,7 @@
 }
 ; CHECK-LABEL: srem8BitConst
 ; CHECK: idiv {{[abcd]l|BYTE PTR}}
+; REM-LABEL: srem8BitConst
 
 define internal i32 @shl8Bit(i32 %a, i32 %b) {
 entry:
commit	2655d9627f11b0c97f10eacf1f730bf66e5db05b	[log] [tgz]
author	Jim Stichnoth <stichnot@chromium.org>	Thu Apr 21 05:38:15 2016 -0700
committer	Jim Stichnoth <stichnot@chromium.org>	Thu Apr 21 05:38:15 2016 -0700
tree	f82703b42297c05bda2136221c64f6b6338db3ee
parent	fd07ad08190cbb7c9818e79fab9059eb3ed329d7 [diff]