Removes references to ah.
AH is a thorn in the flesh for our X86-64 backend. The assembler was
designed to always encode the low 8-bit registers, so %ah would become
%spl. While it is true we **could** force %spl to always be encoded as
%ah, that would not work if the instruction has a rex prefix.
This CL removes references to %ah from TargetX86Base. There used to be
2 uses of ah in the target lowering:
1) To zero-extend %al before an unsigned div:
mov <<src0>>, %al
mov 0, %ah
div <<src1>>
This pattern has been changed to
xor %eax, %eax
mov <<src0>>, %al
div <<src1>>
2) To access the 8-bit remainder for 8-bit division:
mov %ah, <<dest>>
This pattern has been changed to
shr $8, %eax
mov %al, <<Dest>>
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1260163003.
diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def
index f9b1e88..3129dad 100644
--- a/src/IceInstX8632.def
+++ b/src/IceInstX8632.def
@@ -44,16 +44,10 @@
// all of the registers are considered and have distinct numberings.
// This is in contrast to the above, where the "encode" is based on how
// the register numbers will be encoded in binaries and values can overlap.
-// Note that the isI8 attributed of Reg_ah is not set. In general we
-// don't want the register allocator choosing Reg_ah, in particular
-// for lowering insertelement to pinsrb where internally we use an
-// 8-bit operand but externally pinsrb uses a 32-bit register, in
-// which Reg_ah doesn't map to eax.
#define REGX8632_TABLE \
/* val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP */ \
REGX8632_GPR_TABLE \
- X(Reg_ah, 4, "???", "" , "ah", 0, 0, 0, 0, 0, 0, 0) \
REGX8632_XMM_TABLE
//#define X(val, encode, name, name16, name8, scratch, preserved, stackptr,
// frameptr, isI8, isInt, isFP)
@@ -73,8 +67,7 @@
X(Reg_al, = 0) \
X(Reg_cl, = 1) \
X(Reg_dl, = 2) \
- X(Reg_bl, = 3) \
- X(Reg_ah, = 4)
+ X(Reg_bl, = 3)
//#define X(val, encode)
// X86 segment registers.
diff --git a/src/IceInstX8664.def b/src/IceInstX8664.def
index dd4b712..7ad1eaa 100644
--- a/src/IceInstX8664.def
+++ b/src/IceInstX8664.def
@@ -67,15 +67,10 @@
// all of the registers are considered and have distinct numberings.
// This is in contrast to the above, where the "encode" is based on how
// the register numbers will be encoded in binaries and values can overlap.
-// We don't want the register allocator choosing Reg_ah, in particular
-// for lowering insertelement to pinsrb where internally we use an
-// 8-bit operand but externally pinsrb uses a 32-bit register, in
-// which Reg_ah doesn't map to eax.
#define REGX8664_TABLE \
/* val, encode, name64, name, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP */ \
REGX8664_GPR_TABLE \
- X(Reg_ah, = Reg_rax + 4, "?ah", "?ah", "?ah", "ah", 0, 0, 0, 0, 0, 0) \
REGX8664_XMM_TABLE
//#define X(val, encode, name, name32, name16, name8, scratch, preserved,
// stackptr, frameptr, isI8, isInt, isFP)
diff --git a/src/IceRegistersX8632.h b/src/IceRegistersX8632.h
index da920e2..b0d22bb 100644
--- a/src/IceRegistersX8632.h
+++ b/src/IceRegistersX8632.h
@@ -78,19 +78,20 @@
};
static inline GPRRegister getEncodedGPR(int32_t RegNum) {
- assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last);
+ assert(Reg_GPR_First <= RegNum);
+ assert(RegNum <= Reg_GPR_Last);
return GPRRegister(RegNum - Reg_GPR_First);
}
static inline XmmRegister getEncodedXmm(int32_t RegNum) {
- assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last);
+ assert(Reg_XMM_First <= RegNum);
+ assert(RegNum <= Reg_XMM_Last);
return XmmRegister(RegNum - Reg_XMM_First);
}
static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
- assert(RegNum == Reg_ah || (Reg_GPR_First <= RegNum && RegNum <= Reg_ebx));
- if (RegNum == Reg_ah)
- return Encoded_Reg_ah;
+ assert(Reg_GPR_First <= RegNum);
+ assert(RegNum <= Reg_ebx);
return ByteRegister(RegNum - Reg_GPR_First);
}
@@ -102,7 +103,8 @@
}
static inline X87STRegister getEncodedSTReg(int32_t RegNum) {
- assert(Encoded_X87ST_First <= RegNum && RegNum <= Encoded_X87ST_Last);
+ assert(Encoded_X87ST_First <= RegNum);
+ assert(RegNum <= Encoded_X87ST_Last);
return X87STRegister(RegNum);
}
};
diff --git a/src/IceRegistersX8664.h b/src/IceRegistersX8664.h
index f95fe9e..bc448b2 100644
--- a/src/IceRegistersX8664.h
+++ b/src/IceRegistersX8664.h
@@ -69,16 +69,20 @@
};
static inline GPRRegister getEncodedGPR(int32_t RegNum) {
- assert(Reg_GPR_First <= RegNum && RegNum <= Reg_GPR_Last);
+ assert(Reg_GPR_First <= RegNum);
+ assert(RegNum <= Reg_GPR_Last);
return GPRRegister(RegNum - Reg_GPR_First);
}
static inline XmmRegister getEncodedXmm(int32_t RegNum) {
- assert(Reg_XMM_First <= RegNum && RegNum <= Reg_XMM_Last);
+ assert(Reg_XMM_First <= RegNum);
+ assert(RegNum <= Reg_XMM_Last);
return XmmRegister(RegNum - Reg_XMM_First);
}
static inline ByteRegister getEncodedByteReg(int32_t RegNum) {
+ assert(Reg_GPR_First <= RegNum);
+ assert(RegNum <= Reg_GPR_Last);
return ByteRegister(RegNum - Reg_GPR_First);
}
diff --git a/src/IceTargetLoweringX8664.def b/src/IceTargetLoweringX8664.def
new file mode 100644
index 0000000..8ee61d0
--- /dev/null
+++ b/src/IceTargetLoweringX8664.def
@@ -0,0 +1,53 @@
+//===- subzero/src/IceTargetLoweringX8664.def - x86-64 X-macros -*- C++ -*-===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines certain patterns for lowering to x86-64 target
+// instructions, in the form of x-macros.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICETARGETLOWERINGX8664_DEF
+#define SUBZERO_SRC_ICETARGETLOWERINGX8664_DEF
+
+#define FCMPX8664_TABLE \
+ /* <---- scalar comparison ----> <- vector comparison -> */ \
+ /* val, dflt, swap, C1, C2, swap, predicate */ \
+ X(False, 0, 0, Br_None, Br_None, 0, Cmpps_Invalid) \
+ X(Oeq, 0, 0, Br_ne, Br_p, 0, Cmpps_eq) \
+ X(Ogt, 1, 0, Br_a, Br_None, 1, Cmpps_lt) \
+ X(Oge, 1, 0, Br_ae, Br_None, 1, Cmpps_le) \
+ X(Olt, 1, 1, Br_a, Br_None, 0, Cmpps_lt) \
+ X(Ole, 1, 1, Br_ae, Br_None, 0, Cmpps_le) \
+ X(One, 1, 0, Br_ne, Br_None, 0, Cmpps_Invalid) \
+ X(Ord, 1, 0, Br_np, Br_None, 0, Cmpps_ord) \
+ X(Ueq, 1, 0, Br_e, Br_None, 0, Cmpps_Invalid) \
+ X(Ugt, 1, 1, Br_b, Br_None, 0, Cmpps_nle) \
+ X(Uge, 1, 1, Br_be, Br_None, 0, Cmpps_nlt) \
+ X(Ult, 1, 0, Br_b, Br_None, 1, Cmpps_nle) \
+ X(Ule, 1, 0, Br_be, Br_None, 1, Cmpps_nlt) \
+ X(Une, 1, 0, Br_ne, Br_p, 0, Cmpps_neq) \
+ X(Uno, 1, 0, Br_p, Br_None, 0, Cmpps_unord) \
+ X(True, 1, 0, Br_None, Br_None, 0, Cmpps_Invalid) \
+//#define X(val, dflt, swapS, C1, C2, swapV, pred)
+
+#define ICMPX8664_TABLE \
+ /* val, C_32, C1_64, C2_64, C3_64 */ \
+ X(Eq, Br_e, Br_None, Br_ne, Br_e) \
+ X(Ne, Br_ne, Br_ne, Br_None, Br_ne) \
+ X(Ugt, Br_a, Br_a, Br_b, Br_a) \
+ X(Uge, Br_ae, Br_a, Br_b, Br_ae) \
+ X(Ult, Br_b, Br_b, Br_a, Br_b) \
+ X(Ule, Br_be, Br_b, Br_a, Br_be) \
+ X(Sgt, Br_g, Br_g, Br_l, Br_a) \
+ X(Sge, Br_ge, Br_g, Br_l, Br_ae) \
+ X(Slt, Br_l, Br_l, Br_g, Br_b) \
+ X(Sle, Br_le, Br_l, Br_g, Br_be) \
+//#define X(val, C_32, C1_64, C2_64, C3_64)
+
+#endif // SUBZERO_SRC_ICETARGETLOWERINGX8664_DEF
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 77048b0..d9cc5e4 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1850,12 +1850,21 @@
// immediates as the operand.
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
if (isByteSizedArithType(Dest->getType())) {
- Variable *T_ah = nullptr;
- Constant *Zero = Ctx->getConstantZero(IceType_i8);
+ // For 8-bit unsigned division we need to zero-extend al into ah. A mov
+ // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
+ // assembler refuses to encode %ah (encoding %spl with a REX prefix
+ // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
+ // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
+ // d[%lh], which means the X86 target lowering (and the register
+ // allocator) would have to be aware of this restriction. For now, we
+ // simply zero %eax completely, and move the dividend into %al.
+ Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+ Context.insert(InstFakeDef::create(Func, T_eax));
+ _xor(T_eax, T_eax);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
- _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);
- _div(T, Src1, T_ah);
+ _div(T, Src1, T);
_mov(Dest, T);
+ Context.insert(InstFakeUse::create(Func, T_eax));
} else {
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
@@ -1917,12 +1926,21 @@
case InstArithmetic::Urem:
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
if (isByteSizedArithType(Dest->getType())) {
- Variable *T_ah = nullptr;
- Constant *Zero = Ctx->getConstantZero(IceType_i8);
+ Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+ Context.insert(InstFakeDef::create(Func, T_eax));
+ _xor(T_eax, T_eax);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
- _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);
- _div(T_ah, Src1, T);
- _mov(Dest, T_ah);
+ Variable *T_al = makeReg(IceType_i8, Traits::RegisterSet::Reg_eax);
+ _div(T_al, Src1, T);
+ // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
+ // mov %ah, %al because it would make x86-64 codegen more complicated. If
+ // this ever becomes a problem we can introduce a pseudo rem instruction
+ // that returns the remainder in %al directly (and uses a mov for copying
+ // %ah to %al.)
+ static constexpr uint8_t AlSizeInBits = 8;
+ _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
+ _mov(Dest, T_al);
+ Context.insert(InstFakeUse::create(Func, T_eax));
} else {
Constant *Zero = Ctx->getConstantZero(IceType_i32);
_mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
@@ -1974,12 +1992,21 @@
}
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
if (isByteSizedArithType(Dest->getType())) {
- Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
+ // T is %al.
_cbwdq(T, T);
- Context.insert(InstFakeDef::create(Func, T_ah));
- _idiv(T_ah, Src1, T);
- _mov(Dest, T_ah);
+ _idiv(T, Src1, T);
+ Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+ Context.insert(InstFakeDef::create(Func, T_eax));
+ // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
+ // mov %ah, %al because it would make x86-64 codegen more complicated. If
+ // this ever becomes a problem we can introduce a pseudo rem instruction
+ // that returns the remainder in %al directly (and uses a mov for copying
+ // %ah to %al.)
+ static constexpr uint8_t AlSizeInBits = 8;
+ _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
+ _mov(Dest, T);
+ Context.insert(InstFakeUse::create(Func, T_eax));
} else {
T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);