Subzero: lower the rest of the atomic operations.
64-bit ops are expanded via a cmpxchg8b loop.
The and/or/xor operations (both 32-bit and 64-bit) are also
expanded into a cmpxchg / cmpxchg8b loop.
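For reference, the 64-bit RMW case expands to the cmpxchg8b loop
sketched below (this mirrors the comment in expandAtomicRMWAsCmpxchg;
<Op_Lo>/<Op_Hi> stand for the low/high halves of the binary op):
    mov eax, [ptr]
    mov edx, [ptr + 4]
  .LABEL:
    mov ebx, eax
    <Op_Lo> ebx, <desired_adj_lo>
    mov ecx, edx
    <Op_Hi> ecx, <desired_adj_hi>
    lock cmpxchg8b [ptr]
    jne .LABEL
    mov <dest_lo>, eax
    mov <dest_hi>, edx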
Add a cross test for atomic RMW operations and
compare and swap.
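For illustration, the kind of operation the cross test needs to cover
looks roughly like this (C sketch using the GCC __sync builtins;
function names here are hypothetical, not the actual test helpers):
    #include <stdint.h>
    uint64_t rmw_add_64(volatile uint64_t *ptr, uint64_t v) {
      /* 64-bit RMW: lowered via a lock cmpxchg8b loop. */
      return __sync_fetch_and_add(ptr, v);
    }
    uint64_t cas_64(volatile uint64_t *ptr, uint64_t expected,
                    uint64_t desired) {
      /* Compare and swap: lowered to lock cmpxchg8b. */
      return __sync_val_compare_and_swap(ptr, expected, desired);
    }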
Misc: Test that atomic.is.lock.free can be optimized out if its
result is ignored.
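That optimization is enabled by marking the intrinsic as side-effect
free (see the IceIntrinsics.cpp change below); conceptually, a call
whose result is discarded should be eliminated, as in this
hypothetical C sketch (the frontend may also fold such a query
earlier):
    #include <stdatomic.h>
    void ignore_lock_free_query(atomic_int *p) {
      (void)atomic_is_lock_free(p); /* result unused; call can be DCE'd */
    }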
TODO:
* optimize compare and swap with a compare+branch further down
the instruction stream.
* optimize atomic RMW when the return value is ignored
(adds a locked field to binary ops though).
* We may want to do some actual target-dependent basic
block splitting + expansion (the instructions inserted by
the expansion must reference the pre-colored registers,
etc.). Until then, we get by with modeling the extended
liveness of the variables used in the loops via fake uses.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=jfb@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/362463002
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index cd5095f..c0e8c8d 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -51,9 +51,8 @@
llvm::array_lengthof(TypeX8632Attributes);
const char *InstX8632SegmentRegNames[] = {
-#define X(val, name) \
- name,
- SEG_REGX8632_TABLE
+#define X(val, name) name,
+ SEG_REGX8632_TABLE
#undef X
};
const size_t InstX8632SegmentRegNamesSize =
@@ -140,6 +139,33 @@
addSource(Source);
}
+InstX8632Cmpxchg::InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr,
+ Variable *Eax, Variable *Desired,
+ bool Locked)
+ : InstX8632Lockable(Func, InstX8632::Cmpxchg, 3,
+ llvm::dyn_cast<Variable>(DestOrAddr), Locked) {
+ assert(Eax->getRegNum() == TargetX8632::Reg_eax);
+ addSource(DestOrAddr);
+ addSource(Eax);
+ addSource(Desired);
+}
+
+InstX8632Cmpxchg8b::InstX8632Cmpxchg8b(Cfg *Func, OperandX8632 *Addr,
+ Variable *Edx, Variable *Eax,
+ Variable *Ecx, Variable *Ebx,
+ bool Locked)
+    : InstX8632Lockable(Func, InstX8632::Cmpxchg8b, 5, NULL, Locked) {
+ assert(Edx->getRegNum() == TargetX8632::Reg_edx);
+ assert(Eax->getRegNum() == TargetX8632::Reg_eax);
+ assert(Ecx->getRegNum() == TargetX8632::Reg_ecx);
+ assert(Ebx->getRegNum() == TargetX8632::Reg_ebx);
+ addSource(Addr);
+ addSource(Edx);
+ addSource(Eax);
+ addSource(Ecx);
+ addSource(Ebx);
+}
+
InstX8632Cvt::InstX8632Cvt(Cfg *Func, Variable *Dest, Operand *Source)
: InstX8632(Func, InstX8632::Cvt, 1, Dest) {
addSource(Source);
@@ -284,9 +310,14 @@
InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked)
- : InstX8632(Func, InstX8632::Xadd, 2, llvm::dyn_cast<Variable>(Dest)),
- Locked(Locked) {
- HasSideEffects = Locked;
+ : InstX8632Lockable(Func, InstX8632::Xadd, 2,
+ llvm::dyn_cast<Variable>(Dest), Locked) {
+ addSource(Dest);
+ addSource(Source);
+}
+
+InstX8632Xchg::InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source)
+ : InstX8632(Func, InstX8632::Xchg, 2, llvm::dyn_cast<Variable>(Dest)) {
addSource(Dest);
addSource(Source);
}
@@ -398,6 +429,7 @@
Str << "\n";
}
+template <> const char *InstX8632Neg::Opcode = "neg";
template <> const char *InstX8632Add::Opcode = "add";
template <> const char *InstX8632Addps::Opcode = "addps";
template <> const char *InstX8632Adc::Opcode = "adc";
@@ -554,6 +586,48 @@
dumpSources(Func);
}
+void InstX8632Cmpxchg::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 3);
+ if (Locked) {
+ Str << "\tlock";
+ }
+ Str << "\tcmpxchg\t";
+ getSrc(0)->emit(Func);
+ Str << ", ";
+ getSrc(2)->emit(Func);
+ Str << "\n";
+}
+
+void InstX8632Cmpxchg::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ if (Locked) {
+ Str << "lock ";
+ }
+ Str << "cmpxchg." << getSrc(0)->getType() << " ";
+ dumpSources(Func);
+}
+
+void InstX8632Cmpxchg8b::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 5);
+ if (Locked) {
+ Str << "\tlock";
+ }
+ Str << "\tcmpxchg8b\t";
+ getSrc(0)->emit(Func);
+ Str << "\n";
+}
+
+void InstX8632Cmpxchg8b::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ if (Locked) {
+ Str << "lock ";
+ }
+ Str << "cmpxchg8b ";
+ dumpSources(Func);
+}
+
void InstX8632Cvt::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
@@ -955,10 +1029,9 @@
void InstX8632Xadd::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
if (Locked) {
- Str << "\tlock xadd ";
- } else {
- Str << "\txadd\t";
+ Str << "\tlock";
}
+ Str << "\txadd\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
@@ -975,6 +1048,22 @@
dumpSources(Func);
}
+void InstX8632Xchg::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ Str << "\txchg\t";
+ getSrc(0)->emit(Func);
+ Str << ", ";
+ getSrc(1)->emit(Func);
+ Str << "\n";
+}
+
+void InstX8632Xchg::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Type Ty = getSrc(0)->getType();
+ Str << "xchg." << Ty << " ";
+ dumpSources(Func);
+}
+
void OperandX8632::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "<OperandX8632>";
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index baf072a..25beb6d 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -54,9 +54,8 @@
public:
enum SegmentRegisters {
DefaultSegment = -1,
-#define X(val, name) \
- val,
- SEG_REGX8632_TABLE
+#define X(val, name) val,
+ SEG_REGX8632_TABLE
#undef X
SegReg_NUM
};
@@ -142,6 +141,8 @@
Br,
Call,
Cdq,
+ Cmpxchg,
+ Cmpxchg8b,
Cvt,
Div,
Divps,
@@ -162,6 +163,7 @@
Mul,
Mulps,
Mulss,
+ Neg,
Or,
Pop,
Push,
@@ -183,6 +185,7 @@
Ucomiss,
UD2,
Xadd,
+ Xchg,
Xor
};
static const char *getWidthString(Type Ty);
@@ -328,6 +331,41 @@
virtual ~InstX8632Call() {}
};
+template <InstX8632::InstKindX8632 K>
+class InstX8632Unaryop : public InstX8632 {
+public:
+  // Create a unary-op instruction like neg.
+ // The source and dest are the same variable.
+ static InstX8632Unaryop *create(Cfg *Func, Operand *SrcDest) {
+ return new (Func->allocate<InstX8632Unaryop>())
+ InstX8632Unaryop(Func, SrcDest);
+ }
+ virtual void emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 1);
+ Str << "\t" << Opcode << "\t";
+ getSrc(0)->emit(Func);
+ Str << "\n";
+ }
+ virtual void dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ dumpDest(Func);
+ Str << " = " << Opcode << "." << getDest()->getType() << " ";
+ dumpSources(Func);
+ }
+ static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
+
+private:
+ InstX8632Unaryop(Cfg *Func, Operand *SrcDest)
+ : InstX8632(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) {
+ addSource(SrcDest);
+ }
+ InstX8632Unaryop(const InstX8632Unaryop &) LLVM_DELETED_FUNCTION;
+ InstX8632Unaryop &operator=(const InstX8632Unaryop &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Unaryop() {}
+ static const char *Opcode;
+};
+
// See the definition of emitTwoAddress() for a description of
// ShiftHack.
void emitTwoAddress(const char *Opcode, const Inst *Inst, const Cfg *Func,
@@ -400,6 +438,7 @@
static const char *Opcode;
};
+typedef InstX8632Unaryop<InstX8632::Neg> InstX8632Neg;
typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc;
@@ -423,6 +462,28 @@
typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
+// Base class for a lockable x86-32 instruction (emits a lock prefix when
+// Locked is set).
+class InstX8632Lockable : public InstX8632 {
+public:
+ virtual void emit(const Cfg *Func) const = 0;
+ virtual void dump(const Cfg *Func) const;
+
+protected:
+ bool Locked;
+
+ InstX8632Lockable(Cfg *Func, InstKindX8632 Kind, SizeT Maxsrcs,
+ Variable *Dest, bool Locked)
+ : InstX8632(Func, Kind, Maxsrcs, Dest), Locked(Locked) {
+ // Assume that such instructions are used for Atomics and be careful
+ // with optimizations.
+ HasSideEffects = Locked;
+ }
+
+private:
+ InstX8632Lockable(const InstX8632Lockable &) LLVM_DELETED_FUNCTION;
+ InstX8632Lockable &operator=(const InstX8632Lockable &) LLVM_DELETED_FUNCTION;
+};
+
// Mul instruction - unsigned multiply.
class InstX8632Mul : public InstX8632 {
public:
@@ -502,6 +563,57 @@
virtual ~InstX8632Cdq() {}
};
+// Cmpxchg instruction - "cmpxchg <dest>, <desired>" compares <dest>
+// with eax. If equal, the ZF is set and <desired> is stored in <dest>.
+// If not, ZF is cleared and <dest> is copied into eax (or a subregister).
+// <dest> can be a register or memory, while <desired> must be a register.
+// It is the user's responsibility to mark eax with a FakeDef.
+class InstX8632Cmpxchg : public InstX8632Lockable {
+public:
+ static InstX8632Cmpxchg *create(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
+ Variable *Desired, bool Locked) {
+ return new (Func->allocate<InstX8632Cmpxchg>())
+ InstX8632Cmpxchg(Func, DestOrAddr, Eax, Desired, Locked);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpxchg); }
+
+private:
+ InstX8632Cmpxchg(Cfg *Func, Operand *DestOrAddr, Variable *Eax,
+ Variable *Desired, bool Locked);
+ InstX8632Cmpxchg(const InstX8632Cmpxchg &) LLVM_DELETED_FUNCTION;
+ InstX8632Cmpxchg &operator=(const InstX8632Cmpxchg &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Cmpxchg() {}
+};
+
+// Cmpxchg8b instruction - "cmpxchg8b <m64>" compares <m64> with edx:eax.
+// If equal, the ZF is set and ecx:ebx is stored in <m64>.
+// If not, ZF is cleared and <m64> is copied to edx:eax.
+// The caller is responsible for inserting FakeDefs to mark edx
+// and eax as modified.
+// <m64> must be a memory operand.
+class InstX8632Cmpxchg8b : public InstX8632Lockable {
+public:
+ static InstX8632Cmpxchg8b *create(Cfg *Func, OperandX8632 *Dest,
+ Variable *Edx, Variable *Eax, Variable *Ecx,
+ Variable *Ebx, bool Locked) {
+ return new (Func->allocate<InstX8632Cmpxchg8b>())
+ InstX8632Cmpxchg8b(Func, Dest, Edx, Eax, Ecx, Ebx, Locked);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Cmpxchg8b); }
+
+private:
+ InstX8632Cmpxchg8b(Cfg *Func, OperandX8632 *Dest, Variable *Edx,
+ Variable *Eax, Variable *Ecx, Variable *Ebx, bool Locked);
+ InstX8632Cmpxchg8b(const InstX8632Cmpxchg8b &) LLVM_DELETED_FUNCTION;
+ InstX8632Cmpxchg8b &
+ operator=(const InstX8632Cmpxchg8b &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Cmpxchg8b() {}
+};
+
// Cvt instruction - wrapper for cvtsX2sY where X and Y are in {s,d,i}
// as appropriate. s=float, d=double, i=int. X and Y are determined
// from dest/src types. Sign and zero extension on the integer
@@ -861,7 +973,7 @@
//
// Both the dest and source are updated. The caller should then insert a
// FakeDef to reflect the second update.
-class InstX8632Xadd : public InstX8632 {
+class InstX8632Xadd : public InstX8632Lockable {
public:
static InstX8632Xadd *create(Cfg *Func, Operand *Dest, Variable *Source,
bool Locked) {
@@ -873,14 +985,35 @@
static bool classof(const Inst *Inst) { return isClassof(Inst, Xadd); }
private:
- bool Locked;
-
InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
InstX8632Xadd(const InstX8632Xadd &) LLVM_DELETED_FUNCTION;
InstX8632Xadd &operator=(const InstX8632Xadd &) LLVM_DELETED_FUNCTION;
virtual ~InstX8632Xadd() {}
};
+// Exchange instruction. Exchanges the first operand (destination
+// operand) with the second operand (source operand). At least one of
+// the operands must be a register (and the other can be reg or mem).
+// Both the Dest and Source are updated. If there is a memory operand,
+// then the instruction is automatically "locked" without the need for
+// a lock prefix.
+class InstX8632Xchg : public InstX8632 {
+public:
+ static InstX8632Xchg *create(Cfg *Func, Operand *Dest, Variable *Source) {
+ return new (Func->allocate<InstX8632Xchg>())
+ InstX8632Xchg(Func, Dest, Source);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Xchg); }
+
+private:
+ InstX8632Xchg(Cfg *Func, Operand *Dest, Variable *Source);
+ InstX8632Xchg(const InstX8632Xchg &) LLVM_DELETED_FUNCTION;
+ InstX8632Xchg &operator=(const InstX8632Xchg &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Xchg() {}
+};
+
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEINSTX8632_H
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
index 02562b5..b83513f 100644
--- a/src/IceIntrinsics.cpp
+++ b/src/IceIntrinsics.cpp
@@ -46,7 +46,7 @@
"nacl.atomic.fence" },
{ { { Intrinsics::AtomicFenceAll, true }, { IceType_void }, 1 },
"nacl.atomic.fence.all" },
- { { { Intrinsics::AtomicIsLockFree, true },
+ { { { Intrinsics::AtomicIsLockFree, false },
{ IceType_i1, IceType_i32, IceType_i32 }, 3 },
"nacl.atomic.is.lock.free" },
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index f1b8c25..bf11573 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -1968,7 +1968,7 @@
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
switch (Instr->getIntrinsicInfo().ID) {
- case Intrinsics::AtomicCmpxchg:
+ case Intrinsics::AtomicCmpxchg: {
if (!Intrinsics::VerifyMemoryOrder(
llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
@@ -1979,9 +1979,18 @@
Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
return;
}
- // TODO(jvoung): fill it in.
- Func->setError("Unhandled intrinsic");
+ Variable *DestPrev = Instr->getDest();
+ Operand *PtrToMem = Instr->getArg(0);
+ Operand *Expected = Instr->getArg(1);
+ Operand *Desired = Instr->getArg(2);
+ lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
+ // TODO(jvoung): If we peek ahead a few instructions and see how
+ // DestPrev is used (typically via another compare and branch),
+ // we may be able to optimize. If the result truly is used by a
+ // compare + branch, and the comparison is for equality, then we can
+ // optimize out the later compare, and fuse with the later branch.
return;
+ }
case Intrinsics::AtomicFence:
if (!Intrinsics::VerifyMemoryOrder(
llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
@@ -2183,18 +2192,54 @@
return;
}
+void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
+ Operand *Expected, Operand *Desired) {
+ if (Expected->getType() == IceType_i64) {
+ // Reserve the pre-colored registers first, before adding any more
+ // infinite-weight variables from FormMemoryOperand's legalization.
+ Variable *T_edx = makeReg(IceType_i32, Reg_edx);
+ Variable *T_eax = makeReg(IceType_i32, Reg_eax);
+ Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
+ Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
+ _mov(T_eax, loOperand(Expected));
+ _mov(T_edx, hiOperand(Expected));
+ _mov(T_ebx, loOperand(Desired));
+ _mov(T_ecx, hiOperand(Desired));
+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
+ const bool Locked = true;
+ _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
+ Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
+ _mov(DestLo, T_eax);
+ _mov(DestHi, T_edx);
+ return;
+ }
+ Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
+ _mov(T_eax, Expected);
+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
+ Variable *DesiredReg = legalizeToVar(Desired);
+ const bool Locked = true;
+ _cmpxchg(Addr, T_eax, DesiredReg, Locked);
+ _mov(DestPrev, T_eax);
+}
+
void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Operand *Ptr, Operand *Val) {
+ bool NeedsCmpxchg = false;
+ LowerBinOp Op_Lo = NULL;
+ LowerBinOp Op_Hi = NULL;
switch (Operation) {
default:
Func->setError("Unknown AtomicRMW operation");
return;
case Intrinsics::AtomicAdd: {
if (Dest->getType() == IceType_i64) {
- // Do a nasty cmpxchg8b loop. Factor this into a function.
- // TODO(jvoung): fill it in.
- Func->setError("Unhandled AtomicRMW operation");
- return;
+ // All the fall-through paths must set this to true, but use this
+ // for asserting.
+ NeedsCmpxchg = true;
+ Op_Lo = &TargetX8632::_add;
+ Op_Hi = &TargetX8632::_adc;
+ break;
}
OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
const bool Locked = true;
@@ -2206,26 +2251,160 @@
}
case Intrinsics::AtomicSub: {
if (Dest->getType() == IceType_i64) {
- // Do a nasty cmpxchg8b loop.
- // TODO(jvoung): fill it in.
- Func->setError("Unhandled AtomicRMW operation");
- return;
+ NeedsCmpxchg = true;
+ Op_Lo = &TargetX8632::_sub;
+ Op_Hi = &TargetX8632::_sbb;
+ break;
}
- // Generate a memory operand from Ptr.
- // neg...
- // Then do the same as AtomicAdd.
- // TODO(jvoung): fill it in.
- Func->setError("Unhandled AtomicRMW operation");
+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
+ const bool Locked = true;
+ Variable *T = NULL;
+ _mov(T, Val);
+ _neg(T);
+ _xadd(Addr, T, Locked);
+ _mov(Dest, T);
return;
}
case Intrinsics::AtomicOr:
+ // TODO(jvoung): If Dest is null or dead, then some of these
+ // operations do not need an "exchange", but just a locked op.
+ // That appears to be "worth" it for sub, or, and, and xor.
+ // xadd is probably fine vs lock add for add, and xchg is fine
+ // vs an atomic store.
+ NeedsCmpxchg = true;
+ Op_Lo = &TargetX8632::_or;
+ Op_Hi = &TargetX8632::_or;
+ break;
case Intrinsics::AtomicAnd:
+ NeedsCmpxchg = true;
+ Op_Lo = &TargetX8632::_and;
+ Op_Hi = &TargetX8632::_and;
+ break;
case Intrinsics::AtomicXor:
+ NeedsCmpxchg = true;
+ Op_Lo = &TargetX8632::_xor;
+ Op_Hi = &TargetX8632::_xor;
+ break;
case Intrinsics::AtomicExchange:
- // TODO(jvoung): fill it in.
- Func->setError("Unhandled AtomicRMW operation");
+ if (Dest->getType() == IceType_i64) {
+ NeedsCmpxchg = true;
+ // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
+ // just need to be moved to the ecx and ebx registers.
+ Op_Lo = NULL;
+ Op_Hi = NULL;
+ break;
+ }
+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
+ Variable *T = NULL;
+ _mov(T, Val);
+ _xchg(Addr, T);
+ _mov(Dest, T);
return;
}
+ // Otherwise, we need a cmpxchg loop.
+ assert(NeedsCmpxchg);
+ expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
+}
+
+void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
+ Variable *Dest, Operand *Ptr,
+ Operand *Val) {
+ // Expand a more complex RMW operation as a cmpxchg loop:
+ // For 64-bit:
+ // mov eax, [ptr]
+ // mov edx, [ptr + 4]
+ // .LABEL:
+ // mov ebx, eax
+ // <Op_Lo> ebx, <desired_adj_lo>
+ // mov ecx, edx
+ // <Op_Hi> ecx, <desired_adj_hi>
+ // lock cmpxchg8b [ptr]
+ // jne .LABEL
+ // mov <dest_lo>, eax
+  //   mov <dest_hi>, edx
+ //
+ // For 32-bit:
+ // mov eax, [ptr]
+ // .LABEL:
+ // mov <reg>, eax
+ // op <reg>, [desired_adj]
+ // lock cmpxchg [ptr], <reg>
+ // jne .LABEL
+ // mov <dest>, eax
+ //
+ // If Op_{Lo,Hi} are NULL, then just copy the value.
+ Val = legalize(Val);
+ Type Ty = Val->getType();
+ if (Ty == IceType_i64) {
+ Variable *T_edx = makeReg(IceType_i32, Reg_edx);
+ Variable *T_eax = makeReg(IceType_i32, Reg_eax);
+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
+ _mov(T_eax, loOperand(Addr));
+ _mov(T_edx, hiOperand(Addr));
+ Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
+ Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
+ InstX8632Label *Label = InstX8632Label::create(Func, this);
+ const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
+ if (!IsXchg8b) {
+ Context.insert(Label);
+ _mov(T_ebx, T_eax);
+ (this->*Op_Lo)(T_ebx, loOperand(Val));
+ _mov(T_ecx, T_edx);
+ (this->*Op_Hi)(T_ecx, hiOperand(Val));
+ } else {
+ // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
+ // It just needs the Val loaded into ebx and ecx.
+ // That can also be done before the loop.
+ _mov(T_ebx, loOperand(Val));
+ _mov(T_ecx, hiOperand(Val));
+ Context.insert(Label);
+ }
+ const bool Locked = true;
+ _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
+ _br(InstX8632Br::Br_ne, Label);
+ if (!IsXchg8b) {
+ // If Val is a variable, model the extended live range of Val through
+ // the end of the loop, since it will be re-used by the loop.
+ if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
+ Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
+ Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
+ Context.insert(InstFakeUse::create(Func, ValLo));
+ Context.insert(InstFakeUse::create(Func, ValHi));
+ }
+ } else {
+ // For xchg, the loop is slightly smaller and ebx/ecx are used.
+ Context.insert(InstFakeUse::create(Func, T_ebx));
+ Context.insert(InstFakeUse::create(Func, T_ecx));
+ }
+ // The address base is also reused in the loop.
+ Context.insert(InstFakeUse::create(Func, Addr->getBase()));
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ _mov(DestLo, T_eax);
+ _mov(DestHi, T_edx);
+ return;
+ }
+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
+ Variable *T_eax = makeReg(Ty, Reg_eax);
+ _mov(T_eax, Addr);
+ InstX8632Label *Label = InstX8632Label::create(Func, this);
+ Context.insert(Label);
+ // We want to pick a different register for T than Eax, so don't use
+ // _mov(T == NULL, T_eax).
+ Variable *T = makeReg(Ty);
+ _mov(T, T_eax);
+ (this->*Op_Lo)(T, Val);
+ const bool Locked = true;
+ _cmpxchg(Addr, T_eax, T, Locked);
+ _br(InstX8632Br::Br_ne, Label);
+ // If Val is a variable, model the extended live range of Val through
+ // the end of the loop, since it will be re-used by the loop.
+ if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
+ Context.insert(InstFakeUse::create(Func, ValVar));
+ }
+ // The address base is also reused in the loop.
+ Context.insert(InstFakeUse::create(Func, Addr->getBase()));
+ _mov(Dest, T_eax);
}
namespace {
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 001f4e6..4953ffc 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -95,9 +95,15 @@
virtual void doAddressOptLoad();
virtual void doAddressOptStore();
+ void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected,
+ Operand *Desired);
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
+ typedef void (TargetX8632::*LowerBinOp)(Variable *, Operand *);
+ void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
+ Variable *Dest, Operand *Ptr, Operand *Val);
+
// Operand legalization helpers. To deal with address mode
// constraints, the helpers will create a new Operand and emit
// instructions that guarantee that the Operand kind is one of those
@@ -177,6 +183,22 @@
void _cmp(Operand *Src0, Operand *Src1) {
Context.insert(InstX8632Icmp::create(Func, Src0, Src1));
}
+ void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
+ bool Locked) {
+ Context.insert(
+ InstX8632Cmpxchg::create(Func, DestOrAddr, Eax, Desired, Locked));
+ // Mark eax as possibly modified by cmpxchg.
+ Context.insert(
+ InstFakeDef::create(Func, Eax, llvm::dyn_cast<Variable>(DestOrAddr)));
+ }
+ void _cmpxchg8b(OperandX8632 *Addr, Variable *Edx, Variable *Eax,
+ Variable *Ecx, Variable *Ebx, bool Locked) {
+ Context.insert(
+ InstX8632Cmpxchg8b::create(Func, Addr, Edx, Eax, Ecx, Ebx, Locked));
+    // Mark edx and eax as possibly modified by cmpxchg8b.
+ Context.insert(InstFakeDef::create(Func, Edx));
+ Context.insert(InstFakeDef::create(Func, Eax));
+ }
void _cvt(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Cvt::create(Func, Dest, Src0));
}
@@ -232,6 +254,9 @@
void _mulss(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Mulss::create(Func, Dest, Src0));
}
+ void _neg(Variable *SrcDest) {
+ Context.insert(InstX8632Neg::create(Func, SrcDest));
+ }
void _or(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Or::create(Func, Dest, Src0));
}
@@ -294,7 +319,14 @@
Context.insert(InstX8632Xadd::create(Func, Dest, Src, Locked));
// The xadd exchanges Dest and Src (modifying Src).
// Model that update with a FakeDef.
- Context.insert(InstFakeDef::create(Func, Src));
+ Context.insert(
+ InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
+ }
+ void _xchg(Operand *Dest, Variable *Src) {
+ Context.insert(InstX8632Xchg::create(Func, Dest, Src));
+ // The xchg modifies Dest and Src -- model that update with a FakeDef.
+ Context.insert(
+ InstFakeDef::create(Func, Src, llvm::dyn_cast<Variable>(Dest)));
}
void _xor(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Xor::create(Func, Dest, Src0));