Add atomic load/store, fetch_add, fence, and is-lock-free lowering.
Atomic loads/stores of type i8, i16, and i32 are converted to
plain load/store instructions and lowered with the existing
lowerLoad/lowerStore. Atomic stores are followed by an mfence
for sequential consistency.
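In lowering terms, a 32-bit (or narrower) atomic store becomes a plain
store followed by a fence. A sketch mirroring the AtomicStore case added
to lowerIntrinsicCall() below (illustrative, not verbatim):

  // i8/i16/i32 values: reuse the ordinary store lowering, then fence.
  InstStore *Store = InstStore::create(Func, Value, Ptr);
  lowerStore(Store); // emits a plain mov to memory
  _mfence();         // makes the store sequentially consistent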
For 64-bit types, use movq to do the 64-bit memory load/store in a
single instruction (instead of the usual lowering, which breaks it
into separate 32-bit loads/stores). For a store, this means first
bitcasting the i64 to f64 (which splits the load of the value to be
stored into two 32-bit ops) and then storing it with a single op.
For a load, load into an f64 (XMM) register and then bitcast back
to i64 (the split happens after the atomic load). This follows what
GCC does for the C++11 std::atomic<uint64_t> load/store methods
(it uses movq when -mfpmath=sse). This introduces some redundancy
between movq and movsd, but the convention seems to be to use movq
when working with integer quantities; otherwise movsd could work
too. The difference seems to be whether the XMM register's upper
64 bits are zero-filled or left alone, and zero-extending could
help avoid partial register stalls.
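For reference, the C++11 source whose GCC lowering this mimics (a sketch;
the global and the function names here are illustrative only):

  #include <atomic>
  #include <cstdint>
  std::atomic<uint64_t> G;
  uint64_t load64() { return G.load(); }   // movq mem->xmm, then split into two 32-bit halves
  void store64(uint64_t v) { G.store(v); } // combine halves into xmm, movq xmm->mem, then fence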
Handle up to i32 fetch_add. TODO: add i64 via a cmpxchg loop.
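The i64 path would follow the usual compare-and-swap loop shape. A hedged
sketch using the GCC-style __sync builtins (which compile to a lock
cmpxchg8b loop on x86-32); not part of this CL:

  uint64_t fetch_add_64(volatile uint64_t *Ptr, uint64_t Val) {
    uint64_t Old = *Ptr; // initial guess; refreshed on CAS failure
    while (true) {
      uint64_t Seen = __sync_val_compare_and_swap(Ptr, Old, Old + Val);
      if (Seen == Old)
        return Old; // CAS succeeded; Old is the value before the add
      Old = Seen;   // CAS failed; retry with the freshly observed value
    }
  }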
TODO: add some runnable crosstests to make sure that this
doesn't do funny things to integer bit patterns that happen
to look like signaling or quiet NaNs. However, the system
clang would not know how to handle the "llvm.nacl.*" intrinsics if we
choose to target that level directly via .ll files. Alternatively, (a) we
could use the old-school __sync builtins (__sync_fetch_and_add with 0 to
load), or (b) require the buildbot's clang/gcc to support C++11...
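Option (a) in a crosstest would look roughly like this (a sketch; assumes
the toolchain provides the GCC-style __sync builtins):

  // Emulate an atomic 64-bit load as a no-op read-modify-write.
  uint64_t atomic_load_64(volatile uint64_t *Ptr) {
    return __sync_fetch_and_add(Ptr, 0);
  }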
BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/342763004
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 6477683..376d454 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -166,6 +166,11 @@
addSource(Src2);
}
+InstX8632Mfence::InstX8632Mfence(Cfg *Func)
+ : InstX8632(Func, InstX8632::Mfence, 0, NULL) {
+ HasSideEffects = true;
+}
+
InstX8632Store::InstX8632Store(Cfg *Func, Operand *Value, OperandX8632 *Mem)
: InstX8632(Func, InstX8632::Store, 2, NULL) {
addSource(Value);
@@ -177,6 +182,17 @@
addSource(Source);
}
+InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem)
+ : InstX8632(Func, InstX8632::StoreQ, 2, NULL) {
+ addSource(Value);
+ addSource(Mem);
+}
+
+InstX8632Movq::InstX8632Movq(Cfg *Func, Variable *Dest, Operand *Source)
+ : InstX8632(Func, InstX8632::Movq, 1, Dest) {
+ addSource(Source);
+}
+
InstX8632Movsx::InstX8632Movsx(Cfg *Func, Variable *Dest, Operand *Source)
: InstX8632(Func, InstX8632::Movsx, 1, Dest) {
addSource(Source);
@@ -221,12 +237,34 @@
return false;
}
+bool InstX8632Movq::isRedundantAssign() const {
+ Variable *Src = llvm::dyn_cast<Variable>(getSrc(0));
+ if (Src == NULL)
+ return false;
+ if (getDest()->hasReg() && getDest()->getRegNum() == Src->getRegNum()) {
+ return true;
+ }
+ if (!getDest()->hasReg() && !Src->hasReg() &&
+ Dest->getStackOffset() == Src->getStackOffset())
+ return true;
+ return false;
+}
+
InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
: InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, NULL) {
if (Source)
addSource(Source);
}
+InstX8632Xadd::InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source,
+ bool Locked)
+ : InstX8632(Func, InstX8632::Xadd, 2, llvm::dyn_cast<Variable>(Dest)),
+ Locked(Locked) {
+ HasSideEffects = Locked;
+ addSource(Dest);
+ addSource(Source);
+}
+
// ======================== Dump routines ======================== //
void InstX8632::dump(const Cfg *Func) const {
@@ -564,6 +602,17 @@
dumpSources(Func);
}
+void InstX8632Mfence::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 0);
+ Str << "\tmfence\n";
+}
+
+void InstX8632Mfence::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "mfence\n";
+}
+
void InstX8632Store::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
@@ -583,6 +632,26 @@
getSrc(0)->dump(Func);
}
+void InstX8632StoreQ::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 2);
+ assert(getSrc(1)->getType() == IceType_i64 ||
+ getSrc(1)->getType() == IceType_f64);
+ Str << "\tmovq\t";
+ getSrc(1)->emit(Func);
+ Str << ", ";
+ getSrc(0)->emit(Func);
+ Str << "\n";
+}
+
+void InstX8632StoreQ::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "storeq." << getSrc(0)->getType() << " ";
+ getSrc(1)->dump(Func);
+ Str << ", ";
+ getSrc(0)->dump(Func);
+}
+
void InstX8632Mov::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
@@ -611,6 +680,26 @@
dumpSources(Func);
}
+void InstX8632Movq::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 1);
+ assert(getDest()->getType() == IceType_i64 ||
+ getDest()->getType() == IceType_f64);
+ Str << "\tmovq\t";
+ getDest()->emit(Func);
+ Str << ", ";
+ getSrc(0)->emit(Func);
+ Str << "\n";
+}
+
+void InstX8632Movq::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "movq." << getDest()->getType() << " ";
+ dumpDest(Func);
+ Str << ", ";
+ dumpSources(Func);
+}
+
void InstX8632Movsx::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
@@ -773,6 +862,29 @@
dumpSources(Func);
}
+void InstX8632Xadd::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ if (Locked) {
+ Str << "\tlock xadd ";
+ } else {
+ Str << "\txadd\t";
+ }
+ getSrc(0)->emit(Func);
+ Str << ", ";
+ getSrc(1)->emit(Func);
+ Str << "\n";
+}
+
+void InstX8632Xadd::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ if (Locked) {
+ Str << "lock ";
+ }
+ Type Ty = getSrc(0)->getType();
+ Str << "xadd." << Ty << " ";
+ dumpSources(Func);
+}
+
void OperandX8632::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
Str << "<OperandX8632>";
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 7e6e199..54df869 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -151,7 +151,9 @@
Imul,
Label,
Load,
+ Mfence,
Mov,
+ Movq,
Movsx,
Movzx,
Mul,
@@ -167,11 +169,13 @@
Shr,
Shrd,
Store,
+ StoreQ,
Sub,
Subss,
Test,
Ucomiss,
UD2,
+ Xadd,
Xor
};
static const char *getWidthString(Type Ty);
@@ -578,6 +582,23 @@
virtual ~InstX8632Test() {}
};
+// Mfence instruction.
+class InstX8632Mfence : public InstX8632 {
+public:
+ static InstX8632Mfence *create(Cfg *Func) {
+ return new (Func->allocate<InstX8632Mfence>()) InstX8632Mfence(Func);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Mfence); }
+
+private:
+ InstX8632Mfence(Cfg *Func);
+ InstX8632Mfence(const InstX8632Mfence &) LLVM_DELETED_FUNCTION;
+ InstX8632Mfence &operator=(const InstX8632Mfence &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Mfence() {}
+};
+
// This is essentially a "mov" instruction with an OperandX8632Mem
// operand instead of Variable as the destination. It's important
// for liveness that there is no Dest operand.
@@ -617,6 +638,45 @@
virtual ~InstX8632Mov() {}
};
+// This is essentially a "movq" instruction with an OperandX8632Mem
+// operand instead of Variable as the destination. It's important
+// for liveness that there is no Dest operand.
+class InstX8632StoreQ : public InstX8632 {
+public:
+ static InstX8632StoreQ *create(Cfg *Func, Operand *Value, OperandX8632 *Mem) {
+ return new (Func->allocate<InstX8632StoreQ>())
+ InstX8632StoreQ(Func, Value, Mem);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, StoreQ); }
+
+private:
+ InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem);
+ InstX8632StoreQ(const InstX8632StoreQ &) LLVM_DELETED_FUNCTION;
+ InstX8632StoreQ &operator=(const InstX8632StoreQ &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632StoreQ() {}
+};
+
+// Movq - copy between XMM registers, or mem64 and XMM registers.
+class InstX8632Movq : public InstX8632 {
+public:
+ static InstX8632Movq *create(Cfg *Func, Variable *Dest, Operand *Source) {
+ return new (Func->allocate<InstX8632Movq>())
+ InstX8632Movq(Func, Dest, Source);
+ }
+ virtual bool isRedundantAssign() const;
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Movq); }
+
+private:
+ InstX8632Movq(Cfg *Func, Variable *Dest, Operand *Source);
+ InstX8632Movq(const InstX8632Movq &) LLVM_DELETED_FUNCTION;
+ InstX8632Movq &operator=(const InstX8632Movq &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Movq() {}
+};
+
// Movsx - copy from a narrower integer type to a wider integer
// type, with sign extension.
class InstX8632Movsx : public InstX8632 {
@@ -744,6 +804,33 @@
virtual ~InstX8632Ret() {}
};
+// Exchanging Add instruction. Exchanges the first operand (destination
+// operand) with the second operand (source operand), then loads the sum
+// of the two values into the destination operand. The destination may be
+// a register or memory, while the source must be a register.
+//
+// Both the dest and source are updated. The caller should then insert a
+// FakeDef to reflect the second update.
+class InstX8632Xadd : public InstX8632 {
+public:
+ static InstX8632Xadd *create(Cfg *Func, Operand *Dest, Variable *Source,
+ bool Locked) {
+ return new (Func->allocate<InstX8632Xadd>())
+ InstX8632Xadd(Func, Dest, Source, Locked);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Xadd); }
+
+private:
+ bool Locked;
+
+ InstX8632Xadd(Cfg *Func, Operand *Dest, Variable *Source, bool Locked);
+ InstX8632Xadd(const InstX8632Xadd &) LLVM_DELETED_FUNCTION;
+ InstX8632Xadd &operator=(const InstX8632Xadd &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632Xadd() {}
+};
+
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEINSTX8632_H
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
index dbf79cf..02562b5 100644
--- a/src/IceIntrinsics.cpp
+++ b/src/IceIntrinsics.cpp
@@ -82,7 +82,7 @@
{ \
{ \
{ Intrinsics::AtomicStore, true } \
- , { IceType_void, Overload, IceType_i32, IceType_i32 }, 5 \
+ , { IceType_void, Overload, IceType_i32, IceType_i32 }, 4 \
} \
, "nacl.atomic.store." NameSuffix \
}
@@ -199,4 +199,9 @@
return &it->second;
}
+bool Intrinsics::VerifyMemoryOrder(uint64_t Order) {
+ // There is only one memory ordering for atomics allowed right now.
+ return Order == Intrinsics::MemoryOrderSequentiallyConsistent;
+}
+
} // end of namespace Ice
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 4f9f7de..3fbff44 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -54,6 +54,39 @@
Trap
};
+ /// Operations that can be represented by the AtomicRMW
+ /// intrinsic.
+ ///
+ /// Do not reorder these values: their order offers forward
+ /// compatibility of bitcode targeted to PNaCl.
+ enum AtomicRMWOperation {
+ AtomicInvalid = 0, // Invalid, keep first.
+ AtomicAdd,
+ AtomicSub,
+ AtomicOr,
+ AtomicAnd,
+ AtomicXor,
+ AtomicExchange,
+ AtomicNum // Invalid, keep last.
+ };
+
+ /// Memory orderings supported by PNaCl IR.
+ ///
+ /// Do not reorder these values: their order offers forward
+ /// compatibility of bitcode targeted to PNaCl.
+ enum MemoryOrder {
+ MemoryOrderInvalid = 0, // Invalid, keep first.
+ MemoryOrderRelaxed,
+ MemoryOrderConsume,
+ MemoryOrderAcquire,
+ MemoryOrderRelease,
+ MemoryOrderAcquireRelease,
+ MemoryOrderSequentiallyConsistent,
+ MemoryOrderNum // Invalid, keep last.
+ };
+
+ static bool VerifyMemoryOrder(uint64_t Order);
+
// Basic attributes related to each intrinsic, that are relevant to
// code generation. We will want to have more attributes (e.g., Setjmp
// returns twice and which affects stack coloring) once the lowering
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index af7b866..ef9bc22 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -431,7 +431,6 @@
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
}
-// static
Type TargetX8632::stackSlotType() { return IceType_i32; }
void TargetX8632::addProlog(CfgNode *Node) {
@@ -1615,7 +1614,7 @@
Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
Spill->setWeight(RegWeight::Zero);
Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
- _mov(Spill, Src0RM);
+ _movq(Spill, Src0RM);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
@@ -1658,7 +1657,7 @@
_store(T_Lo, SpillLo);
_mov(T_Hi, hiOperand(Src0));
_store(T_Hi, SpillHi);
- _mov(Dest, Spill);
+ _movq(Dest, Spill);
} break;
}
break;
@@ -1800,16 +1799,140 @@
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
switch (Instr->getIntrinsicInfo().ID) {
case Intrinsics::AtomicCmpxchg:
+ if (!Intrinsics::VerifyMemoryOrder(
+ llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
+ Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
+ return;
+ }
+ if (!Intrinsics::VerifyMemoryOrder(
+ llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
+ Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
+ return;
+ }
+ // TODO(jvoung): fill it in.
+ Func->setError("Unhandled intrinsic");
+ return;
case Intrinsics::AtomicFence:
+ if (!Intrinsics::VerifyMemoryOrder(
+ llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
+ Func->setError("Unexpected memory ordering for AtomicFence");
+ return;
+ }
+ _mfence();
+ return;
case Intrinsics::AtomicFenceAll:
- case Intrinsics::AtomicIsLockFree:
- case Intrinsics::AtomicLoad:
+ // NOTE: FenceAll should prevent any load/store from being moved
+ // across the fence (both atomic and non-atomic). The InstX8632Mfence
+ // instruction is currently marked coarsely as "HasSideEffects".
+ _mfence();
+ return;
+ case Intrinsics::AtomicIsLockFree: {
+ // X86 is always lock free for 8/16/32/64 bit accesses.
+ // TODO(jvoung): Since the result is constant when given a constant
+ // byte size, this opens up DCE opportunities.
+ Operand *ByteSize = Instr->getArg(0);
+ Variable *Dest = Instr->getDest();
+ if (ConstantInteger *CI = llvm::dyn_cast<ConstantInteger>(ByteSize)) {
+ Constant *Result;
+ switch (CI->getValue()) {
+ default:
+ // Some x86-64 processors support the cmpxchg16b instruction, which
+ // can make 16-byte operations lock free (when used with the LOCK
+ // prefix). However, that's not supported in 32-bit mode, so just
+ // return 0 even for large sizes.
+ Result = Ctx->getConstantZero(IceType_i32);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ Result = Ctx->getConstantInt(IceType_i32, 1);
+ break;
+ }
+ _mov(Dest, Result);
+ return;
+ }
+ // The PNaCl ABI requires the byte size to be a compile-time constant.
+ Func->setError("AtomicIsLockFree byte size should be compile-time const");
+ return;
+ }
+ case Intrinsics::AtomicLoad: {
+ // We require the memory address to be naturally aligned.
+ // Given that alignment, a normal load is atomic.
+ if (!Intrinsics::VerifyMemoryOrder(
+ llvm::cast<ConstantInteger>(Instr->getArg(1))->getValue())) {
+ Func->setError("Unexpected memory ordering for AtomicLoad");
+ return;
+ }
+ Variable *Dest = Instr->getDest();
+ if (Dest->getType() == IceType_i64) {
+ // Follow what GCC does and use a movq instead of what lowerLoad()
+ // normally does (split the load into two).
+ // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
+ // can't happen anyway, since this is x86-32 and integer arithmetic only
+ // happens on 32-bit quantities.
+ Variable *T = makeReg(IceType_f64);
+ OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
+ _movq(T, Addr);
+ // Then cast the bits back out of the XMM register to the i64 Dest.
+ InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
+ lowerCast(Cast);
+ // Make sure that the atomic load isn't elided.
+ Context.insert(InstFakeUse::create(Func, Dest->getLo()));
+ Context.insert(InstFakeUse::create(Func, Dest->getHi()));
+ return;
+ }
+ InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
+ lowerLoad(Load);
+ // Make sure the atomic load isn't elided.
+ Context.insert(InstFakeUse::create(Func, Dest));
+ return;
+ }
case Intrinsics::AtomicRMW:
- case Intrinsics::AtomicStore:
+ if (!Intrinsics::VerifyMemoryOrder(
+ llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
+ Func->setError("Unexpected memory ordering for AtomicRMW");
+ return;
+ }
+ lowerAtomicRMW(Instr->getDest(),
+ static_cast<uint32_t>(llvm::cast<ConstantInteger>(
+ Instr->getArg(0))->getValue()),
+ Instr->getArg(1), Instr->getArg(2));
+ return;
+ case Intrinsics::AtomicStore: {
+ if (!Intrinsics::VerifyMemoryOrder(
+ llvm::cast<ConstantInteger>(Instr->getArg(2))->getValue())) {
+ Func->setError("Unexpected memory ordering for AtomicStore");
+ return;
+ }
+ // We require the memory address to be naturally aligned.
+ // Given that alignment, a normal store is atomic.
+ // Add a fence after the store to make it visible.
+ Operand *Value = Instr->getArg(0);
+ Operand *Ptr = Instr->getArg(1);
+ if (Value->getType() == IceType_i64) {
+ // Use a movq instead of what lowerStore() normally does
+ // (split the store into two), following what GCC does.
+ // Bitcast the i64 value into an XMM register first.
+ Variable *T = makeReg(IceType_f64);
+ InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
+ lowerCast(Cast);
+ // Then store the XMM register with a movq.
+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
+ _storeq(T, Addr);
+ _mfence();
+ return;
+ }
+ InstStore *Store = InstStore::create(Func, Value, Ptr);
+ lowerStore(Store);
+ _mfence();
+ return;
+ }
case Intrinsics::Bswap:
case Intrinsics::Ctlz:
case Intrinsics::Ctpop:
case Intrinsics::Cttz:
+ // TODO(jvoung): fill it in.
Func->setError("Unhandled intrinsic");
return;
case Intrinsics::Longjmp: {
@@ -1817,7 +1940,7 @@
Call->addArg(Instr->getArg(0));
Call->addArg(Instr->getArg(1));
lowerCall(Call);
- break;
+ return;
}
case Intrinsics::Memcpy: {
// In the future, we could potentially emit an inline memcpy/memset, etc.
@@ -1827,7 +1950,7 @@
Call->addArg(Instr->getArg(1));
Call->addArg(Instr->getArg(2));
lowerCall(Call);
- break;
+ return;
}
case Intrinsics::Memmove: {
InstCall *Call = makeHelperCall("memmove", NULL, 3);
@@ -1835,7 +1958,7 @@
Call->addArg(Instr->getArg(1));
Call->addArg(Instr->getArg(2));
lowerCall(Call);
- break;
+ return;
}
case Intrinsics::Memset: {
// The value operand needs to be extended to a stack slot size
@@ -1849,32 +1972,33 @@
Call->addArg(ValExt);
Call->addArg(Instr->getArg(2));
lowerCall(Call);
- break;
+ return;
}
case Intrinsics::NaClReadTP: {
- Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
Operand *Src = OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL,
0, OperandX8632Mem::SegReg_GS);
Variable *Dest = Instr->getDest();
Variable *T = NULL;
_mov(T, Src);
_mov(Dest, T);
- break;
+ return;
}
case Intrinsics::Setjmp: {
InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
Call->addArg(Instr->getArg(0));
lowerCall(Call);
- break;
+ return;
}
case Intrinsics::Sqrt:
case Intrinsics::Stacksave:
case Intrinsics::Stackrestore:
+ // TODO(jvoung): fill it in.
Func->setError("Unhandled intrinsic");
return;
case Intrinsics::Trap:
_ud2();
- break;
+ return;
case Intrinsics::UnknownIntrinsic:
Func->setError("Should not be lowering UnknownIntrinsic");
return;
@@ -1882,6 +2006,51 @@
return;
}
+void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
+ Operand *Ptr, Operand *Val) {
+ switch (Operation) {
+ default:
+ Func->setError("Unknown AtomicRMW operation");
+ return;
+ case Intrinsics::AtomicAdd: {
+ if (Dest->getType() == IceType_i64) {
+ // Do a nasty cmpxchg8b loop. Factor this into a function.
+ // TODO(jvoung): fill it in.
+ Func->setError("Unhandled AtomicRMW operation");
+ return;
+ }
+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
+ const bool Locked = true;
+ Variable *T = NULL;
+ _mov(T, Val);
+ _xadd(Addr, T, Locked);
+ _mov(Dest, T);
+ return;
+ }
+ case Intrinsics::AtomicSub: {
+ if (Dest->getType() == IceType_i64) {
+ // Do a nasty cmpxchg8b loop.
+ // TODO(jvoung): fill it in.
+ Func->setError("Unhandled AtomicRMW operation");
+ return;
+ }
+ // Generate a memory operand from Ptr.
+ // neg...
+ // Then do the same as AtomicAdd.
+ // TODO(jvoung): fill it in.
+ Func->setError("Unhandled AtomicRMW operation");
+ return;
+ }
+ case Intrinsics::AtomicOr:
+ case Intrinsics::AtomicAnd:
+ case Intrinsics::AtomicXor:
+ case Intrinsics::AtomicExchange:
+ // TODO(jvoung): fill it in.
+ Func->setError("Unhandled AtomicRMW operation");
+ return;
+ }
+}
+
namespace {
bool isAdd(const Inst *Inst) {
@@ -2018,15 +2187,7 @@
// optimization already creates an OperandX8632Mem operand, so it
// doesn't need another level of transformation.
Type Ty = Inst->getDest()->getType();
- Operand *Src0 = Inst->getSourceAddress();
- // Address mode optimization already creates an OperandX8632Mem
- // operand, so it doesn't need another level of transformation.
- if (!llvm::isa<OperandX8632Mem>(Src0)) {
- Variable *Base = llvm::dyn_cast<Variable>(Src0);
- Constant *Offset = llvm::dyn_cast<Constant>(Src0);
- assert(Base || Offset);
- Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
- }
+ Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);
// Fuse this load with a subsequent Arithmetic instruction in the
// following situations:
@@ -2034,6 +2195,8 @@
// a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
//
// TODO: Clean up and test thoroughly.
+ // (E.g., if there is an mfence-all make sure the load ends up on the
+ // same side of the fence).
//
// TODO: Why limit to Arithmetic instructions? This could probably be
// applied to most any instruction type. Look at all source operands
@@ -2164,19 +2327,7 @@
void TargetX8632::lowerStore(const InstStore *Inst) {
Operand *Value = Inst->getData();
Operand *Addr = Inst->getAddr();
- OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
- // Address mode optimization already creates an OperandX8632Mem
- // operand, so it doesn't need another level of transformation.
- if (!NewAddr) {
- // The address will be either a constant (which represents a global
- // variable) or a variable, so either the Base or Offset component
- // of the OperandX8632Mem will be set.
- Variable *Base = llvm::dyn_cast<Variable>(Addr);
- Constant *Offset = llvm::dyn_cast<Constant>(Addr);
- assert(Base || Offset);
- NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
- }
- NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));
+ OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
if (NewAddr->getType() == IceType_i64) {
Value = legalize(Value);
@@ -2294,10 +2445,11 @@
// need to go in uninitialized registers.
From = Ctx->getConstantZero(From->getType());
}
- bool NeedsReg = !(Allowed & Legal_Imm) ||
+ bool NeedsReg =
+ !(Allowed & Legal_Imm) ||
// ConstantFloat and ConstantDouble are actually memory operands.
- (!(Allowed & Legal_Mem) && (From->getType() == IceType_f32 ||
- From->getType() == IceType_f64));
+ (!(Allowed & Legal_Mem) &&
+ (From->getType() == IceType_f32 || From->getType() == IceType_f64));
if (NeedsReg) {
Variable *Reg = makeReg(From->getType(), RegNum);
_mov(Reg, From);
@@ -2330,6 +2482,20 @@
return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
}
+OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
+ OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
+ // It may be the case that address mode optimization already creates
+ // an OperandX8632Mem, so in that case it wouldn't need another level
+ // of transformation.
+ if (!Mem) {
+ Variable *Base = llvm::dyn_cast<Variable>(Operand);
+ Constant *Offset = llvm::dyn_cast<Constant>(Operand);
+ assert(Base || Offset);
+ Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
+ }
+ return llvm::cast<OperandX8632Mem>(legalize(Mem));
+}
+
Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for x86-32.
assert(Type != IceType_i64);
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 7902136..972b29f 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -94,6 +94,9 @@
virtual void doAddressOptLoad();
virtual void doAddressOptStore();
+ void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
+ Operand *Val);
+
// Operand legalization helpers. To deal with address mode
// constraints, the helpers will create a new Operand and emit
// instructions that guarantee that the Operand kind is one of those
@@ -114,6 +117,10 @@
int32_t RegNum = Variable::NoRegister);
Variable *legalizeToVar(Operand *From, bool AllowOverlap = false,
int32_t RegNum = Variable::NoRegister);
+ // Turn a pointer operand into a memory operand that can be
+ // used by a real load/store operation. Legalizes the operand as well.
+ // This is a nop if the operand is already a legal memory operand.
+ OperandX8632Mem *FormMemoryOperand(Operand *Ptr, Type Ty);
Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
InstCall *makeHelperCall(const IceString &Name, Variable *Dest,
@@ -180,6 +187,7 @@
void _imul(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Imul::create(Func, Dest, Src0));
}
+ void _mfence() { Context.insert(InstX8632Mfence::create(Func)); }
// If Dest=NULL is passed in, then a new variable is created, marked
// as infinite register allocation weight, and returned through the
// in/out Dest argument.
@@ -191,6 +199,9 @@
Context.insert(InstX8632Mov::create(Func, Dest, Src0));
}
}
+ void _movq(Variable *Dest, Operand *Src0) {
+ Context.insert(InstX8632Movq::create(Func, Dest, Src0));
+ }
void _movsx(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Movsx::create(Func, Dest, Src0));
}
@@ -236,6 +247,9 @@
void _store(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632Store::create(Func, Value, Mem));
}
+ void _storeq(Operand *Value, OperandX8632 *Mem) {
+ Context.insert(InstX8632StoreQ::create(Func, Value, Mem));
+ }
void _sub(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Sub::create(Func, Dest, Src0));
}
@@ -249,6 +263,12 @@
Context.insert(InstX8632Ucomiss::create(Func, Src0, Src1));
}
void _ud2() { Context.insert(InstX8632UD2::create(Func)); }
+ void _xadd(Operand *Dest, Variable *Src, bool Locked) {
+ Context.insert(InstX8632Xadd::create(Func, Dest, Src, Locked));
+ // The xadd exchanges Dest and Src (modifying Src).
+ // Model that update with a FakeDef.
+ Context.insert(InstFakeDef::create(Func, Src));
+ }
void _xor(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Xor::create(Func, Dest, Src0));
}
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index 2b323f7..c3a98df 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -599,8 +599,6 @@
}
}
if (Call->getNumArgs() + 1 != I->NumTypes) {
- std::cerr << "Call->getNumArgs() " << (int)Call->getNumArgs()
- << " I->NumTypes " << (int)I->NumTypes << "\n";
report_fatal_error("Mismatched # of args.");
}
for (size_t i = 1; i < I->NumTypes; ++i) {