Lower the rest of the vector arithmetic operations. The instructions emitted by the lowering operations require memory operands to be aligned to 16 bytes. Since there is no support for aligning memory operands in Subzero, do the arithmetic in registers for now. Add vector arithmetic to the arith crosstest. Pass the -mstackrealign parameter to the crosstest clang so that llc code called back from Subzero code (helper calls) doesn't assume that the stack is aligned at the entry to the call. BUG=none R=jvoung@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/397833002

commit: 7fa22d8a73def01899c5f30f20b914c65d5850d5 [log] [tgz]
author: Matt Wala <wala@chromium.org> Thu Jul 17 12:41:31 2014 -0700
committer: Matt Wala <wala@chromium.org> Thu Jul 17 12:41:31 2014 -0700
tree: 30a91333e9a55c63b3aea9e8c19a8293cc024c96
parent: 83b8036b4e0fb45bfc0bb7e237279dce57bea42e [diff]
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index cc082d9..bb99440 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp

@@ -312,6 +312,21 @@
   return false;
 }
 
+InstX8632Pshufd::InstX8632Pshufd(Cfg *Func, Variable *Dest, Operand *Source1,
+                                 Operand *Source2)
+    : InstX8632(Func, InstX8632::Pshufd, 2, Dest) {
+  addSource(Source1);
+  addSource(Source2);
+}
+
+InstX8632Shufps::InstX8632Shufps(Cfg *Func, Variable *Dest, Operand *Source1,
+                                 Operand *Source2)
+    : InstX8632(Func, InstX8632::Shufps, 3, Dest) {
+  addSource(Dest);
+  addSource(Source1);
+  addSource(Source2);
+}
+
 InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
     : InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, NULL) {
   if (Source)
@@ -446,19 +461,23 @@
 template <> const char *InstX8632Addps::Opcode = "addps";
 template <> const char *InstX8632Adc::Opcode = "adc";
 template <> const char *InstX8632Addss::Opcode = "addss";
+template <> const char *InstX8632Padd::Opcode = "padd";
 template <> const char *InstX8632Sub::Opcode = "sub";
 template <> const char *InstX8632Subps::Opcode = "subps";
 template <> const char *InstX8632Subss::Opcode = "subss";
-template <> const char *InstX8632Psub::Opcode = "psub";
 template <> const char *InstX8632Sbb::Opcode = "sbb";
+template <> const char *InstX8632Psub::Opcode = "psub";
 template <> const char *InstX8632And::Opcode = "and";
 template <> const char *InstX8632Pand::Opcode = "pand";
 template <> const char *InstX8632Or::Opcode = "or";
+template <> const char *InstX8632Por::Opcode = "por";
 template <> const char *InstX8632Xor::Opcode = "xor";
 template <> const char *InstX8632Pxor::Opcode = "pxor";
 template <> const char *InstX8632Imul::Opcode = "imul";
 template <> const char *InstX8632Mulps::Opcode = "mulps";
 template <> const char *InstX8632Mulss::Opcode = "mulss";
+template <> const char *InstX8632Pmullw::Opcode = "pmullw";
+template <> const char *InstX8632Pmuludq::Opcode = "pmuludq";
 template <> const char *InstX8632Div::Opcode = "div";
 template <> const char *InstX8632Divps::Opcode = "divps";
 template <> const char *InstX8632Idiv::Opcode = "idiv";
@@ -490,6 +509,13 @@
   emitTwoAddress(buf, this, Func);
 }
 
+template <> void InstX8632Padd::emit(const Cfg *Func) const {
+  char buf[30];
+  snprintf(buf, llvm::array_lengthof(buf), "padd%s",
+           TypeX8632Attributes[getDest()->getType()].PackString);
+  emitTwoAddress(buf, this, Func);
+}
+
 template <> void InstX8632Subss::emit(const Cfg *Func) const {
   char buf[30];
   snprintf(buf, llvm::array_lengthof(buf), "sub%s",
@@ -497,6 +523,13 @@
   emitTwoAddress(buf, this, Func);
 }
 
+template <> void InstX8632Psub::emit(const Cfg *Func) const {
+  char buf[30];
+  snprintf(buf, llvm::array_lengthof(buf), "psub%s",
+           TypeX8632Attributes[getDest()->getType()].PackString);
+  emitTwoAddress(buf, this, Func);
+}
+
 template <> void InstX8632Mulss::emit(const Cfg *Func) const {
   char buf[30];
   snprintf(buf, llvm::array_lengthof(buf), "mul%s",
@@ -504,6 +537,18 @@
   emitTwoAddress(buf, this, Func);
 }
 
+template <> void InstX8632Pmullw::emit(const Cfg *Func) const {
+  assert(getSrc(0)->getType() == IceType_v8i16 &&
+         getSrc(1)->getType() == IceType_v8i16);
+  emitTwoAddress(Opcode, this, Func);
+}
+
+template <> void InstX8632Pmuludq::emit(const Cfg *Func) const {
+  assert(getSrc(0)->getType() == IceType_v4i32 &&
+         getSrc(1)->getType() == IceType_v4i32);
+  emitTwoAddress(Opcode, this, Func);
+}
+
 template <> void InstX8632Divss::emit(const Cfg *Func) const {
   char buf[30];
   snprintf(buf, llvm::array_lengthof(buf), "div%s",
@@ -1093,11 +1138,23 @@
   emitTwoAddress(buf, this, Func);
 }
 
-template <> void InstX8632Psub::emit(const Cfg *Func) const {
-  char buf[30];
-  snprintf(buf, llvm::array_lengthof(buf), "psub%s",
-           TypeX8632Attributes[getDest()->getType()].PackString);
-  emitTwoAddress(buf, this, Func);
+void InstX8632Pshufd::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 2);
+  Str << "\tpshufd\t";
+  getDest()->emit(Func);
+  Str << ", ";
+  getSrc(0)->emit(Func);
+  Str << ", ";
+  getSrc(1)->emit(Func);
+  Str << "\n";
+}
+
+void InstX8632Pshufd::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  dumpDest(Func);
+  Str << " = pshufd." << getDest()->getType() << " ";
+  dumpSources(Func);
 }
 
 void InstX8632Ret::emit(const Cfg *Func) const {
@@ -1112,6 +1169,25 @@
   dumpSources(Func);
 }
 
+void InstX8632Shufps::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 3);
+  Str << "\tshufps\t";
+  getDest()->emit(Func);
+  Str << ", ";
+  getSrc(1)->emit(Func);
+  Str << ", ";
+  getSrc(2)->emit(Func);
+  Str << "\n";
+}
+
+void InstX8632Shufps::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  dumpDest(Func);
+  Str << " = shufps." << getDest()->getType() << " ";
+  dumpSources(Func);
+}
+
 void InstX8632Xadd::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   if (Locked) {

diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def
index 1f6b679..8930c29 100644
--- a/src/IceInstX8632.def
+++ b/src/IceInstX8632.def

@@ -82,7 +82,7 @@
   X(IceType_v16i8, "?",  ""  , "b",  "xmmword ptr") \
   X(IceType_v8i16, "?",  ""  , "w",  "xmmword ptr") \
   X(IceType_v4i32, "dq", ""  , "d",  "xmmword ptr") \
-  X(IceType_v4f32, "ps", ""  , "",   "xmmword ptr") \
+  X(IceType_v4f32, "ps", ""  , "" ,  "xmmword ptr") \
 //#define X(tag, cvt, sdss, width)
 
 #endif // SUBZERO_SRC_ICEINSTX8632_DEF

diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 8b8a3fd..03605ca 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h

@@ -168,14 +168,19 @@
     Mulss,
     Neg,
     Or,
+    Padd,
     Pand,
     Pcmpeq,
     Pcmpgt,
+    Pmullw,
+    Pmuludq,
     Pop,
-    Push,
+    Por,
+    Pshufd,
     Psll,
     Psra,
     Psub,
+    Push,
     Pxor,
     Ret,
     Sar,
@@ -184,6 +189,7 @@
     Shld,
     Shr,
     Shrd,
+    Shufps,
     Sqrtss,
     Store,
     StoreQ,
@@ -455,6 +461,7 @@
 typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
 typedef InstX8632Binop<InstX8632::Adc> InstX8632Adc;
 typedef InstX8632Binop<InstX8632::Addss> InstX8632Addss;
+typedef InstX8632Binop<InstX8632::Padd> InstX8632Padd;
 typedef InstX8632Binop<InstX8632::Sub> InstX8632Sub;
 typedef InstX8632Binop<InstX8632::Subps> InstX8632Subps;
 typedef InstX8632Binop<InstX8632::Subss> InstX8632Subss;
@@ -463,11 +470,14 @@
 typedef InstX8632Binop<InstX8632::And> InstX8632And;
 typedef InstX8632Binop<InstX8632::Pand> InstX8632Pand;
 typedef InstX8632Binop<InstX8632::Or> InstX8632Or;
+typedef InstX8632Binop<InstX8632::Por> InstX8632Por;
 typedef InstX8632Binop<InstX8632::Xor> InstX8632Xor;
 typedef InstX8632Binop<InstX8632::Pxor> InstX8632Pxor;
 typedef InstX8632Binop<InstX8632::Imul> InstX8632Imul;
 typedef InstX8632Binop<InstX8632::Mulps> InstX8632Mulps;
 typedef InstX8632Binop<InstX8632::Mulss> InstX8632Mulss;
+typedef InstX8632Binop<InstX8632::Pmullw> InstX8632Pmullw;
+typedef InstX8632Binop<InstX8632::Pmuludq> InstX8632Pmuludq;
 typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps;
 typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss;
 typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl;
@@ -984,6 +994,27 @@
   virtual ~InstX8632Push() {}
 };
 
+// Pshufd - shuffle a vector of doublewords
+class InstX8632Pshufd : public InstX8632 {
+public:
+  static InstX8632Pshufd *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                 Operand *Source2) {
+    return new (Func->allocate<InstX8632Pshufd>())
+        InstX8632Pshufd(Func, Dest, Source1, Source2);
+  }
+  virtual void emit(const Cfg *Func) const;
+  virtual void dump(const Cfg *Func) const;
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Pshufd); }
+
+private:
+  InstX8632Pshufd(Cfg *Func, Variable *Dest, Operand *Source1,
+                  Operand *Source2);
+  InstX8632Pshufd(const InstX8632Pshufd &) LLVM_DELETED_FUNCTION;
+  InstX8632Pshufd &operator=(const InstX8632Pshufd &) LLVM_DELETED_FUNCTION;
+  virtual ~InstX8632Pshufd() {}
+  static const char *Opcode;
+};
+
 // Ret instruction.  Currently only supports the "ret" version that
 // does not pop arguments.  This instruction takes a Source operand
 // (for non-void returning functions) for liveness analysis, though
@@ -1004,6 +1035,27 @@
   virtual ~InstX8632Ret() {}
 };
 
+// Shufps - select from two vectors of floating point values
+class InstX8632Shufps : public InstX8632 {
+public:
+  static InstX8632Shufps *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                 Operand *Source2) {
+    return new (Func->allocate<InstX8632Shufps>())
+        InstX8632Shufps(Func, Dest, Source1, Source2);
+  }
+  virtual void emit(const Cfg *Func) const;
+  virtual void dump(const Cfg *Func) const;
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Shufps); }
+
+private:
+  InstX8632Shufps(Cfg *Func, Variable *Dest, Operand *Source1,
+                  Operand *Source2);
+  InstX8632Shufps(const InstX8632Shufps &) LLVM_DELETED_FUNCTION;
+  InstX8632Shufps &operator=(const InstX8632Shufps &) LLVM_DELETED_FUNCTION;
+  virtual ~InstX8632Shufps() {}
+  static const char *Opcode;
+};
+
 // Exchanging Add instruction.  Exchanges the first operand (destination
 // operand) with the second operand (source operand), then loads the sum
 // of the two values into the destination operand. The destination may be

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 19a1256..2b14a65 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp

@@ -90,6 +90,20 @@
 // The number of bits in a byte
 const unsigned X86_CHAR_BIT = 8;
 
+// Return a string representation of the type that is suitable for use
+// in an identifier.
+IceString typeIdentString(const Type Ty) {
+  IceString Str;
+  llvm::raw_string_ostream BaseOS(Str);
+  Ostream OS(&BaseOS);
+  if (isVectorType(Ty)) {
+    OS << "v" << typeNumElements(Ty) << typeElementType(Ty);
+  } else {
+    OS << Ty;
+  }
+  return BaseOS.str();
+}
+
 // In some cases, there are x-macros tables for both high-level and
 // low-level instructions/operands that use the same enum key value.
 // The tables are kept separate to maintain a proper separation
@@ -1139,58 +1153,206 @@
       break;
     }
   } else if (isVectorType(Dest->getType())) {
+    // TODO: Trap on integer divide and integer modulo by zero.
+    // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
+    //
+    // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
+    // registers.  This is a workaround for the fact that there is no
+    // support for aligning stack operands.  Once there is support,
+    // remove LEGAL_HACK.
+#define LEGAL_HACK(s) legalizeToVar((s))
     switch (Inst->getOp()) {
     case InstArithmetic::_num:
       llvm_unreachable("Unknown arithmetic operator");
       break;
-    case InstArithmetic::Add:
-    case InstArithmetic::And:
-    case InstArithmetic::Or:
-    case InstArithmetic::Xor:
-    case InstArithmetic::Sub:
-    case InstArithmetic::Mul:
-    case InstArithmetic::Shl:
-    case InstArithmetic::Lshr:
-    case InstArithmetic::Ashr:
-    case InstArithmetic::Udiv:
-    case InstArithmetic::Sdiv:
-    case InstArithmetic::Urem:
-    case InstArithmetic::Srem:
-      // TODO(wala): Handle these.
-      Func->setError("Unhandled instruction");
-      break;
+    case InstArithmetic::Add: {
+      Variable *T = makeReg(Dest->getType());
+      _movp(T, Src0);
+      _padd(T, LEGAL_HACK(Src1));
+      _movp(Dest, T);
+    } break;
+    case InstArithmetic::And: {
+      Variable *T = makeReg(Dest->getType());
+      _movp(T, Src0);
+      _pand(T, LEGAL_HACK(Src1));
+      _movp(Dest, T);
+    } break;
+    case InstArithmetic::Or: {
+      Variable *T = makeReg(Dest->getType());
+      _movp(T, Src0);
+      _por(T, LEGAL_HACK(Src1));
+      _movp(Dest, T);
+    } break;
+    case InstArithmetic::Xor: {
+      Variable *T = makeReg(Dest->getType());
+      _movp(T, Src0);
+      _pxor(T, LEGAL_HACK(Src1));
+      _movp(Dest, T);
+    } break;
+    case InstArithmetic::Sub: {
+      Variable *T = makeReg(Dest->getType());
+      _movp(T, Src0);
+      _psub(T, LEGAL_HACK(Src1));
+      _movp(Dest, T);
+    } break;
+    case InstArithmetic::Mul: {
+      if (Dest->getType() == IceType_v4i32) {
+        // Lowering sequence:
+        // Note: The mask arguments have index 0 on the left.
+        //
+        // movups  T1, Src0
+        // pshufd  T2, Src0, {1,0,3,0}
+        // pshufd  T3, Src1, {1,0,3,0}
+        // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
+        // pmuludq T1, Src1
+        // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
+        // pmuludq T2, T3
+        // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
+        // shufps  T1, T2, {0,2,0,2}
+        // pshufd  T4, T1, {0,2,1,3}
+        // movups  Dest, T4
+        //
+        // TODO(wala): SSE4.1 has pmulld.
+
+        // Mask that directs pshufd to create a vector with entries
+        // Src[1, 0, 3, 0]
+        const unsigned Constant1030 = 0x31;
+        Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);
+        // Mask that directs shufps to create a vector with entries
+        // Dest[0, 2], Src[0, 2]
+        const unsigned Mask0202 = 0x88;
+        // Mask that directs pshufd to create a vector with entries
+        // Src[0, 2, 1, 3]
+        const unsigned Mask0213 = 0xd8;
+        Variable *T1 = makeReg(IceType_v4i32);
+        Variable *T2 = makeReg(IceType_v4i32);
+        Variable *T3 = makeReg(IceType_v4i32);
+        Variable *T4 = makeReg(IceType_v4i32);
+        _movp(T1, Src0);
+        // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R
+        // with Src1 after stack operand alignment support is
+        // implemented.
+        Variable *Src0R = LEGAL_HACK(Src0);
+        Variable *Src1R = LEGAL_HACK(Src1);
+        _pshufd(T2, Src0R, Mask1030);
+        _pshufd(T3, Src1R, Mask1030);
+        _pmuludq(T1, Src1R);
+        _pmuludq(T2, T3);
+        _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
+        _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
+        _movp(Dest, T4);
+      } else if (Dest->getType() == IceType_v8i16) {
+        Variable *T = makeReg(IceType_v8i16);
+        _movp(T, Src0);
+        _pmullw(T, legalizeToVar(Src1));
+        _movp(Dest, T);
+      } else {
+        assert(Dest->getType() == IceType_v16i8);
+        // Sz_mul_v16i8
+        const IceString Helper = "Sz_mul_v16i8";
+        const SizeT MaxSrcs = 2;
+        InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
+        Call->addArg(Src0);
+        Call->addArg(Src1);
+        lowerCall(Call);
+      }
+    } break;
+    case InstArithmetic::Shl: {
+      // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
+      const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
+      const SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
+      Call->addArg(Src0);
+      Call->addArg(Src1);
+      lowerCall(Call);
+    } break;
+    case InstArithmetic::Lshr: {
+      // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
+      const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
+      const SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
+      Call->addArg(Src0);
+      Call->addArg(Src1);
+      lowerCall(Call);
+    } break;
+    case InstArithmetic::Ashr: {
+      // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
+      const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
+      const SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
+      Call->addArg(Src0);
+      Call->addArg(Src1);
+      lowerCall(Call);
+    } break;
+    case InstArithmetic::Udiv: {
+      // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
+      const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
+      const SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
+      Call->addArg(Src0);
+      Call->addArg(Src1);
+      lowerCall(Call);
+    } break;
+    case InstArithmetic::Sdiv: {
+      // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
+      const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
+      const SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
+      Call->addArg(Src0);
+      Call->addArg(Src1);
+      lowerCall(Call);
+    } break;
+    case InstArithmetic::Urem: {
+      // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
+      const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
+      const SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
+      Call->addArg(Src0);
+      Call->addArg(Src1);
+      lowerCall(Call);
+    } break;
+    case InstArithmetic::Srem: {
+      // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
+      const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
+      const SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
+      Call->addArg(Src0);
+      Call->addArg(Src1);
+      lowerCall(Call);
+    } break;
     case InstArithmetic::Fadd: {
       Variable *T = makeReg(Dest->getType());
       _movp(T, Src0);
-      _addps(T, Src1);
+      _addps(T, LEGAL_HACK(Src1));
       _movp(Dest, T);
     } break;
     case InstArithmetic::Fsub: {
       Variable *T = makeReg(Dest->getType());
       _movp(T, Src0);
-      _subps(T, Src1);
+      _subps(T, LEGAL_HACK(Src1));
       _movp(Dest, T);
     } break;
     case InstArithmetic::Fmul: {
       Variable *T = makeReg(Dest->getType());
       _movp(T, Src0);
-      _mulps(T, Src1);
+      _mulps(T, LEGAL_HACK(Src1));
       _movp(Dest, T);
     } break;
     case InstArithmetic::Fdiv: {
       Variable *T = makeReg(Dest->getType());
       _movp(T, Src0);
-      _divps(T, Src1);
+      _divps(T, LEGAL_HACK(Src1));
       _movp(Dest, T);
     } break;
     case InstArithmetic::Frem: {
       const SizeT MaxSrcs = 2;
-      InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs);
+      InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);
       Call->addArg(Src0);
       Call->addArg(Src1);
       lowerCall(Call);
     } break;
     }
+#undef LEGAL_HACK
   } else { // Dest->getType() is non-i64 scalar
     Variable *T_edx = NULL;
     Variable *T = NULL;

diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index a77d39a..58d8781 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h

@@ -276,6 +276,9 @@
   void _or(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Or::create(Func, Dest, Src0));
   }
+  void _padd(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Padd::create(Func, Dest, Src0));
+  }
   void _pand(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Pand::create(Func, Dest, Src0));
   }
@@ -285,11 +288,20 @@
   void _pcmpgt(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Pcmpgt::create(Func, Dest, Src0));
   }
+  void _pmullw(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Pmullw::create(Func, Dest, Src0));
+  }
+  void _pmuludq(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Pmuludq::create(Func, Dest, Src0));
+  }
   void _pop(Variable *Dest) {
     Context.insert(InstX8632Pop::create(Func, Dest));
   }
-  void _push(Operand *Src0, bool SuppressStackAdjustment = false) {
-    Context.insert(InstX8632Push::create(Func, Src0, SuppressStackAdjustment));
+  void _por(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Por::create(Func, Dest, Src0));
+  }
+  void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
+    Context.insert(InstX8632Pshufd::create(Func, Dest, Src0, Src1));
   }
   void _psll(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Psll::create(Func, Dest, Src0));
@@ -300,6 +312,9 @@
   void _psub(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Psub::create(Func, Dest, Src0));
   }
+  void _push(Operand *Src0, bool SuppressStackAdjustment = false) {
+    Context.insert(InstX8632Push::create(Func, Src0, SuppressStackAdjustment));
+  }
   void _pxor(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Pxor::create(Func, Dest, Src0));
   }
@@ -324,6 +339,9 @@
   void _shrd(Variable *Dest, Variable *Src0, Variable *Src1) {
     Context.insert(InstX8632Shrd::create(Func, Dest, Src0, Src1));
   }
+  void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
+    Context.insert(InstX8632Shufps::create(Func, Dest, Src0, Src1));
+  }
   void _sqrtss(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Sqrtss::create(Func, Dest, Src0));
   }
commit	7fa22d8a73def01899c5f30f20b914c65d5850d5	[log] [tgz]
author	Matt Wala <wala@chromium.org>	Thu Jul 17 12:41:31 2014 -0700
committer	Matt Wala <wala@chromium.org>	Thu Jul 17 12:41:31 2014 -0700
tree	30a91333e9a55c63b3aea9e8c19a8293cc024c96
parent	83b8036b4e0fb45bfc0bb7e237279dce57bea42e [diff]