Lower insertelement and extractelement. Use instructions that do the operations in registers and that are available in SSE2. Spill to memory to perform the operation in the absence of any other reasonable options (v16i8 and v16i1). Unfortunately there is no natural class of SSE2 instructions that insertelement / extractelement can get lowered to for all vector types (though pinsr[bwd] and pextr[bwd] are available in SSE4.1). There are in some cases a large number of choices available for lowering and I have not looked into which choices are the best yet, besides using LLVM output as a guide. BUG=none R=jvoung@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/401523003

commit: 49889239d4c7ab296c7430722d36032d905110b6 [log] [tgz]
author: Matt Wala <wala@chromium.org> Fri Jul 18 12:45:09 2014 -0700
committer: Matt Wala <wala@chromium.org> Fri Jul 18 12:45:09 2014 -0700
tree: 1417eefdd0b05053a5c34913bac94d74d5bb0b42
parent: 7fa22d8a73def01899c5f30f20b914c65d5850d5 [diff]
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index 9ba1fea..9f4c1d8 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp

@@ -337,6 +337,10 @@
       return convertArithInstruction(Inst, Ice::InstArithmetic::Or);
     case Instruction::Xor:
       return convertArithInstruction(Inst, Ice::InstArithmetic::Xor);
+    case Instruction::ExtractElement:
+      return convertExtractElementInstruction(cast<ExtractElementInst>(Inst));
+    case Instruction::InsertElement:
+      return convertInsertElementInstruction(cast<InsertElementInst>(Inst));
     case Instruction::Call:
       return convertCallInstruction(cast<CallInst>(Inst));
     case Instruction::Alloca:
@@ -534,6 +538,22 @@
     return Ice::InstFcmp::create(Func, Cond, Dest, Src0, Src1);
   }
 
+  // Build the ICE extractelement for Inst.  The destination is mapped
+  // first, then operand 0 (the vector) and operand 1 (the index) are
+  // converted, in that order.
+  Ice::Inst *convertExtractElementInstruction(const ExtractElementInst *Inst) {
+    Ice::Variable *Result = mapValueToIceVar(Inst);
+    Ice::Operand *Vector = convertValue(Inst->getOperand(0));
+    Ice::Operand *Index = convertValue(Inst->getOperand(1));
+    Ice::Inst *Extract =
+        Ice::InstExtractElement::create(Func, Result, Vector, Index);
+    return Extract;
+  }
+
+  // Build the ICE insertelement for Inst.  The destination is mapped
+  // first; operands 0, 1 and 2 are then converted in that order.
+  Ice::Inst *convertInsertElementInstruction(const InsertElementInst *Inst) {
+    Ice::Variable *Result = mapValueToIceVar(Inst);
+    Ice::Operand *Vector = convertValue(Inst->getOperand(0));
+    Ice::Operand *Element = convertValue(Inst->getOperand(1));
+    Ice::Operand *Index = convertValue(Inst->getOperand(2));
+    Ice::Inst *Insert =
+        Ice::InstInsertElement::create(Func, Result, Vector, Element, Index);
+    return Insert;
+  }
+
   Ice::Inst *convertSelectInstruction(const SelectInst *Inst) {
     Ice::Variable *Dest = mapValueToIceVar(Inst);
     Ice::Operand *Cond = convertValue(Inst->getCondition());

diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 12ca16c..004b555 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp

@@ -267,6 +267,13 @@
   addSource(Source);
 }
 
+InstExtractElement::InstExtractElement(Cfg *Func, Variable *Dest,
+                                       Operand *Source1, Operand *Source2)
+    : Inst(Func, Inst::ExtractElement, 2, Dest) { // two source operands
+  addSource(Source1); // the vector (lowering reads it as getSrc(0))
+  addSource(Source2); // the element index (lowering reads it as getSrc(1))
+}
+
 InstFcmp::InstFcmp(Cfg *Func, FCond Condition, Variable *Dest, Operand *Source1,
                    Operand *Source2)
     : Inst(Func, Inst::Fcmp, 2, Dest), Condition(Condition) {
@@ -281,6 +288,15 @@
   addSource(Source2);
 }
 
+InstInsertElement::InstInsertElement(Cfg *Func, Variable *Dest,
+                                     Operand *Source1, Operand *Source2,
+                                     Operand *Source3)
+    : Inst(Func, Inst::InsertElement, 3, Dest) { // three source operands
+  addSource(Source1); // the vector (lowering reads it as getSrc(0))
+  addSource(Source2); // the element to insert (read as getSrc(1))
+  addSource(Source3); // the element index (read as getSrc(2))
+}
+
 InstLoad::InstLoad(Cfg *Func, Variable *Dest, Operand *SourceAddr)
     : Inst(Func, Inst::Load, 1, Dest) {
   addSource(SourceAddr);
@@ -586,6 +602,31 @@
   dumpSources(Func);
 }
 
+// Dump the instruction to the dump stream in the form
+//   <dest> = extractelement <type0> <src0>, <type1> <src1>
+void InstExtractElement::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  dumpDest(Func);
+  Str << " = extractelement ";
+  // Source 0: its type followed by the operand itself.
+  Str << getSrc(0)->getType() << " ";
+  getSrc(0)->dump(Func);
+  Str << ", ";
+  // Source 1: its type followed by the operand itself.
+  Str << getSrc(1)->getType() << " ";
+  getSrc(1)->dump(Func);
+}
+
+// Dump the instruction to the dump stream in the form
+//   <dest> = insertelement <type0> <src0>, <type1> <src1>, <type2> <src2>
+void InstInsertElement::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  dumpDest(Func);
+  Str << " = insertelement ";
+  // Source 0: its type followed by the operand itself.
+  Str << getSrc(0)->getType() << " ";
+  getSrc(0)->dump(Func);
+  Str << ", ";
+  // Source 1: its type followed by the operand itself.
+  Str << getSrc(1)->getType() << " ";
+  getSrc(1)->dump(Func);
+  Str << ", ";
+  // Source 2: its type followed by the operand itself.
+  Str << getSrc(2)->getType() << " ";
+  getSrc(2)->dump(Func);
+}
+
 void InstFcmp::dump(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrDump();
   dumpDest(Func);

diff --git a/src/IceInst.h b/src/IceInst.h
index 0397e02..0a6c61d 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h

@@ -41,9 +41,11 @@
     Br,
     Call,
     Cast,
+    ExtractElement,
     Fcmp,
     Icmp,
     IntrinsicCall,
+    InsertElement,
     Load,
     Phi,
     Ret,
@@ -344,6 +346,29 @@
   const OpKind CastKind;
 };
 
+// ExtractElement instruction: getSrc(0) is the vector, getSrc(1) the index.
+class InstExtractElement : public Inst {
+public:
+  static InstExtractElement *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                    Operand *Source2) {
+    return new (Func->allocateInst<InstExtractElement>()) // storage from Func
+        InstExtractElement(Func, Dest, Source1, Source2);
+  }
+
+  virtual void dump(const Cfg *Func) const;
+  static bool classof(const Inst *Inst) {
+    return Inst->getKind() == ExtractElement;
+  }
+
+private: // instances are only built through create()
+  InstExtractElement(Cfg *Func, Variable *Dest, Operand *Source1,
+                     Operand *Source2);
+  InstExtractElement(const InstExtractElement &) LLVM_DELETED_FUNCTION;
+  InstExtractElement &
+  operator=(const InstExtractElement &) LLVM_DELETED_FUNCTION;
+  virtual ~InstExtractElement() {}
+};
+
 // Floating-point comparison instruction.  The source operands are
 // captured in getSrc(0) and getSrc(1).
 class InstFcmp : public Inst {
@@ -402,6 +427,28 @@
   const ICond Condition;
 };
 
+// InsertElement: getSrc(0) is the vector, (1) the element, (2) the index.
+class InstInsertElement : public Inst {
+public:
+  static InstInsertElement *create(Cfg *Func, Variable *Dest, Operand *Source1,
+                                   Operand *Source2, Operand *Source3) {
+    return new (Func->allocateInst<InstInsertElement>()) // storage from Func
+        InstInsertElement(Func, Dest, Source1, Source2, Source3);
+  }
+
+  virtual void dump(const Cfg *Func) const;
+  static bool classof(const Inst *Inst) {
+    return Inst->getKind() == InsertElement;
+  }
+
+private: // instances are only built through create()
+  InstInsertElement(Cfg *Func, Variable *Dest, Operand *Source1,
+                    Operand *Source2, Operand *Source3);
+  InstInsertElement(const InstInsertElement &) LLVM_DELETED_FUNCTION;
+  InstInsertElement &operator=(const InstInsertElement &) LLVM_DELETED_FUNCTION;
+  virtual ~InstInsertElement() {}
+};
+
 // Call to an intrinsic function.  The call target is captured as getSrc(0),
 // and arg I is captured as getSrc(I+1).
 class InstIntrinsicCall : public InstCall {

diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index bb99440..baa145f 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp

@@ -42,7 +42,7 @@
   const char *PackString;  // b, w, d, or <blank>
   const char *WidthString; // {byte,word,dword,qword} ptr
 } TypeX8632Attributes[] = {
-#define X(tag, cvt, sdss, pack, width)                                         \
+#define X(tag, elementty, cvt, sdss, pack, width)                              \
   { cvt, "" sdss, pack, width }                                                \
   ,
     ICETYPEX8632_TABLE
@@ -312,21 +312,6 @@
   return false;
 }
 
-InstX8632Pshufd::InstX8632Pshufd(Cfg *Func, Variable *Dest, Operand *Source1,
-                                 Operand *Source2)
-    : InstX8632(Func, InstX8632::Pshufd, 2, Dest) {
-  addSource(Source1);
-  addSource(Source2);
-}
-
-InstX8632Shufps::InstX8632Shufps(Cfg *Func, Variable *Dest, Operand *Source1,
-                                 Operand *Source2)
-    : InstX8632(Func, InstX8632::Shufps, 3, Dest) {
-  addSource(Dest);
-  addSource(Source1);
-  addSource(Source2);
-}
-
 InstX8632Ret::InstX8632Ret(Cfg *Func, Variable *Source)
     : InstX8632(Func, InstX8632::Ret, Source ? 1 : 0, NULL) {
   if (Source)
@@ -454,9 +439,15 @@
   Str << "\n";
 }
 
+
+// Unary ops
 template <> const char *InstX8632Bsf::Opcode = "bsf";
 template <> const char *InstX8632Bsr::Opcode = "bsr";
+template <> const char *InstX8632Lea::Opcode = "lea";
+template <> const char *InstX8632Movd::Opcode = "movd";
+template <> const char *InstX8632Movss::Opcode = "movss";
 template <> const char *InstX8632Sqrtss::Opcode = "sqrtss";
+// Binary ops
 template <> const char *InstX8632Add::Opcode = "add";
 template <> const char *InstX8632Addps::Opcode = "addps";
 template <> const char *InstX8632Adc::Opcode = "adc";
@@ -489,6 +480,12 @@
 template <> const char *InstX8632Psra::Opcode = "psra";
 template <> const char *InstX8632Pcmpeq::Opcode = "pcmpeq";
 template <> const char *InstX8632Pcmpgt::Opcode = "pcmpgt";
+// Ternary ops
+template <> const char *InstX8632Shufps::Opcode = "shufps";
+template <> const char *InstX8632Pinsrw::Opcode = "pinsrw";
+// Three address ops
+template <> const char *InstX8632Pextrw::Opcode = "pextrw";
+template <> const char *InstX8632Pshufd::Opcode = "pshufd";
 
 template <> void InstX8632Sqrtss::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
@@ -556,6 +553,22 @@
   emitTwoAddress(buf, this, Func);
 }
 
+template <> void InstX8632Div::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 3); // three sources are recorded on the instruction
+  Str << "\t" << Opcode << "\t";
+  getSrc(1)->emit(Func); // only source 1 appears in the emitted text
+  Str << "\n";
+}
+
+template <> void InstX8632Idiv::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 3); // three sources are recorded on the instruction
+  Str << "\t" << Opcode << "\t";
+  getSrc(1)->emit(Func); // only source 1 appears in the emitted text
+  Str << "\n";
+}
+
 template <> void InstX8632Imul::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 2);
@@ -868,6 +881,25 @@
   getSrc(0)->dump(Func);
 }
 
+// Emit "lea <dest>, <src0>".  The destination must already have a
+// register assigned.
+template <> void InstX8632Lea::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 1);
+  assert(getDest()->hasReg());
+  // Use the shared Opcode string, like the other specialized emitters,
+  // instead of repeating the mnemonic literal here.
+  Str << "\t" << Opcode << "\t";
+  getDest()->emit(Func);
+  Str << ", ";
+  Operand *Src0 = getSrc(0);
+  if (Variable *VSrc0 = llvm::dyn_cast<Variable>(Src0)) {
+    Type Ty = VSrc0->getType();
+    // lea on x86-32 doesn't accept mem128 operands, so cast VSrc0 to an
+    // acceptable type.
+    VSrc0->asType(isVectorType(Ty) ? IceType_i32 : Ty).emit(Func);
+  } else {
+    Src0->emit(Func);
+  }
+  Str << "\n";
+}
+
 void InstX8632Mov::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 1);
@@ -893,6 +925,9 @@
   // safe, we instead widen the dest to match src.  This works even
   // for stack-allocated dest variables because typeWidthOnStack()
   // pads to a 4-byte boundary even if only a lower portion is used.
+  // TODO: This assert disallows usages such as copying a floating point
+  // value between a vector and a scalar (which movss is used for).
+  // Clean this up.
   assert(Func->getTarget()->typeWidthInBytesOnStack(getDest()->getType()) ==
          Func->getTarget()->typeWidthInBytesOnStack(Src->getType()));
   getDest()->asType(Src->getType()).emit(Func);
@@ -1066,6 +1101,39 @@
   emitTwoAddress(buf, this, Func);
 }
 
+// Emit "pextrw <dest>, <src0>, <src1>", printing the i16 destination
+// register under its 32-bit name.
+template <> void InstX8632Pextrw::emit(const Cfg *Func) const {
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 2);
+  Out << "\t" << Opcode << "\t";
+  Variable *Target = getDest();
+  assert(Target->hasReg() && Target->getType() == IceType_i16);
+  // The instruction's destination operand is an r32.
+  Target->asType(IceType_i32).emit(Func);
+  Out << ", ";
+  Operand *Vector = getSrc(0);
+  Vector->emit(Func);
+  Out << ", ";
+  Operand *Selector = getSrc(1);
+  Selector->emit(Func);
+  Out << "\n";
+}
+
+// Emit "pinsrw <dest>, <src1>, <src2>".  Source 0 is not printed.
+template <> void InstX8632Pinsrw::emit(const Cfg *Func) const {
+  Ostream &Out = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 3);
+  Out << "\t" << Opcode << "\t";
+  getDest()->emit(Func);
+  Out << ", ";
+  Operand *Elem = getSrc(1);
+  Variable *ElemVar = llvm::dyn_cast<Variable>(Elem);
+  if (!ElemVar) {
+    Elem->emit(Func);
+  } else {
+    // A register operand is printed as r32; otherwise it is printed
+    // with the 16-bit type.
+    Type EmitTy = IceType_i16;
+    if (ElemVar->hasReg())
+      EmitTy = IceType_i32;
+    ElemVar->asType(EmitTy).emit(Func);
+  }
+  Out << ", ";
+  getSrc(2)->emit(Func);
+  Out << "\n";
+}
+
 void InstX8632Pop::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 0);
@@ -1138,25 +1206,6 @@
   emitTwoAddress(buf, this, Func);
 }
 
-void InstX8632Pshufd::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 2);
-  Str << "\tpshufd\t";
-  getDest()->emit(Func);
-  Str << ", ";
-  getSrc(0)->emit(Func);
-  Str << ", ";
-  getSrc(1)->emit(Func);
-  Str << "\n";
-}
-
-void InstX8632Pshufd::dump(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = pshufd." << getDest()->getType() << " ";
-  dumpSources(Func);
-}
-
 void InstX8632Ret::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   Str << "\tret\n";
@@ -1169,25 +1218,6 @@
   dumpSources(Func);
 }
 
-void InstX8632Shufps::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 3);
-  Str << "\tshufps\t";
-  getDest()->emit(Func);
-  Str << ", ";
-  getSrc(1)->emit(Func);
-  Str << ", ";
-  getSrc(2)->emit(Func);
-  Str << "\n";
-}
-
-void InstX8632Shufps::dump(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = shufps." << getDest()->getType() << " ";
-  dumpSources(Func);
-}
-
 void InstX8632Xadd::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   if (Locked) {

diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def
index 8930c29..be7aeb5 100644
--- a/src/IceInstX8632.def
+++ b/src/IceInstX8632.def

@@ -66,23 +66,23 @@
   X(Br_p,        "p",  "jp")   \
 //#define X(tag, dump, emit)
 
-#define ICETYPEX8632_TABLE                          \
-  /* tag,          cvt, sdss,  pack, width */       \
-  X(IceType_void,  "?",  ""  , "" ,  "???")         \
-  X(IceType_i1,    "si", ""  , "" ,  "byte ptr")    \
-  X(IceType_i8,    "si", ""  , "" ,  "byte ptr")    \
-  X(IceType_i16,   "si", ""  , "" ,  "word ptr")    \
-  X(IceType_i32,   "si", ""  , "" ,  "dword ptr")   \
-  X(IceType_i64,   "si", ""  , "" ,  "qword ptr")   \
-  X(IceType_f32,   "ss", "ss", "" ,  "dword ptr")   \
-  X(IceType_f64,   "sd", "sd", "" ,  "qword ptr")   \
-  X(IceType_v4i1,  "?",  ""  , "" ,  "xmmword ptr") \
-  X(IceType_v8i1,  "?",  ""  , "" ,  "xmmword ptr") \
-  X(IceType_v16i1, "?",  ""  , "" ,  "xmmword ptr") \
-  X(IceType_v16i8, "?",  ""  , "b",  "xmmword ptr") \
-  X(IceType_v8i16, "?",  ""  , "w",  "xmmword ptr") \
-  X(IceType_v4i32, "dq", ""  , "d",  "xmmword ptr") \
-  X(IceType_v4f32, "ps", ""  , "" ,  "xmmword ptr") \
-//#define X(tag, cvt, sdss, width)
+#define ICETYPEX8632_TABLE                                        \
+  /* tag,          element type, cvt, sdss,  pack, width */       \
+  X(IceType_void,  IceType_void, "?" , ""  , "" ,  "???")         \
+  X(IceType_i1,    IceType_void, "si", ""  , "" ,  "byte ptr")    \
+  X(IceType_i8,    IceType_void, "si", ""  , "" ,  "byte ptr")    \
+  X(IceType_i16,   IceType_void, "si", ""  , "" ,  "word ptr")    \
+  X(IceType_i32,   IceType_void, "si", ""  , "" ,  "dword ptr")   \
+  X(IceType_i64,   IceType_void, "si", ""  , "" ,  "qword ptr")   \
+  X(IceType_f32,   IceType_void, "ss", "ss", "" ,  "dword ptr")   \
+  X(IceType_f64,   IceType_void, "sd", "sd", "" ,  "qword ptr")   \
+  X(IceType_v4i1,  IceType_i32 , "?" , ""  , "" ,  "xmmword ptr") \
+  X(IceType_v8i1,  IceType_i16 , "?" , ""  , "" ,  "xmmword ptr") \
+  X(IceType_v16i1, IceType_i8  , "?" , ""  , "" ,  "xmmword ptr") \
+  X(IceType_v16i8, IceType_i8  , "?" , ""  , "b",  "xmmword ptr") \
+  X(IceType_v8i16, IceType_i16 , "?" , ""  , "w",  "xmmword ptr") \
+  X(IceType_v4i32, IceType_i32 , "dq", ""  , "d",  "xmmword ptr") \
+  X(IceType_v4f32, IceType_f32 , "ps", ""  , "" ,  "xmmword ptr") \
+//#define X(tag, elementty, cvt, sdss, pack, width)
 
 #endif // SUBZERO_SRC_ICEINSTX8632_DEF

diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 03605ca..db60d68 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h

@@ -156,11 +156,14 @@
     Idiv,
     Imul,
     Label,
+    Lea,
     Load,
     Mfence,
     Mov,
+    Movd,
     Movp,
     Movq,
+    Movss,
     Movsx,
     Movzx,
     Mul,
@@ -172,6 +175,8 @@
     Pand,
     Pcmpeq,
     Pcmpgt,
+    Pextrw,
+    Pinsrw,
     Pmullw,
     Pmuludq,
     Pop,
@@ -430,7 +435,11 @@
     Ostream &Str = Func->getContext()->getStrEmit();
     assert(getSrcSize() == 3);
     Str << "\t" << Opcode << "\t";
+    getDest()->emit(Func);
+    Str << ", ";
     getSrc(1)->emit(Func);
+    Str << ", ";
+    getSrc(2)->emit(Func);
     Str << "\n";
   }
   virtual void dump(const Cfg *Func) const {
@@ -454,8 +463,54 @@
   static const char *Opcode;
 };
 
+// Instructions of the form x := y op z, emitted as "op x, y, z".
+template <InstX8632::InstKindX8632 K>
+class InstX8632ThreeAddressop : public InstX8632 {
+public:
+  static InstX8632ThreeAddressop *create(Cfg *Func, Variable *Dest,
+                                         Operand *Source0, Operand *Source1) {
+    return new (Func->allocate<InstX8632ThreeAddressop>()) // storage from Func
+        InstX8632ThreeAddressop(Func, Dest, Source0, Source1);
+  }
+  virtual void emit(const Cfg *Func) const {
+    Ostream &Str = Func->getContext()->getStrEmit();
+    assert(getSrcSize() == 2);
+    Str << "\t" << Opcode << "\t";
+    getDest()->emit(Func); // first operand: the destination
+    Str << ", ";
+    getSrc(0)->emit(Func); // second operand: source 0
+    Str << ", ";
+    getSrc(1)->emit(Func); // third operand: source 1
+    Str << "\n";
+  }
+  virtual void dump(const Cfg *Func) const {
+    Ostream &Str = Func->getContext()->getStrDump();
+    dumpDest(Func);
+    Str << " = " << Opcode << "." << getDest()->getType() << " ";
+    dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
+
+private: // instances are only built through create()
+  InstX8632ThreeAddressop(Cfg *Func, Variable *Dest, Operand *Source0,
+                          Operand *Source1)
+      : InstX8632(Func, K, 2, Dest) {
+    addSource(Source0);
+    addSource(Source1);
+  }
+  InstX8632ThreeAddressop(const InstX8632ThreeAddressop &)
+      LLVM_DELETED_FUNCTION;
+  InstX8632ThreeAddressop &
+  operator=(const InstX8632ThreeAddressop &) LLVM_DELETED_FUNCTION;
+  virtual ~InstX8632ThreeAddressop() {}
+  static const char *Opcode; // mnemonic; defined per instantiation elsewhere
+};
+
 typedef InstX8632Unaryop<InstX8632::Bsf> InstX8632Bsf;
 typedef InstX8632Unaryop<InstX8632::Bsr> InstX8632Bsr;
+typedef InstX8632Unaryop<InstX8632::Lea> InstX8632Lea;
+typedef InstX8632Unaryop<InstX8632::Movd> InstX8632Movd;
+typedef InstX8632Unaryop<InstX8632::Movss> InstX8632Movss;
 typedef InstX8632Unaryop<InstX8632::Sqrtss> InstX8632Sqrtss;
 typedef InstX8632Binop<InstX8632::Add> InstX8632Add;
 typedef InstX8632Binop<InstX8632::Addps> InstX8632Addps;
@@ -489,6 +544,10 @@
 typedef InstX8632Binop<InstX8632::Pcmpgt> InstX8632Pcmpgt;
 typedef InstX8632Ternop<InstX8632::Idiv> InstX8632Idiv;
 typedef InstX8632Ternop<InstX8632::Div> InstX8632Div;
+typedef InstX8632Ternop<InstX8632::Pinsrw> InstX8632Pinsrw;
+typedef InstX8632Ternop<InstX8632::Shufps> InstX8632Shufps;
+typedef InstX8632ThreeAddressop<InstX8632::Pextrw> InstX8632Pextrw;
+typedef InstX8632ThreeAddressop<InstX8632::Pshufd> InstX8632Pshufd;
 
 // Base class for a lockable x86-32 instruction (emits a locked prefix).
 class InstX8632Lockable : public InstX8632 {
@@ -994,27 +1053,6 @@
   virtual ~InstX8632Push() {}
 };
 
-// Pshufd - shuffle a vector of doublewords 
-class InstX8632Pshufd : public InstX8632 {
-public:
-  static InstX8632Pshufd *create(Cfg *Func, Variable *Dest, Operand *Source1,
-                                 Operand *Source2) {
-    return new (Func->allocate<InstX8632Pshufd>())
-        InstX8632Pshufd(Func, Dest, Source1, Source2);
-  }
-  virtual void emit(const Cfg *Func) const;
-  virtual void dump(const Cfg *Func) const;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Pshufd); }
-
-private:
-  InstX8632Pshufd(Cfg *Func, Variable *Dest, Operand *Source1,
-                  Operand *Source2);
-  InstX8632Pshufd(const InstX8632Pshufd &) LLVM_DELETED_FUNCTION;
-  InstX8632Pshufd &operator=(const InstX8632Pshufd &) LLVM_DELETED_FUNCTION;
-  virtual ~InstX8632Pshufd() {}
-  static const char *Opcode;
-};
-
 // Ret instruction.  Currently only supports the "ret" version that
 // does not pop arguments.  This instruction takes a Source operand
 // (for non-void returning functions) for liveness analysis, though
@@ -1035,27 +1073,6 @@
   virtual ~InstX8632Ret() {}
 };
 
-// Shufps - select from two vectors of floating point values
-class InstX8632Shufps : public InstX8632 {
-public:
-  static InstX8632Shufps *create(Cfg *Func, Variable *Dest, Operand *Source1,
-                                 Operand *Source2) {
-    return new (Func->allocate<InstX8632Shufps>())
-        InstX8632Shufps(Func, Dest, Source1, Source2);
-  }
-  virtual void emit(const Cfg *Func) const;
-  virtual void dump(const Cfg *Func) const;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Shufps); }
-
-private:
-  InstX8632Shufps(Cfg *Func, Variable *Dest, Operand *Source1,
-                  Operand *Source2);
-  InstX8632Shufps(const InstX8632Shufps &) LLVM_DELETED_FUNCTION;
-  InstX8632Shufps &operator=(const InstX8632Shufps &) LLVM_DELETED_FUNCTION;
-  virtual ~InstX8632Shufps() {}
-  static const char *Opcode;
-};
-
 // Exchanging Add instruction.  Exchanges the first operand (destination
 // operand) with the second operand (source operand), then loads the sum
 // of the two values into the destination operand. The destination may be

diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index a5dd39a..3f6098c 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp

@@ -110,12 +110,18 @@
   case Inst::Cast:
     lowerCast(llvm::dyn_cast<InstCast>(Inst));
     break;
+  case Inst::ExtractElement:
+    lowerExtractElement(llvm::dyn_cast<InstExtractElement>(Inst));
+    break;
   case Inst::Fcmp:
     lowerFcmp(llvm::dyn_cast<InstFcmp>(Inst));
     break;
   case Inst::Icmp:
     lowerIcmp(llvm::dyn_cast<InstIcmp>(Inst));
     break;
+  case Inst::InsertElement:
+    lowerInsertElement(llvm::dyn_cast<InstInsertElement>(Inst));
+    break;
   case Inst::IntrinsicCall:
     lowerIntrinsicCall(llvm::dyn_cast<InstIntrinsicCall>(Inst));
     break;

diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index ed5389c..c798943 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h

@@ -169,7 +169,9 @@
   virtual void lowerCall(const InstCall *Inst) = 0;
   virtual void lowerCast(const InstCast *Inst) = 0;
   virtual void lowerFcmp(const InstFcmp *Inst) = 0;
+  virtual void lowerExtractElement(const InstExtractElement *Inst) = 0;
   virtual void lowerIcmp(const InstIcmp *Inst) = 0;
+  virtual void lowerInsertElement(const InstInsertElement *Inst) = 0;
   virtual void lowerIntrinsicCall(const InstIntrinsicCall *Inst) = 0;
   virtual void lowerLoad(const InstLoad *Inst) = 0;
   virtual void lowerPhi(const InstPhi *Inst) = 0;

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 2b14a65..af9ebc5 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp

@@ -85,6 +85,27 @@
   return TableIcmp32[Index].Mapping;
 }
 
+const struct TableTypeX8632Attributes_ {
+  Type InVectorElementType; // the "elementty" column of ICETYPEX8632_TABLE
+} TableTypeX8632Attributes[] = {
+#define X(tag, elementty, cvt, sdss, pack, width)                              \
+  { elementty }                                                                \
+  ,
+    ICETYPEX8632_TABLE
+#undef X
+  };
+const size_t TableTypeX8632AttributesSize = // number of rows in the table
+    llvm::array_lengthof(TableTypeX8632Attributes);
+
+// Return the type which the elements of the vector have in the X86
+// representation of the vector.  Ty must be a vector type.
+Type getInVectorElementType(Type Ty) {
+  assert(isVectorType(Ty));
+  size_t Index = static_cast<size_t>(Ty);
+  assert(Index < TableTypeX8632AttributesSize);
+  return TableTypeX8632Attributes[Index].InVectorElementType;
+}
+
 // The maximum number of arguments to pass in XMM registers
 const unsigned X86_MAX_XMM_ARGS = 4;
 // The number of bits in a byte
@@ -173,7 +194,7 @@
     // Define a temporary set of enum values based on low-level
     // table entries.
     enum _tmp_enum {
-#define X(tag, cvt, sdss, pack, width) _tmp_##tag,
+#define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag,
       ICETYPEX8632_TABLE
 #undef X
           _num
@@ -185,7 +206,7 @@
 #undef X
 // Define a set of constants based on low-level table entries,
 // and ensure the table entry keys are consistent.
-#define X(tag, cvt, sdss, pack, width)                                         \
+#define X(tag, elementty, cvt, sdss, pack, width)                              \
   static const int _table2_##tag = _tmp_##tag;                                 \
   STATIC_ASSERT(_table1_##tag == _table2_##tag);
     ICETYPEX8632_TABLE;
@@ -2107,6 +2128,85 @@
   }
 }
 
+void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
+  Operand *SourceVectOperand = Inst->getSrc(0); // vector to extract from
+  ConstantInteger *ElementIndex =
+      llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
+  // Only constant indices are allowed in PNaCl IR.
+  assert(ElementIndex);
+
+  unsigned Index = ElementIndex->getValue();
+  Type Ty = SourceVectOperand->getType();
+  Type ElementTy = typeElementType(Ty);
+  Type InVectorElementTy = getInVectorElementType(Ty);
+  Variable *ExtractedElement = makeReg(InVectorElementTy);
+
+  // TODO(wala): Determine the best lowering sequences for each type.
+  if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
+    // Lower extractelement operations where the element is 32 bits
+    // wide with pshufd.
+    // TODO(wala): SSE4.1 has extractps and pextrd
+    //
+    // ALIGNHACK: Force vector operands to registers in instructions that
+    // require aligned memory operands until support for stack alignment
+    // is implemented.
+#define ALIGN_HACK(Vect) legalizeToVar((Vect))
+    Operand *T = NULL;
+    if (Index) {
+      // The shuffle only needs to occur if the element to be extracted
+      // is not at the lowest index.
+      Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
+      T = makeReg(Ty);
+      _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask);
+    } else {
+      // TODO(wala): If SourceVectOperand is in memory, express it as
+      // mem32 so that the call to legalizeToVar() is made unnecessary.
+      // _movd and _movss only take mem32 memory operands.
+      T = legalizeToVar(SourceVectOperand);
+    }
+
+    if (InVectorElementTy == IceType_i32) {
+      _movd(ExtractedElement, T);
+    } else { // InVectorElementTy == IceType_f32
+      // TODO: _mov should be able to be used here.
+      _movss(ExtractedElement, T);
+    }
+#undef ALIGN_HACK
+  } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
+    Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
+    _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
+  } else {
+    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
+    // Spill the value to a stack slot and do the extraction in memory.
+    // TODO(wala): SSE4.1 has pextrb.
+    //
+    // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
+    // support for legalizing to mem is implemented.
+    Variable *Slot = Func->makeVariable(Ty, Context.getNode());
+    Slot->setWeight(RegWeight::Zero); // getMemoryOperandForStackSlot asserts it
+    _movp(Slot, legalizeToVar(SourceVectOperand));
+
+    // Compute the location of the element in memory.
+    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
+    OperandX8632Mem *Loc =
+        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
+    _mov(ExtractedElement, Loc);
+  }
+
+  if (ElementTy == IceType_i1) {
+    // i1 vectors hold wider elements; truncate the extracted integer to i1.
+    Variable *T = makeReg(IceType_i1);
+    InstCast *Cast =
+        InstCast::create(Func, InstCast::Trunc, T, ExtractedElement);
+    lowerCast(Cast);
+    ExtractedElement = T;
+  }
+
+  // Copy the element to the destination.
+  Variable *Dest = Inst->getDest();
+  _mov(Dest, ExtractedElement);
+}
+
 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
   Operand *Src0 = Inst->getSrc(0);
   Operand *Src1 = Inst->getSrc(1);
@@ -2238,6 +2338,123 @@
   Context.insert(Label);
 }
 
+// Lower insertelement.  Vectors with 32-bit in-vector elements use a pair
+// of shufps, vectors with 16-bit elements use pinsrw, and vectors with
+// 8-bit elements are spilled to a stack slot and modified in memory.  An
+// i1 element is first zero-extended to the in-vector element type.
+void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
+  Operand *SourceVectOperand = Inst->getSrc(0);
+  Operand *ElementToInsert = Inst->getSrc(1);
+  ConstantInteger *ElementIndex =
+      llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
+  // Only constant indices are allowed in PNaCl IR.
+  assert(ElementIndex);
+  unsigned Index = ElementIndex->getValue();
+
+  Type Ty = SourceVectOperand->getType();
+  Type ElementTy = typeElementType(Ty);
+  Type InVectorElementTy = getInVectorElementType(Ty);
+
+  if (ElementTy == IceType_i1) {
+    // Expand the element to the appropriate size for it to be inserted
+    // in the vector.
+    Variable *Expanded =
+        Func->makeVariable(InVectorElementTy, Context.getNode());
+    InstCast *Cast =
+        InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert);
+    lowerCast(Cast);
+    ElementToInsert = Expanded;
+  }
+
+  if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
+    // Lower insertelement with 32-bit wide elements using shufps.
+    // TODO(wala): SSE4.1 has pinsrd and insertps.
+    Variable *Element = NULL;
+    if (InVectorElementTy == IceType_f32) {
+      // Element will be in an XMM register since it is floating point.
+      Element = legalizeToVar(ElementToInsert);
+    } else {
+      // Copy an integer to an XMM register.
+      Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem);
+      Element = makeReg(Ty);
+      _movd(Element, T);
+    }
+
+    // shufps treats the source and destination operands as vectors of
+    // four doublewords.  The destination's two high doublewords are
+    // selected from the source operand and the two low doublewords are
+    // selected from the (original value of) the destination operand.
+    // An insertelement operation can be effected with a sequence of two
+    // shufps operations with appropriate masks.  In all cases below,
+    // Element[0] is being inserted into SourceVectOperand.  Indices are
+    // ordered from left to right.
+    //
+    // insertelement into index 0 (result is stored in Element):
+    //   Element := Element[0, 0] SourceVectOperand[0, 1]
+    //   Element := Element[0, 3] SourceVectOperand[2, 3]
+    //
+    // insertelement into index 1 (result is stored in Element):
+    //   Element := Element[0, 0] SourceVectOperand[0, 0]
+    //   Element := Element[3, 0] SourceVectOperand[2, 3]
+    //
+    // insertelement into index 2 (result is stored in T):
+    //   T := SourceVectOperand
+    //   Element := Element[0, 0] T[0, 3]
+    //   T := T[0, 1] Element[0, 3]
+    //
+    // insertelement into index 3 (result is stored in T):
+    //   T := SourceVectOperand
+    //   Element := Element[0, 0] T[0, 2]
+    //   T := T[0, 1] Element[3, 0]
+    const unsigned char Mask1[4] = {64, 0, 192, 128};
+    const unsigned char Mask2[4] = {236, 227, 196, 52};
+    // The index selects an entry of each mask table, so it must not run
+    // past the end of them.
+    assert(Index < llvm::array_lengthof(Mask1));
+
+    Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]);
+    Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]);
+
+    // ALIGNHACK: Force vector operands to registers in instructions that
+    // require aligned memory operands until support for stack alignment
+    // is implemented.
+#define ALIGN_HACK(Vect) legalizeToVar((Vect))
+    if (Index < 2) {
+      SourceVectOperand = ALIGN_HACK(SourceVectOperand);
+      _shufps(Element, SourceVectOperand, Mask1Constant);
+      _shufps(Element, SourceVectOperand, Mask2Constant);
+      _movp(Inst->getDest(), Element);
+    } else {
+      Variable *T = makeReg(Ty);
+      _movp(T, SourceVectOperand);
+      _shufps(Element, T, Mask1Constant);
+      _shufps(T, Element, Mask2Constant);
+      _movp(Inst->getDest(), T);
+    }
+#undef ALIGN_HACK
+  } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
+    Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
+    Variable *T = makeReg(Ty);
+    _movp(T, SourceVectOperand);
+    _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index));
+    _movp(Inst->getDest(), T);
+  } else {
+    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
+    // Spill the value to a stack slot and perform the insertion in
+    // memory.
+    // TODO(wala): SSE4.1 has pinsrb.
+    //
+    // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
+    // support for legalizing to mem is implemented.
+    Variable *Slot = Func->makeVariable(Ty, Context.getNode());
+    Slot->setWeight(RegWeight::Zero);
+    _movp(Slot, legalizeToVar(SourceVectOperand));
+
+    // Compute the location of the position to insert in memory.
+    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
+    OperandX8632Mem *Loc =
+        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
+    _store(legalizeToVar(ElementToInsert), Loc);
+
+    Variable *T = makeReg(Ty);
+    _movp(T, Slot);
+    _movp(Inst->getDest(), T);
+  }
+}
+
 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
   switch (Instr->getIntrinsicInfo().ID) {
   case Intrinsics::AtomicCmpxchg: {
@@ -3169,6 +3386,23 @@
   return Dest;
 }
 
+OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
+                                                           Variable *Slot,
+                                                           uint32_t Offset) {
+  // Ensure that Slot is a stack slot: zero weight and no register assigned.
+  assert(Slot->getWeight() == RegWeight::Zero);
+  assert(Slot->getRegNum() == Variable::NoRegister);
+  // Load the address of Slot into a fresh register (Loc) using lea.
+  // TODO(wala,stichnot): lea should not be required.  The address of
+  // the stack slot is known at compile time (although not until after
+  // addProlog()).
+  const Type PointerType = IceType_i32;
+  Variable *Loc = makeReg(PointerType);
+  _lea(Loc, Slot);
+  Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);
+  return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
+}
+
 // Helper for legalize() to emit the right code to lower an operand to a
 // register of the appropriate type.
 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {

diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 58d8781..fefc7fd 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h

@@ -82,9 +82,11 @@
   virtual void lowerBr(const InstBr *Inst);
   virtual void lowerCall(const InstCall *Inst);
   virtual void lowerCast(const InstCast *Inst);
+  virtual void lowerExtractElement(const InstExtractElement *Inst);
   virtual void lowerFcmp(const InstFcmp *Inst);
   virtual void lowerIcmp(const InstIcmp *Inst);
   virtual void lowerIntrinsicCall(const InstIntrinsicCall *Inst);
+  virtual void lowerInsertElement(const InstInsertElement *Inst);
   virtual void lowerLoad(const InstLoad *Inst);
   virtual void lowerPhi(const InstPhi *Inst);
   virtual void lowerRet(const InstRet *Inst);
@@ -152,6 +154,10 @@
   Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
   Variable *makeVectorOfOnes(Type Ty, int32_t RegNum = Variable::NoRegister);
 
+  // Return a memory operand of type Ty at byte Offset within stack slot Slot.
+  OperandX8632Mem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
+                                                uint32_t Offset = 0);
+
   // The following are helpers that insert lowered x86 instructions
   // with minimal syntactic overhead, so that the lowering code can
   // look as close to assembly as practical.
@@ -237,6 +243,9 @@
   void _imul(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Imul::create(Func, Dest, Src0));
   }
+  void _lea(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Lea::create(Func, Dest, Src0));
+  }
   void _mfence() { Context.insert(InstX8632Mfence::create(Func)); }
   // If Dest=NULL is passed in, then a new variable is created, marked
   // as infinite register allocation weight, and returned through the
@@ -249,12 +258,18 @@
       Context.insert(InstX8632Mov::create(Func, Dest, Src0));
     }
   }
+  void _movd(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Movd::create(Func, Dest, Src0));
+  }
   void _movp(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Movp::create(Func, Dest, Src0));
   }
   void _movq(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Movq::create(Func, Dest, Src0));
   }
+  void _movss(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Movss::create(Func, Dest, Src0));
+  }
   void _movsx(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Movsx::create(Func, Dest, Src0));
   }
@@ -288,6 +303,12 @@
   void _pcmpgt(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Pcmpgt::create(Func, Dest, Src0));
   }
+  void _pextrw(Variable *Dest, Operand *Src0, Operand *Src1) {
+    Context.insert(InstX8632Pextrw::create(Func, Dest, Src0, Src1));
+  }
+  void _pinsrw(Variable *Dest, Operand *Src0, Operand *Src1) {
+    Context.insert(InstX8632Pinsrw::create(Func, Dest, Src0, Src1));
+  }
   void _pmullw(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Pmullw::create(Func, Dest, Src0));
   }
commit	49889239d4c7ab296c7430722d36032d905110b6	[log] [tgz]
author	Matt Wala <wala@chromium.org>	Fri Jul 18 12:45:09 2014 -0700
committer	Matt Wala <wala@chromium.org>	Fri Jul 18 12:45:09 2014 -0700
tree	1417eefdd0b05053a5c34913bac94d74d5bb0b42
parent	7fa22d8a73def01899c5f30f20b914c65d5850d5 [diff]