Implement intrinsics for loading/storing subvectors.

This enables emulating 64-bit and 32-bit vectors using 128-bit
vectors internally (x86 only for now). Note that these intrinsics
are not part of the PNaCl specification.

BUG=swiftshader:15

Change-Id: I61a666243832c2856e60eb477d42a72dec07d01d
Reviewed-on: https://chromium-review.googlesource.com/392246
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 2ed35fe..230df8f 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -166,6 +166,7 @@
       Store,
       StoreP,
       StoreQ,
+      StoreD,
       Sub,
       SubRMW,
       Subps,
@@ -2595,7 +2596,7 @@
     InstX86StoreQ &operator=(const InstX86StoreQ &) = delete;
 
   public:
-    static InstX86StoreQ *create(Cfg *Func, Variable *Value,
+    static InstX86StoreQ *create(Cfg *Func, Operand *Value,
                                  X86OperandMem *Mem) {
       return new (Func->allocate<InstX86StoreQ>())
           InstX86StoreQ(Func, Value, Mem);
@@ -2608,7 +2609,29 @@
     }
 
   private:
-    InstX86StoreQ(Cfg *Func, Variable *Value, X86OperandMem *Mem);
+    InstX86StoreQ(Cfg *Func, Operand *Value, X86OperandMem *Mem);
+  };
+
+  class InstX86StoreD final : public InstX86Base { // 32-bit "movd" store
+    InstX86StoreD() = delete;
+    InstX86StoreD(const InstX86StoreD &) = delete;
+    InstX86StoreD &operator=(const InstX86StoreD &) = delete;
+
+  public:
+    static InstX86StoreD *create(Cfg *Func, Operand *Value,
+                                 X86OperandMem *Mem) {
+      return new (Func->allocate<InstX86StoreD>())
+          InstX86StoreD(Func, Value, Mem);
+    }
+    void emit(const Cfg *Func) const override;    // writes "movd" asm text
+    void emitIAS(const Cfg *Func) const override; // encodes via Asm->movd()
+    void dump(const Cfg *Func) const override;    // writes "stored.<type>"
+    static bool classof(const Inst *Instr) {
+      return InstX86Base::isClassof(Instr, InstX86Base::StoreD);
+    }
+
+  private:
+    InstX86StoreD(Cfg *Func, Operand *Value, X86OperandMem *Mem);
   };
 
   /// Nop instructions of varying length
@@ -3007,6 +3030,7 @@
   using Store = typename InstImpl<TraitsType>::InstX86Store;
   using StoreP = typename InstImpl<TraitsType>::InstX86StoreP;
   using StoreQ = typename InstImpl<TraitsType>::InstX86StoreQ;
+  using StoreD = typename InstImpl<TraitsType>::InstX86StoreD;
   using Nop = typename InstImpl<TraitsType>::InstX86Nop;
   template <typename T = typename InstImpl<TraitsType>::Traits>
   using Fld =
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 26fa2c4..1bc2a0c 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -297,7 +297,7 @@
 }
 
 template <typename TraitsType>
-InstImpl<TraitsType>::InstX86StoreQ::InstX86StoreQ(Cfg *Func, Variable *Value,
+InstImpl<TraitsType>::InstX86StoreQ::InstX86StoreQ(Cfg *Func, Operand *Value,
                                                    X86OperandMem *Mem)
     : InstX86Base(Func, InstX86Base::StoreQ, 2, nullptr) {
   this->addSource(Value);
@@ -305,6 +305,14 @@
 }
 
 template <typename TraitsType>
+InstImpl<TraitsType>::InstX86StoreD::InstX86StoreD(Cfg *Func, Operand *Value,
+                                                   X86OperandMem *Mem)
+    : InstX86Base(Func, InstX86Base::StoreD, 2, nullptr) {
+  this->addSource(Value); // read back as getSrc(0): the value to store
+  this->addSource(Mem);   // read back as getSrc(1): the destination address
+}
+
+template <typename TraitsType>
 InstImpl<TraitsType>::InstX86Nop::InstX86Nop(Cfg *Func, NopVariant Variant)
     : InstX86Base(Func, InstX86Base::Nop, 0, nullptr), Variant(Variant) {}
 
@@ -2021,6 +2029,46 @@
 }
 
 template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreD::emit(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return; // nothing is written when BuildDefs::dump() is false
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(this->getSrcSize() == 2);
+  assert(this->getSrc(1)->getType() == IceType_i64 || // NOTE(review): the
+         this->getSrc(1)->getType() == IceType_f64 || // 64-bit types mirror
+         isVectorType(this->getSrc(1)->getType()));   // StoreQ; confirm.
+  Str << "\t"
+         "movd\t";
+  this->getSrc(0)->emit(Func); // the value being stored is written first
+  Str << ", ";
+  this->getSrc(1)->emit(Func); // then the destination memory operand
+}
+
 template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreD::emitIAS(const Cfg *Func) const {
+  Assembler *Asm = Func->getAssembler<Assembler>();
+  assert(this->getSrcSize() == 2);
+  const auto *SrcVar = llvm::cast<Variable>(this->getSrc(0)); // stored value
+  const auto DestMem = llvm::cast<X86OperandMem>(this->getSrc(1));
+  assert(DestMem->getSegmentRegister() == X86OperandMem::DefaultSegment);
+  assert(SrcVar->hasReg()); // the source must already be in a register
+  auto *Target = InstX86Base::getTarget(Func);
+  Asm->movd(SrcVar->getType(), DestMem->toAsmAddress(Asm, Target),
+            Traits::getEncodedXmm(SrcVar->getRegNum())); // xmm -> memory
+}
+
 template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86StoreD::dump(const Cfg *Func) const {
+  if (!BuildDefs::dump())
+    return; // nothing is written when BuildDefs::dump() is false
+  Ostream &Str = Func->getContext()->getStrDump();
+  Str << "stored." << this->getSrc(0)->getType() << " "; // type of the value
+  this->getSrc(1)->dump(Func); // destination address is printed first
+  Str << ", ";
+  this->getSrc(0)->dump(Func); // followed by the value being stored
+}
+
+template <typename TraitsType>
 void InstImpl<TraitsType>::InstX86Lea::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
@@ -2279,7 +2327,8 @@
 void InstImpl<TraitsType>::InstX86Movq::emitIAS(const Cfg *Func) const {
   assert(this->getSrcSize() == 1);
   assert(this->getDest()->getType() == IceType_i64 ||
-         this->getDest()->getType() == IceType_f64);
+         this->getDest()->getType() == IceType_f64 ||
+         isVectorType(this->getDest()->getType()));
   const Variable *Dest = this->getDest();
   const Operand *Src = this->getSrc(0);
   static const XmmEmitterMovOps Emitter = {&Assembler::movq, &Assembler::movq,
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index 4b2fbc4..3acfbf4 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -60,7 +60,10 @@
     Sqrt,
     Stacksave,
     Stackrestore,
-    Trap
+    Trap,
+    // The intrinsics below are not part of the PNaCl specification.
+    LoadSubVector,
+    StoreSubVector
   };
 
   /// Operations that can be represented by the AtomicRMW intrinsic.
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 7564652..4577997 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -5279,6 +5279,14 @@
   case Intrinsics::Trap:
     _trap();
     return;
+  case Intrinsics::LoadSubVector: {
+    UnimplementedLoweringError(this, Instr); // Not required for PNaCl
+    return;
+  }
+  case Intrinsics::StoreSubVector: {
+    UnimplementedLoweringError(this, Instr); // Not required for PNaCl
+    return;
+  }
   case Intrinsics::UnknownIntrinsic:
     Func->setError("Should not be lowering UnknownIntrinsic");
     return;
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 9b71d46..56e0679 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -4043,6 +4043,14 @@
     _teq(getZero(), getZero(), TrapCodeZero);
     return;
   }
+  case Intrinsics::LoadSubVector: {
+    UnimplementedLoweringError(this, Instr); // Not required for PNaCl
+    return;
+  }
+  case Intrinsics::StoreSubVector: {
+    UnimplementedLoweringError(this, Instr); // Not required for PNaCl
+    return;
+  }
   case Intrinsics::UnknownIntrinsic:
     Func->setError("Should not be lowering UnknownIntrinsic");
     return;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 940954d..a7c89f9 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -904,10 +904,14 @@
     AutoMemorySandboxer<> _(this, &Value, &Mem);
     Context.insert<typename Traits::Insts::StoreP>(Value, Mem);
   }
-  void _storeq(Variable *Value, X86OperandMem *Mem) {
+  void _storeq(Operand *Value, X86OperandMem *Mem) {
     AutoMemorySandboxer<> _(this, &Value, &Mem);
     Context.insert<typename Traits::Insts::StoreQ>(Value, Mem);
   }
+  void _stored(Operand *Value, X86OperandMem *Mem) {
+    AutoMemorySandboxer<> _(this, &Value, &Mem);
+    Context.insert<typename Traits::Insts::StoreD>(Value, Mem); // movd store
+  }
   void _sub(Variable *Dest, Operand *Src0) {
     AutoMemorySandboxer<> _(this, &Dest, &Src0);
     Context.insert<typename Traits::Insts::Sub>(Dest, Src0);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 8ecef55..d0a2aa3 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -4344,6 +4344,58 @@
   case Intrinsics::Trap:
     _ud2();
     return;
+  case Intrinsics::LoadSubVector: {
+    assert(llvm::isa<ConstantInteger32>(Instr->getArg(0)) &&
+           "LoadSubVector first argument must be a constant");
+    Variable *Dest = Instr->getDest();
+    Type Ty = Dest->getType();
+    auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(0));
+    Operand *Addr = Instr->getArg(1); // arg 0 is the size, arg 1 the address
+    X86OperandMem *Src = formMemoryOperand(Addr, Ty);
+    doMockBoundsCheck(Src);
+
+    if (Dest->isRematerializable()) {
+      Context.insert<InstFakeDef>(Dest); // no load is emitted in this case
+      return;
+    }
+
+    switch (SubVectorSize->getValue()) {
+    case 4:
+      _movd(Dest, Src); // 32-bit load
+      break;
+    case 8:
+      _movq(Dest, Src); // 64-bit load
+      break;
+    default:
+      Func->setError("Unexpected size for LoadSubVector");
+      return;
+    }
+    return;
+  }
+  case Intrinsics::StoreSubVector: {
+    assert(llvm::isa<ConstantInteger32>(Instr->getArg(0)) &&
+           "StoreSubVector first argument must be a constant");
+    auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(0));
+    Operand *Value = Instr->getArg(1); // arg 0 is the size, arg 1 the value,
+    Operand *Addr = Instr->getArg(2);  // arg 2 the destination address
+    X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType());
+    doMockBoundsCheck(NewAddr);
+
+    Value = legalizeToReg(Value); // the store source must be a register
+
+    switch (SubVectorSize->getValue()) {
+    case 4:
+      _stored(Value, NewAddr); // 32-bit store
+      break;
+    case 8:
+      _storeq(Value, NewAddr); // 64-bit store
+      break;
+    default:
+      Func->setError("Unexpected size for StoreSubVector");
+      return;
+    }
+    return;
+  }
   case Intrinsics::UnknownIntrinsic:
     Func->setError("Should not be lowering UnknownIntrinsic");
     return;