Subzero: Don't use key SSE instructions on potentially unaligned loads.

Non-mov-like SSE instructions generally require their memory operands to be 16-byte aligned, but the PNaCl bitcode ABI guarantees at most 4-byte alignment on vector loads and stores, so a vector memory operand coming from bitcode may be unaligned.  Subzero maintains stack alignment, so stack memory operands are fine.

We handle this by legalizing the memory operand into a register (i.e. loading it first) wherever its alignment cannot be guaranteed.

This bug was first discovered on the vector_align scons test.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4083
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4133
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/1024253003
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index e99516b..662ac54 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -1375,6 +1375,8 @@
   } else if (isVectorType(Dest->getType())) {
     // TODO: Trap on integer divide and integer modulo by zero.
     // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
+    if (llvm::isa<OperandX8632Mem>(Src1))
+      Src1 = legalizeToVar(Src1);
     switch (Inst->getOp()) {
     case InstArithmetic::_num:
       llvm_unreachable("Unknown arithmetic operator");
@@ -2090,6 +2092,8 @@
       assert(Dest->getType() == IceType_v4i32 &&
              Inst->getSrc(0)->getType() == IceType_v4f32);
       Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
+      if (llvm::isa<OperandX8632Mem>(Src0RM))
+        Src0RM = legalizeToVar(Src0RM);
       Variable *T = makeReg(Dest->getType());
       _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
       _movp(Dest, T);
@@ -2165,6 +2169,8 @@
       assert(Dest->getType() == IceType_v4f32 &&
              Inst->getSrc(0)->getType() == IceType_v4i32);
       Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
+      if (llvm::isa<OperandX8632Mem>(Src0RM))
+        Src0RM = legalizeToVar(Src0RM);
       Variable *T = makeReg(Dest->getType());
       _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
       _movp(Dest, T);
@@ -2472,6 +2478,8 @@
     } else {
       Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
       Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
+      if (llvm::isa<OperandX8632Mem>(Src1RM))
+        Src1RM = legalizeToVar(Src1RM);
 
       switch (Condition) {
       default: {
@@ -2609,10 +2617,14 @@
       llvm_unreachable("unexpected condition");
       break;
     case InstIcmp::Eq: {
+      if (llvm::isa<OperandX8632Mem>(Src1RM))
+        Src1RM = legalizeToVar(Src1RM);
       _movp(T, Src0RM);
       _pcmpeq(T, Src1RM);
     } break;
     case InstIcmp::Ne: {
+      if (llvm::isa<OperandX8632Mem>(Src1RM))
+        Src1RM = legalizeToVar(Src1RM);
       _movp(T, Src0RM);
       _pcmpeq(T, Src1RM);
       Variable *MinusOne = makeVectorOfMinusOnes(Ty);
@@ -2620,12 +2632,16 @@
     } break;
     case InstIcmp::Ugt:
     case InstIcmp::Sgt: {
+      if (llvm::isa<OperandX8632Mem>(Src1RM))
+        Src1RM = legalizeToVar(Src1RM);
       _movp(T, Src0RM);
       _pcmpgt(T, Src1RM);
     } break;
     case InstIcmp::Uge:
     case InstIcmp::Sge: {
       // !(Src1RM > Src0RM)
+      if (llvm::isa<OperandX8632Mem>(Src0RM))
+        Src0RM = legalizeToVar(Src0RM);
       _movp(T, Src1RM);
       _pcmpgt(T, Src0RM);
       Variable *MinusOne = makeVectorOfMinusOnes(Ty);
@@ -2633,12 +2649,16 @@
     } break;
     case InstIcmp::Ult:
     case InstIcmp::Slt: {
+      if (llvm::isa<OperandX8632Mem>(Src0RM))
+        Src0RM = legalizeToVar(Src0RM);
       _movp(T, Src1RM);
       _pcmpgt(T, Src0RM);
     } break;
     case InstIcmp::Ule:
     case InstIcmp::Sle: {
       // !(Src0RM > Src1RM)
+      if (llvm::isa<OperandX8632Mem>(Src1RM))
+        Src1RM = legalizeToVar(Src1RM);
       _movp(T, Src0RM);
       _pcmpgt(T, Src1RM);
       Variable *MinusOne = makeVectorOfMinusOnes(Ty);
@@ -3092,8 +3112,12 @@
     Variable *T = makeVectorOfFabsMask(Ty);
     // The pand instruction operates on an m128 memory operand, so if
     // Src is an f32 or f64, we need to make sure it's in a register.
-    if (!isVectorType(Ty))
+    if (isVectorType(Ty)) {
+      if (llvm::isa<OperandX8632Mem>(Src))
+        Src = legalizeToVar(Src);
+    } else {
       Src = legalizeToVar(Src);
+    }
     _pand(T, Src);
     if (isVectorType(Ty))
       _movp(Dest, T);