Subzero: Fix lowering of the fabs() intrinsic.

The pand instruction used to mask off the sign bit accepts either an xmm register or a 128-bit (m128) memory location as its source operand, but not a 32-bit or 64-bit memory location.  This means f32 and f64 operands must first be loaded into a register.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/1022123004
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 1cb079e..0a41cc8 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -3094,6 +3094,10 @@
     Type Ty = Src->getType();
     Variable *Dest = Instr->getDest();
     Variable *T = makeVectorOfFabsMask(Ty);
+    // A memory operand to the pand instruction must be a full m128, so
+    // if Src is an f32 or f64, we need to make sure it's in a register.
+    if (!isVectorType(Ty))
+      Src = legalizeToVar(Src);
     _pand(T, Src);
     if (isVectorType(Ty))
       _movp(Dest, T);
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
index 10301bc..67153dd 100644
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -279,16 +279,18 @@
   %r4 = fadd float %r2, %r3
   ret float %r4
 }
+;;; Specifically check that the pand instruction doesn't try to operate on a
+;;; 32-bit (f32) memory operand, and instead uses two xmm registers.
 ; CHECK-LABEL: test_fabs_float
 ; CHECK: pcmpeqd
 ; CHECK: psrld
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
 ; CHECK: pcmpeqd
 ; CHECK: psrld
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
 ; CHECK: pcmpeqd
 ; CHECK: psrld
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
 
 define double @test_fabs_double(double %x) {
 entry:
@@ -298,16 +300,18 @@
   %r4 = fadd double %r2, %r3
   ret double %r4
 }
+;;; Specifically check that the pand instruction doesn't try to operate on a
+;;; 64-bit (f64) memory operand, and instead uses two xmm registers.
 ; CHECK-LABEL: test_fabs_double
 ; CHECK: pcmpeqd
 ; CHECK: psrlq
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
 ; CHECK: pcmpeqd
 ; CHECK: psrlq
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
 ; CHECK: pcmpeqd
 ; CHECK: psrlq
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
 
 define <4 x float> @test_fabs_v4f32(<4 x float> %x) {
 entry: