Subzero: Fix lowering of the fabs() intrinsic.
The pand instruction for masking off the sign bit can operate on a register or an m128 memory location, but not a 32-bit or 64-bit memory location. This means we need to make sure f32 and f64 operands are first loaded into a register.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/1022123004
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 1cb079e..0a41cc8 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -3094,6 +3094,10 @@
Type Ty = Src->getType();
Variable *Dest = Instr->getDest();
Variable *T = makeVectorOfFabsMask(Ty);
+ // The pand instruction can only take a register or a full m128 memory
+ // operand, so if Src is an f32 or f64, make sure it's in a register.
+ if (!isVectorType(Ty))
+ Src = legalizeToVar(Src);
_pand(T, Src);
if (isVectorType(Ty))
_movp(Dest, T);
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
index 10301bc..67153dd 100644
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -279,16 +279,18 @@
%r4 = fadd float %r2, %r3
ret float %r4
}
+;;; Specially check that the pand instruction doesn't try to operate on a 32-bit
+;;; (f32) memory operand, and instead uses two xmm registers.
; CHECK-LABEL: test_fabs_float
; CHECK: pcmpeqd
; CHECK: psrld
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrld
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrld
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
define double @test_fabs_double(double %x) {
entry:
@@ -298,16 +300,18 @@
%r4 = fadd double %r2, %r3
ret double %r4
}
+;;; Specially check that the pand instruction doesn't try to operate on a 64-bit
+;;; (f64) memory operand, and instead uses two xmm registers.
; CHECK-LABEL: test_fabs_double
; CHECK: pcmpeqd
; CHECK: psrlq
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrlq
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
; CHECK: pcmpeqd
; CHECK: psrlq
-; CHECK: pand
+; CHECK: pand {{.*}}xmm{{.*}}xmm
define <4 x float> @test_fabs_v4f32(<4 x float> %x) {
entry: