Lower icmp operations between vector values.
SSE2 only has signed integer comparison. Unsigned compares are
implemented by inverting the sign bits of the operands and doing a
signed compare.
A common pattern in clang-generated IR is a vector compare that
produces an i1 vector, followed by a sign extension of the compare's
result. The x86 comparison instructions already produce sign-extended
values, so we can eliminate the unnecessary sext operations that follow
compares in the IR.
BUG=none
R=jvoung@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/412593002
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 3808ecb..71b4c17 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2261,6 +2261,124 @@
Operand *Src1 = legalize(Inst->getSrc(1));
Variable *Dest = Inst->getDest();
+ if (isVectorType(Dest->getType())) {
+ Type Ty = Src0->getType();
+ // Promote i1 vectors to 128 bit integer vector types.
+ if (typeElementType(Ty) == IceType_i1) {
+ Type NewTy = IceType_NUM;
+ switch (Ty) {
+ default:
+ llvm_unreachable("unexpected type");
+ break;
+ case IceType_v4i1:
+ NewTy = IceType_v4i32;
+ break;
+ case IceType_v8i1:
+ NewTy = IceType_v8i16;
+ break;
+ case IceType_v16i1:
+ NewTy = IceType_v16i8;
+ break;
+ }
+ Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
+ Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
+ lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
+ Src0 = NewSrc0;
+ Src1 = NewSrc1;
+ Ty = NewTy;
+ }
+
+ InstIcmp::ICond Condition = Inst->getCondition();
+
+ // SSE2 only has signed comparison operations. Transform unsigned
+ // inputs in a manner that allows for the use of signed comparison
+ // operations by flipping the high order bits.
+ if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
+ Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
+ Variable *T0 = makeReg(Ty);
+ Variable *T1 = makeReg(Ty);
+ Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
+ _movp(T0, Src0);
+ _pxor(T0, HighOrderBits);
+ _movp(T1, Src1);
+ _pxor(T1, HighOrderBits);
+ Src0 = T0;
+ Src1 = T1;
+ }
+
+ // TODO: ALIGNHACK: Both operands to compare instructions need to be
+ // in registers until stack alignment support is implemented. Once
+ // there is support for stack alignment, LEGAL_HACK can be removed.
+#define LEGAL_HACK(Vect) legalizeToVar((Vect))
+ Variable *T = makeReg(Ty);
+ switch (Condition) {
+ default:
+ llvm_unreachable("unexpected condition");
+ break;
+ case InstIcmp::Eq: {
+ _movp(T, Src0);
+ _pcmpeq(T, LEGAL_HACK(Src1));
+ } break;
+ case InstIcmp::Ne: {
+ _movp(T, Src0);
+ _pcmpeq(T, LEGAL_HACK(Src1));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ case InstIcmp::Ugt:
+ case InstIcmp::Sgt: {
+ _movp(T, Src0);
+ _pcmpgt(T, LEGAL_HACK(Src1));
+ } break;
+ case InstIcmp::Uge:
+ case InstIcmp::Sge: {
+ // !(Src1 > Src0)
+ _movp(T, Src1);
+ _pcmpgt(T, LEGAL_HACK(Src0));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ case InstIcmp::Ult:
+ case InstIcmp::Slt: {
+ _movp(T, Src1);
+ _pcmpgt(T, LEGAL_HACK(Src0));
+ } break;
+ case InstIcmp::Ule:
+ case InstIcmp::Sle: {
+ // !(Src0 > Src1)
+ _movp(T, Src0);
+ _pcmpgt(T, LEGAL_HACK(Src1));
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
+ _pxor(T, MinusOne);
+ } break;
+ }
+#undef LEGAL_HACK
+
+ _movp(Dest, T);
+
+ // The following pattern occurs often in lowered C and C++ code:
+ //
+ // %cmp = icmp pred <n x ty> %src0, %src1
+ // %cmp.ext = sext <n x i1> %cmp to <n x ty>
+ //
+ // We can avoid the sext operation by copying the result from pcmpgt
+ // and pcmpeq, which is already sign extended, to the result of the
+ // sext operation
+ if (InstCast *NextCast =
+ llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
+ if (NextCast->getCastKind() == InstCast::Sext &&
+ NextCast->getSrc(0) == Dest) {
+ _movp(NextCast->getDest(), T);
+ // Skip over the instruction.
+ NextCast->setDeleted();
+ Context.advanceNext();
+ }
+ }
+
+ return;
+ }
+
// If Src1 is an immediate, or known to be a physical register, we can
// allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
// a physical register. (Actually, either Src0 or Src1 can be chosen for
@@ -3398,9 +3516,14 @@
lowerCall(Call);
}
+// There is no support for loading or emitting vector constants, so the
+// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
+// etc. are initialized with register operations.
+//
+// TODO(wala): Add limited support for vector constants so that
+// complex initialization in registers is unnecessary.
+
Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
- // There is no support for loading or emitting vector constants, so
- // this value is initialized using register operations.
Variable *Reg = makeReg(Ty, RegNum);
// Insert a FakeDef, since otherwise the live range of Reg might
// be overestimated.
@@ -3409,18 +3532,41 @@
return Reg;
}
+Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
+ Variable *MinusOnes = makeReg(Ty, RegNum);
+ // Insert a FakeDef so the live range of MinusOnes is not overestimated.
+ Context.insert(InstFakeDef::create(Func, MinusOnes));
+ _pcmpeq(MinusOnes, MinusOnes);
+ return MinusOnes;
+}
+
Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
- // There is no support for loading or emitting vector constants, so
- // this value is initialized using register operations.
Variable *Dest = makeVectorOfZeros(Ty, RegNum);
- Variable *MinusOne = makeReg(Ty);
- // Insert a FakeDef so the live range of MinusOne is not overestimated.
- Context.insert(InstFakeDef::create(Func, MinusOne));
- _pcmpeq(MinusOne, MinusOne);
+ Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_psub(Dest, MinusOne);
return Dest;
}
+Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
+ assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
+ Ty == IceType_v16i8);
+ if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
+ Variable *Reg = makeVectorOfOnes(Ty, RegNum);
+ SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
+ _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
+ return Reg;
+ } else {
+ // SSE has no left shift operation for vectors of 8 bit integers.
+ const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
+ Constant *ConstantMask =
+ Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
+ Variable *Reg = makeReg(Ty, RegNum);
+ _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
+ _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
+ return Reg;
+ }
+}
+
OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
Variable *Slot,
uint32_t Offset) {