Lower byte swap intrinsic. Clump the negate instruction with the bswap instruction as an "inplace" operation. One difference is that bswap has stricter requirements on the operand type. BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882 R=stichnot@chromium.org, wala@chromium.org Review URL: https://codereview.chromium.org/401533002

commit: 7fa813b31abd816623b02a205b0b1bda51e6bfc8 [log] [tgz]
author: Jan Voung <jvoung@chromium.org> Fri Jul 18 13:01:08 2014 -0700
committer: Jan Voung <jvoung@chromium.org> Fri Jul 18 13:01:08 2014 -0700
tree: 5ef9815161cf09d8646e8d23d1007b96b229b101
parent: 49889239d4c7ab296c7430722d36032d905110b6 [diff]
diff --git a/crosstest/test_bitmanip.cpp b/crosstest/test_bitmanip.cpp
index 2ebe8a4..7106767 100644
--- a/crosstest/test_bitmanip.cpp
+++ b/crosstest/test_bitmanip.cpp

@@ -38,3 +38,8 @@
 
 FOR_ALL_BMI_OP_TYPES(X)
 #undef X
+
+#define X(type, builtin_name)                                                  \
+  type test_bswap(type a) { return builtin_name(a); }
+BSWAP_TABLE
+#undef X

diff --git a/crosstest/test_bitmanip.def b/crosstest/test_bitmanip.def
index b164ab7..0dac033 100644
--- a/crosstest/test_bitmanip.def
+++ b/crosstest/test_bitmanip.def

@@ -39,4 +39,10 @@
   FOR_ALL_BMI_TYPES_INST(X, ctpop)
 //#define X(inst, type)
 
+#define BSWAP_TABLE              \
+  /* type, builtin_name */       \
+  X(uint16_t, __builtin_bswap16) \
+  X(uint32_t, __builtin_bswap32) \
+  X(uint64_t, __builtin_bswap64)
+
 #endif // TEST_BIT_MANIP_DEF

diff --git a/crosstest/test_bitmanip.h b/crosstest/test_bitmanip.h
index 7c4efdb..d283d7f 100644
--- a/crosstest/test_bitmanip.h
+++ b/crosstest/test_bitmanip.h

@@ -22,3 +22,7 @@
 
 FOR_ALL_BMI_OP_TYPES(X)
 #undef X
+
+#define X(type, builtin_name) type test_bswap(type);
+BSWAP_TABLE
+#undef X

diff --git a/crosstest/test_bitmanip_main.cpp b/crosstest/test_bitmanip_main.cpp
index b3ad585..89dc8ba 100644
--- a/crosstest/test_bitmanip_main.cpp
+++ b/crosstest/test_bitmanip_main.cpp

@@ -28,27 +28,31 @@
 }
 
 volatile uint64_t Values[] = {
-    0,                    1,                    0x7e,
-    0x7f,                 0x80,                 0x81,
-    0xfe,                 0xff,                 0x7ffe,
-    0x7fff,               0x8000,               0x8001,
-    0xfffe,               0xffff,
-    0x007fffff /*Max subnormal + */,
-    0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */,
-    0x7f800000 /*+Inf*/,  0xff800000 /*-Inf*/,
-    0x7fa00000 /*SNaN*/,  0x7fc00000 /*QNaN*/,
-    0x7ffffffe,           0x7fffffff,           0x80000000,
-    0x80000001,           0xfffffffe,           0xffffffff,
-    0x100000000ll,        0x100000001ll,
-    0x000fffffffffffffll /*Max subnormal + */,
-    0x0010000000000000ll /*Min+ */,
-    0x7fefffffffffffffll /*Max+ */,
-    0x7ff0000000000000ll /*+Inf*/,
-    0xfff0000000000000ll /*-Inf*/,
-    0x7ff0000000000001ll /*SNaN*/,
-    0x7ff8000000000000ll /*QNaN*/,
-    0x7ffffffffffffffell, 0x7fffffffffffffffll, 0x8000000000000000ll,
-    0x8000000000000001ll, 0xfffffffffffffffell, 0xffffffffffffffffll };
+    0,                              1,
+    0x7e,                           0x7f,
+    0x80,                           0x81,
+    0xfe,                           0xff,
+    0x7ffe,                         0x7fff,
+    0x8000,                         0x8001,
+    0xfffe,                         0xffff,
+    0xc0de,                         0xabcd,
+    0xdcba,                         0x007fffff /*Max subnormal + */,
+    0x00800000 /*Min+ */,           0x7f7fffff /*Max+ */,
+    0x7f800000 /*+Inf*/,            0xff800000 /*-Inf*/,
+    0x7fa00000 /*SNaN*/,            0x7fc00000 /*QNaN*/,
+    0x7ffffffe,                     0x7fffffff,
+    0x80000000,                     0x80000001,
+    0xfffffffe,                     0xffffffff,
+    0x12345678,                     0xabcd1234,
+    0x1234dcba,                     0x100000000ll,
+    0x100000001ll,                  0x123456789abcdef1ll,
+    0x987654321ab1fedcll,           0x000fffffffffffffll /*Max subnormal + */,
+    0x0010000000000000ll /*Min+ */, 0x7fefffffffffffffll /*Max+ */,
+    0x7ff0000000000000ll /*+Inf*/,  0xfff0000000000000ll /*-Inf*/,
+    0x7ff0000000000001ll /*SNaN*/,  0x7ff8000000000000ll /*QNaN*/,
+    0x7ffffffffffffffell,           0x7fffffffffffffffll,
+    0x8000000000000000ll,           0x8000000000000001ll,
+    0xfffffffffffffffell,           0xffffffffffffffffll};
 
 const static size_t NumValues = sizeof(Values) / sizeof(*Values);
 
@@ -96,6 +100,25 @@
   }
 }
 
+template <typename Type>
+void testByteSwap(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+  for (size_t i = 0; i < NumValues; ++i) {
+    Type Value = static_cast<Type>(Values[i]);
+    ++TotalTests;
+    Type ResultSz = test_bswap(Value);
+    Type ResultLlc = Subzero_::test_bswap(Value);
+    if (ResultSz == ResultLlc) {
+      ++Passes;
+    } else {
+      ++Failures;
+      std::cout << "test_bswap" << (CHAR_BIT * sizeof(Type)) << "("
+                << static_cast<uint64_t>(Value)
+                << "): sz=" << static_cast<uint64_t>(ResultSz)
+                << " llc=" << static_cast<uint64_t>(ResultLlc) << "\n";
+    }
+  }
+}
+
 int main(int argc, char **argv) {
   size_t TotalTests = 0;
   size_t Passes = 0;
@@ -103,6 +126,9 @@
 
   testBitManip<uint32_t>(TotalTests, Passes, Failures);
   testBitManip<uint64_t>(TotalTests, Passes, Failures);
+  testByteSwap<uint16_t>(TotalTests, Passes, Failures);
+  testByteSwap<uint32_t>(TotalTests, Passes, Failures);
+  testByteSwap<uint64_t>(TotalTests, Passes, Failures);
 
   std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
             << " Failures=" << Failures << "\n";

diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index baa145f..f1a68da 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp

@@ -95,11 +95,6 @@
   addSource(Source2);
 }
 
-InstX8632Neg::InstX8632Neg(Cfg *Func, Operand *SrcDest)
-    : InstX8632(Func, InstX8632::Neg, 1, llvm::dyn_cast<Variable>(SrcDest)) {
-  addSource(SrcDest);
-}
-
 InstX8632Shld::InstX8632Shld(Cfg *Func, Variable *Dest, Variable *Source1,
                              Variable *Source2)
     : InstX8632(Func, InstX8632::Shld, 3, Dest) {
@@ -440,6 +435,9 @@
 }
 
 
+// In-place ops
+template <> const char *InstX8632Bswap::Opcode = "bswap";
+template <> const char *InstX8632Neg::Opcode = "neg";
 // Unary ops
 template <> const char *InstX8632Bsf::Opcode = "bsf";
 template <> const char *InstX8632Bsr::Opcode = "bsr";
@@ -473,6 +471,7 @@
 template <> const char *InstX8632Divps::Opcode = "divps";
 template <> const char *InstX8632Idiv::Opcode = "idiv";
 template <> const char *InstX8632Divss::Opcode = "divss";
+template <> const char *InstX8632Rol::Opcode = "rol";
 template <> const char *InstX8632Shl::Opcode = "shl";
 template <> const char *InstX8632Psll::Opcode = "psll";
 template <> const char *InstX8632Shr::Opcode = "shr";
@@ -611,21 +610,6 @@
   dumpSources(Func);
 }
 
-void InstX8632Neg::emit(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrEmit();
-  assert(getSrcSize() == 1);
-  Str << "\tneg\t";
-  getSrc(0)->emit(Func);
-  Str << "\n";
-}
-
-void InstX8632Neg::dump(const Cfg *Func) const {
-  Ostream &Str = Func->getContext()->getStrDump();
-  dumpDest(Func);
-  Str << " = neg." << getDest()->getType() << " ";
-  dumpSources(Func);
-}
-
 void InstX8632Shld::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 3);

diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index db60d68..b50199b 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h

@@ -141,6 +141,7 @@
     Br,
     Bsf,
     Bsr,
+    Bswap,
     Call,
     Cdq,
     Cmov,
@@ -188,6 +189,7 @@
     Push,
     Pxor,
     Ret,
+    Rol,
     Sar,
     Sbb,
     Shl,
@@ -352,6 +354,42 @@
   virtual ~InstX8632Call() {}
 };
 
+// Instructions of the form x := op(x).
+template <InstX8632::InstKindX8632 K>
+class InstX8632Inplaceop : public InstX8632 {
+public:
+  static InstX8632Inplaceop *create(Cfg *Func, Operand *SrcDest) {
+    return new (Func->allocate<InstX8632Inplaceop>())
+        InstX8632Inplaceop(Func, SrcDest);
+  }
+  virtual void emit(const Cfg *Func) const {
+    Ostream &Str = Func->getContext()->getStrEmit();
+    assert(getSrcSize() == 1);
+    Str << "\t" << Opcode << "\t";
+    getSrc(0)->emit(Func);
+    Str << "\n";
+  }
+  virtual void dump(const Cfg *Func) const {
+    Ostream &Str = Func->getContext()->getStrDump();
+    dumpDest(Func);
+    Str << " = " << Opcode << "." << getDest()->getType() << " ";
+    dumpSources(Func);
+  }
+  static bool classof(const Inst *Inst) { return isClassof(Inst, K); }
+
+private:
+  InstX8632Inplaceop(Cfg *Func, Operand *SrcDest)
+      : InstX8632(Func, K, 1, llvm::dyn_cast<Variable>(SrcDest)) {
+    addSource(SrcDest);
+  }
+  InstX8632Inplaceop(const InstX8632Inplaceop &) LLVM_DELETED_FUNCTION;
+  InstX8632Inplaceop &
+  operator=(const InstX8632Inplaceop &) LLVM_DELETED_FUNCTION;
+  virtual ~InstX8632Inplaceop() {}
+  static const char *Opcode;
+};
+
+// Instructions of the form x := op(y)
 template <InstX8632::InstKindX8632 K>
 class InstX8632Unaryop : public InstX8632 {
 public:
@@ -506,6 +544,8 @@
   static const char *Opcode;
 };
 
+typedef InstX8632Inplaceop<InstX8632::Bswap> InstX8632Bswap;
+typedef InstX8632Inplaceop<InstX8632::Neg> InstX8632Neg;
 typedef InstX8632Unaryop<InstX8632::Bsf> InstX8632Bsf;
 typedef InstX8632Unaryop<InstX8632::Bsr> InstX8632Bsr;
 typedef InstX8632Unaryop<InstX8632::Lea> InstX8632Lea;
@@ -535,6 +575,7 @@
 typedef InstX8632Binop<InstX8632::Pmuludq> InstX8632Pmuludq;
 typedef InstX8632Binop<InstX8632::Divps> InstX8632Divps;
 typedef InstX8632Binop<InstX8632::Divss> InstX8632Divss;
+typedef InstX8632Binop<InstX8632::Rol, true> InstX8632Rol;
 typedef InstX8632Binop<InstX8632::Shl, true> InstX8632Shl;
 typedef InstX8632Binop<InstX8632::Psll> InstX8632Psll;
 typedef InstX8632Binop<InstX8632::Shr, true> InstX8632Shr;
@@ -590,23 +631,6 @@
   virtual ~InstX8632Mul() {}
 };
 
-// Neg instruction - Two's complement negation.
-class InstX8632Neg : public InstX8632 {
-public:
-  static InstX8632Neg *create(Cfg *Func, Operand *SrcDest) {
-    return new (Func->allocate<InstX8632Neg>()) InstX8632Neg(Func, SrcDest);
-  }
-  virtual void emit(const Cfg *Func) const;
-  virtual void dump(const Cfg *Func) const;
-  static bool classof(const Inst *Inst) { return isClassof(Inst, Neg); }
-
-private:
-  InstX8632Neg(Cfg *Func, Operand *SrcDest);
-  InstX8632Neg(const InstX8632Neg &) LLVM_DELETED_FUNCTION;
-  InstX8632Neg &operator=(const InstX8632Neg &) LLVM_DELETED_FUNCTION;
-  virtual ~InstX8632Neg() {}
-};
-
 // Shld instruction - shift across a pair of operands.  TODO: Verify
 // that the validator accepts the shld instruction.
 class InstX8632Shld : public InstX8632 {

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index af9ebc5..b38481b 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp

@@ -2596,9 +2596,35 @@
     _mfence();
     return;
   }
-  case Intrinsics::Bswap:
-    Func->setError("Unhandled intrinsic");
+  case Intrinsics::Bswap: {
+    Variable *Dest = Instr->getDest();
+    Operand *Val = Instr->getArg(0);
+    // In 32-bit mode, bswap only works on 32-bit arguments, and the
+    // argument must be a register. Use rotate left for 16-bit bswap.
+    if (Val->getType() == IceType_i64) {
+      Variable *T_Lo = legalizeToVar(loOperand(Val));
+      Variable *T_Hi = legalizeToVar(hiOperand(Val));
+      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+      _bswap(T_Lo);
+      _bswap(T_Hi);
+      _mov(DestLo, T_Hi);
+      _mov(DestHi, T_Lo);
+    } else if (Val->getType() == IceType_i32) {
+      Variable *T = legalizeToVar(Val);
+      _bswap(T);
+      _mov(Dest, T);
+    } else {
+      assert(Val->getType() == IceType_i16);
+      Val = legalize(Val);
+      Constant *Eight = Ctx->getConstantInt(IceType_i16, 8);
+      Variable *T = NULL;
+      _mov(T, Val);
+      _rol(T, Eight);
+      _mov(Dest, T);
+    }
     return;
+  }
   case Intrinsics::Ctpop: {
     Variable *Dest = Instr->getDest();
     Operand *Val = Instr->getArg(0);

diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index fefc7fd..4c0c245 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h

@@ -196,6 +196,9 @@
   void _bsr(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Bsr::create(Func, Dest, Src0));
   }
+  void _bswap(Variable *SrcDest) {
+    Context.insert(InstX8632Bswap::create(Func, SrcDest));
+  }
   void _cdq(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Cdq::create(Func, Dest, Src0));
   }
@@ -342,6 +345,9 @@
   void _ret(Variable *Src0 = NULL) {
     Context.insert(InstX8632Ret::create(Func, Src0));
   }
+  void _rol(Variable *Dest, Operand *Src0) {
+    Context.insert(InstX8632Rol::create(Func, Dest, Src0));
+  }
   void _sar(Variable *Dest, Operand *Src0) {
     Context.insert(InstX8632Sar::create(Func, Dest, Src0));
   }

diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
index cebf923..dbebf11 100644
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll

@@ -22,6 +22,9 @@
 declare float @llvm.sqrt.f32(float)
 declare double @llvm.sqrt.f64(double)
 declare void @llvm.trap()
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
 declare i32 @llvm.ctlz.i32(i32, i1)
 declare i64 @llvm.ctlz.i64(i64, i1)
 declare i32 @llvm.cttz.i32(i32, i1)
@@ -242,6 +245,33 @@
 ; CHECK-LABEL: test_trap
 ; CHECK: ud2
 
+define i32 @test_bswap_16(i32 %x) {
+entry:
+  %x_trunc = trunc i32 %x to i16
+  %r = call i16 @llvm.bswap.i16(i16 %x_trunc)
+  %r_zext = zext i16 %r to i32
+  ret i32 %r_zext
+}
+; CHECK-LABEL: test_bswap_16
+; CHECK: rol {{.*}}, 8
+
+define i32 @test_bswap_32(i32 %x) {
+entry:
+  %r = call i32 @llvm.bswap.i32(i32 %x)
+  ret i32 %r
+}
+; CHECK-LABEL: test_bswap_32
+; CHECK: bswap e{{.*}}
+
+define i64 @test_bswap_64(i64 %x) {
+entry:
+  %r = call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %r
+}
+; CHECK-LABEL: test_bswap_64
+; CHECK: bswap e{{.*}}
+; CHECK: bswap e{{.*}}
+
 define i32 @test_ctlz_32(i32 %x) {
 entry:
   %r = call i32 @llvm.ctlz.i32(i32 %x, i1 0)
commit	7fa813b31abd816623b02a205b0b1bda51e6bfc8	[log] [tgz]
author	Jan Voung <jvoung@chromium.org>	Fri Jul 18 13:01:08 2014 -0700
committer	Jan Voung <jvoung@chromium.org>	Fri Jul 18 13:01:08 2014 -0700
tree	5ef9815161cf09d8646e8d23d1007b96b229b101
parent	49889239d4c7ab296c7430722d36032d905110b6 [diff]