ARM32: Lower more integer intrinsics and test.

Lower stacksave/restore.
Lower ctlz, cttz, bswap, and popcount. Popcount is simply
lowered to a helper call. Cttz can use the clz instruction
after reversing the bits (rbit).

For now we can only crosstest stacksave/restore, because that
test happens to be written in C (for the C99 VLAs). I can't
seem to compile the CXX crosstests with arm-cross-g++
(missing headers), so I will check those later, after
resolving the cross-compilation issue.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=jpp@chromium.org

Review URL: https://codereview.chromium.org/1222943003 .
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 72178ac..e95f6b1 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -80,18 +80,15 @@
 }
 
 void InstARM32Pred::emitUnaryopGPR(const char *Opcode,
-                                   const InstARM32Pred *Inst, const Cfg *Func) {
+                                   const InstARM32Pred *Inst, const Cfg *Func,
+                                   bool NeedsWidthSuffix) {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(Inst->getSrcSize() == 1);
   Type SrcTy = Inst->getSrc(0)->getType();
-  Type DestTy = Inst->getDest()->getType();
   Str << "\t" << Opcode;
-  // Sxt and Uxt need source type width letter to define the operation.
-  // The other unary operations have the same source and dest type and
-  // as a result need only one letter.
-  if (SrcTy != DestTy)
+  if (NeedsWidthSuffix)
     Str << getWidthString(SrcTy);
-  Str << "\t";
+  Str << Inst->getPredicate() << "\t";
   Inst->getDest()->emit(Func);
   Str << ", ";
   Inst->getSrc(0)->emit(Func);
@@ -358,7 +355,10 @@
 template <> const char *InstARM32Movt::Opcode = "movt";
 // Unary ops
 template <> const char *InstARM32Movw::Opcode = "movw";
+template <> const char *InstARM32Clz::Opcode = "clz";
 template <> const char *InstARM32Mvn::Opcode = "mvn";
+template <> const char *InstARM32Rbit::Opcode = "rbit";
+template <> const char *InstARM32Rev::Opcode = "rev";
 template <> const char *InstARM32Sxt::Opcode = "sxt"; // still requires b/h
 template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h
 // Mov-like ops