Factor out prelowerPhi for 32-bit targets. Disable adv phi lowering for ARM.

This way, prelowerPhis can be shared between 32-bit targets (it splits
64-bit values into 32-bit ones and legalizes undef). Suggestions from
template experts on how best to share prelowerPhis are welcome. I'm not
particularly happy with this first pass, in that legalizeUndef has to be
made public (though the other methods used are also public). Also, the
methods required from the template type TargetT aren't clear without
looking through the code.

The current advanced phi lowering code depends on lowerPhiAssignments,
which is a special case of lowerAssign that does some ad hoc register
allocation. The current ad hoc register allocation doesn't work as
well when a target may need to spill more than one register.
Disable that optimization for ARM for now, until we have a better
way that works for ARM, and enable O2 cross testing on ARM.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1223133007 .
diff --git a/Makefile.standalone b/Makefile.standalone
index 0d0bad2..47afc8b 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -328,10 +328,10 @@
 	  --toolchain-root $(TOOLCHAIN_ROOT) \
 	  -i x8632,native,sse2 -i x8632,native,sse4.1,test_vector_ops \
 	  -i x8632,sandbox,sse4.1,Om1 \
-	  -i arm32,native,neon,Om1,simple_loop \
-	  -i arm32,native,neon,Om1,mem_intrin \
-	  -i arm32,native,neon,Om1,test_stacksave \
-	  -i arm32,native,neon,Om1,test_strengthreduce
+	  -i arm32,native,neon,simple_loop \
+	  -i arm32,native,neon,mem_intrin \
+	  -i arm32,native,neon,test_stacksave \
+	  -i arm32,native,neon,test_strengthreduce
 	PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
 	$(LLVM_SRC_PATH)/utils/lit/lit.py -sv crosstest/Output
 endif
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index f1aebd2..2d2e641 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -390,7 +390,15 @@
   OutFlags.setFunctionSections(::FunctionSections);
   OutFlags.setNumTranslationThreads(::NumThreads);
   OutFlags.setOptLevel(::OLevel);
-  OutFlags.setPhiEdgeSplit(::EnablePhiEdgeSplit);
+  if (::TargetArch == Target_ARM32) {
+    // TODO(jvoung): We need lowerPhiAssignments to handle spilling
+    // more than one register, since some ARM lowerAssign sequences
+    // may require more than one register. For now, disable PhiEdgeSplit
+    // to avoid requiring lowerPhiAssignments.
+    OutFlags.setPhiEdgeSplit(false);
+  } else {
+    OutFlags.setPhiEdgeSplit(::EnablePhiEdgeSplit);
+  }
   OutFlags.setRandomSeed(::RandomSeed);
   OutFlags.setShouldDoNopInsertion(::ShouldDoNopInsertion);
   OutFlags.setShouldRandomizeRegAlloc(::RandomizeRegisterAllocation);
diff --git a/src/IcePhiLoweringImpl.h b/src/IcePhiLoweringImpl.h
new file mode 100644
index 0000000..cf932d6
--- /dev/null
+++ b/src/IcePhiLoweringImpl.h
@@ -0,0 +1,61 @@
+//===------ subzero/src/IcePhiLoweringImpl.h - Phi lowering -----*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This contains utilities for targets to lower Phis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEPHILOWERINGIMPL_H
+#define SUBZERO_SRC_ICEPHILOWERINGIMPL_H
+
+#include "IceCfg.h"
+#include "IceCfgNode.h"
+#include "IceDefs.h"
+#include "IceInst.h"
+#include "IceOperand.h"
+
+namespace Ice {
+namespace PhiLowering {
+
+// Turn each i64 Phi in Node into a pair of i32 Phis (lo and hi halves), to
+// preserve integrity of liveness analysis on 32-bit targets.  TargetT must
+// provide public loOperand(), hiOperand(), and legalizeUndef() methods.
+// Undef sources are legalized first, since loOperand() and hiOperand()
+// don't expect Undef input.  The original i64 Phi is marked deleted.
+template <class TargetT>
+void prelowerPhis32Bit(TargetT *Target, CfgNode *Node, Cfg *Func) {
+  for (Inst &I : Node->getPhis()) {
+    // cast<> (not dyn_cast<>): the result is dereferenced unconditionally.
+    auto *Phi = llvm::cast<InstPhi>(&I);
+    if (Phi->isDeleted())
+      continue;
+    Variable *Dest = Phi->getDest();
+    if (Dest->getType() == IceType_i64) {
+      Variable *DestLo = llvm::cast<Variable>(Target->loOperand(Dest));
+      Variable *DestHi = llvm::cast<Variable>(Target->hiOperand(Dest));
+      InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
+      InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
+      for (SizeT SrcIdx = 0; SrcIdx < Phi->getSrcSize(); ++SrcIdx) {
+        Operand *Src = Target->legalizeUndef(Phi->getSrc(SrcIdx));
+        CfgNode *Label = Phi->getLabel(SrcIdx);
+        PhiLo->addArgument(Target->loOperand(Src), Label);
+        PhiHi->addArgument(Target->hiOperand(Src), Label);
+      }
+      Node->getPhis().push_back(PhiLo);
+      Node->getPhis().push_back(PhiHi);
+      Phi->setDeleted();
+    }
+  }
+}
+
+} // end of namespace PhiLowering
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICEPHILOWERINGIMPL_H
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 0dbcfb1..e09a85f 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -24,6 +24,7 @@
 #include "IceInstARM32.h"
 #include "IceLiveness.h"
 #include "IceOperand.h"
+#include "IcePhiLoweringImpl.h"
 #include "IceRegistersARM32.h"
 #include "IceTargetLoweringARM32.def"
 #include "IceUtils.h"
@@ -2410,12 +2411,8 @@
   _trap();
 }
 
-// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-// preserve integrity of liveness analysis.  Undef values are also
-// turned into zeroes, since loOperand() and hiOperand() don't expect
-// Undef input.
 void TargetARM32::prelowerPhis() {
-  UnimplementedError(Func->getContext()->getFlags());
+  PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func);
 }
 
 // Lower the pre-ordered list of assignments into mov instructions.
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 4c95d3c..87faf01 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -103,6 +103,7 @@
   bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
     return CPUFeatures.hasFeature(I);
   }
+  Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
 
 protected:
   explicit TargetARM32(Cfg *Func);
@@ -146,7 +147,6 @@
   Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
                     int32_t RegNum = Variable::NoRegister);
   Variable *legalizeToVar(Operand *From, int32_t RegNum = Variable::NoRegister);
-  Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
   OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
 
   Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 69b88ee..7963b01 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -159,6 +159,7 @@
   typename Traits::InstructionSet getInstructionSet() const final {
     return InstructionSet;
   }
+  Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
 
 protected:
   explicit TargetX86Base(Cfg *Func);
@@ -230,7 +231,6 @@
   Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
                     int32_t RegNum = Variable::NoRegister);
   Variable *legalizeToVar(Operand *From, int32_t RegNum = Variable::NoRegister);
-  Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
   /// Legalize the first source operand for use in the cmp instruction.
   Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
   /// Turn a pointer operand into a memory operand that can be
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 0ac63b8..cf786f4 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -25,6 +25,7 @@
 #include "IceGlobalInits.h"
 #include "IceLiveness.h"
 #include "IceOperand.h"
+#include "IcePhiLoweringImpl.h"
 #include "IceUtils.h"
 #include "llvm/Support/MathExtras.h"
 
@@ -4696,30 +4697,8 @@
   // Pause constant blinding or pooling, blinding or pooling will be done later
   // during phi lowering assignments
   BoolFlagSaver B(RandomizationPoolingPaused, true);
-
-  CfgNode *Node = Context.getNode();
-  for (Inst &I : Node->getPhis()) {
-    auto Phi = llvm::dyn_cast<InstPhi>(&I);
-    if (Phi->isDeleted())
-      continue;
-    Variable *Dest = Phi->getDest();
-    if (Dest->getType() == IceType_i64) {
-      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
-      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
-      InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
-      InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
-      for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
-        Operand *Src = Phi->getSrc(I);
-        CfgNode *Label = Phi->getLabel(I);
-        Src = legalizeUndef(Src);
-        PhiLo->addArgument(loOperand(Src), Label);
-        PhiHi->addArgument(hiOperand(Src), Label);
-      }
-      Node->getPhis().push_back(PhiLo);
-      Node->getPhis().push_back(PhiHi);
-      Phi->setDeleted();
-    }
-  }
+  PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
+      this, Context.getNode(), Func);
 }
 
 bool isMemoryOperand(const Operand *Opnd) {
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
index b88babe..cc32a87 100644
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -158,6 +158,28 @@
 ; ARM32:      bl      {{.*}} ignore64BitArgNoInline
 ; ARM32:      add     sp, {{.*}} #16
 
+define internal i32 @pass64BitUndefArg() {
+entry:
+  %call = call i32 @ignore64BitArgNoInline(i64 0, i32 123, i64 undef)
+  ret i32 %call
+}
+; CHECK-LABEL: pass64BitUndefArg
+; CHECK: sub esp
+; CHECK: mov DWORD PTR{{.*}},0x7b
+; CHECK: mov DWORD PTR{{.*}},0x0
+; CHECK: call {{.*}} R_{{.*}} ignore64BitArgNoInline
+; OPTM1-LABEL: pass64BitUndefArg
+; OPTM1: sub esp
+; OPTM1: mov DWORD PTR{{.*}},0x7b
+; OPTM1: mov DWORD PTR{{.*}},0x0
+; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
+; ARM32-LABEL: pass64BitUndefArg
+; ARM32: sub sp
+; ARM32: movw {{.*}}, #0
+; ARM32: str
+; ARM32: movw {{.*}}, #123
+; ARM32: bl {{.*}} ignore64BitArgNoInline
+
 define internal i64 @return64BitArg(i64 %padding, i64 %a) {
 entry:
   ret i64 %a
@@ -1744,3 +1766,49 @@
 ; OPTM1-NOT: cmp 0x{{[0-9a-f]+}},
 ; ARM32-LABEL: icmpLt64Imm
 ; ARM32-NOT: cmp #{{[0-9a-f]+}},
+
+define internal i64 @phi64Imm(i32 %x, i64 %y, i64 %z) {
+entry:
+  %cond = icmp eq i32 %x, 88
+  br i1 %cond, label %branch1, label %branch2
+branch1:
+  %tmp = add i64 %y, %z
+  br label %branch2
+
+branch2:
+  %merge = phi i64 [ %tmp, %branch1 ], [ 20014547621496, %entry ]
+  ret i64 %merge
+}
+; CHECK-LABEL: phi64Imm
+; CHECK: mov {{.*}},0x5678
+; CHECK: mov {{.*}},0x1234
+; OPTM1-LABEL: phi64Imm
+; OPTM1: mov {{.*}},0x5678
+; OPTM1: mov {{.*}},0x1234
+; ARM32-LABEL: phi64Imm
+; ARM32: movw {{.*}}, #22136 ; 0x5678
+; ARM32: movw {{.*}}, #4660  ; 0x1234
+
+define internal i64 @phi64Undef(i32 %x, i64 %y, i64 %z) {
+entry:
+  %cond = icmp eq i32 %x, 88
+  br i1 %cond, label %branch1, label %branch2
+branch1:
+  %tmp = add i64 %y, %z
+  br label %branch2
+
+branch2:
+  %merge = phi i64 [ %tmp, %branch1 ], [ undef, %entry ]
+  ret i64 %merge
+}
+
+; CHECK-LABEL: phi64Undef
+; CHECK: mov {{.*}},0x0
+; CHECK: mov {{.*}},0x0
+; OPTM1-LABEL: phi64Undef
+; OPTM1: mov {{.*}},0x0
+; OPTM1: mov {{.*}},0x0
+; ARM32-LABEL: phi64Undef
+; ARM32: mov {{.*}} #0
+; ARM32: mov {{.*}} #0
+