Subzero: Fix Phi lowering.

This addresses a TODO and fixes a code generation bug that was causing sprintf and _vfprintf_r to execute incorrectly.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/560773002
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 696a2f0..f6b4a98 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -115,20 +115,62 @@
 // critical edges, add the assignments, and lower them.  This should
 // reduce the amount of shuffling at the end of each block.
 void CfgNode::placePhiStores() {
-  // Find the insertion point.  TODO: After branch/compare fusing is
-  // implemented, try not to insert Phi stores between the compare and
-  // conditional branch instructions, otherwise the branch/compare
-  // pattern matching may fail.  However, the branch/compare sequence
-  // will have to be broken if the compare result is read (by the
-  // assignment) before it is written (by the compare).
+  // Find the insertion point.
   InstList::iterator InsertionPoint = Insts.end();
-  // Every block must end in a terminator instruction.
+  // Every block must end in a terminator instruction, and therefore
+  // must have at least one instruction, so it's valid to decrement
+  // InsertionPoint (but assert just in case).
   assert(InsertionPoint != Insts.begin());
   --InsertionPoint;
   // Confirm that InsertionPoint is a terminator instruction.  Calling
   // getTerminatorEdges() on a non-terminator instruction will cause
   // an llvm_unreachable().
   (void)(*InsertionPoint)->getTerminatorEdges();
+  // SafeInsertionPoint is always immediately before the terminator
+  // instruction.  If the block ends in a compare and conditional
+  // branch, it's better to place the Phi store before the compare so
+  // as not to interfere with compare/branch fusing.  However, if the
+  // compare instruction's dest operand is the same as the new
+  // assignment statement's source operand, this can't be done due to
+  // data dependences, so we need to fall back to the
+  // SafeInsertionPoint.  To illustrate:
+  //   ; <label>:95
+  //   %97 = load i8* %96, align 1
+  //   %98 = icmp ne i8 %97, 0
+  //   br i1 %98, label %99, label %2132
+  //   ; <label>:99
+  //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
+  //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
+  // would be Phi-lowered as:
+  //   ; <label>:95
+  //   %97 = load i8* %96, align 1
+  //   %100_phi = %97 ; can be at InsertionPoint
+  //   %98 = icmp ne i8 %97, 0
+  //   %101_phi = %98 ; must be at SafeInsertionPoint
+  //   br i1 %98, label %99, label %2132
+  //   ; <label>:99
+  //   %100 = %100_phi
+  //   %101 = %101_phi
+  //
+  // TODO(stichnot): It may be possible to bypass this whole
+  // SafeInsertionPoint mechanism.  If a source basic block ends in a
+  // conditional branch:
+  //   labelSource:
+  //   ...
+  //   br i1 %foo, label %labelTrue, label %labelFalse
+  // and a branch target has a Phi involving the branch operand:
+  //   labelTrue:
+  //   %bar = phi i1 [ %foo, %labelSource ], ...
+  // then we actually know the constant i1 value of the Phi operand:
+  //   labelTrue:
+  //   %bar = phi i1 [ true, %labelSource ], ...
+  // It seems that this optimization should be done by clang or opt,
+  // but we could also do it here.
+  InstList::iterator SafeInsertionPoint = InsertionPoint;
+  // Keep track of the dest variable of a compare instruction, so that
+  // we insert the new instruction at the SafeInsertionPoint if the
+  // compare's dest matches the Phi-lowered assignment's source.
+  Variable *CmpInstDest = NULL;
   // If the current insertion point is at a conditional branch
   // instruction, and the previous instruction is a compare
   // instruction, then we move the insertion point before the compare
@@ -137,8 +179,10 @@
     if (!Branch->isUnconditional()) {
       if (InsertionPoint != Insts.begin()) {
         --InsertionPoint;
-        if (!llvm::isa<InstIcmp>(*InsertionPoint) &&
-            !llvm::isa<InstFcmp>(*InsertionPoint)) {
+        if (llvm::isa<InstIcmp>(*InsertionPoint) ||
+            llvm::isa<InstFcmp>(*InsertionPoint)) {
+          CmpInstDest = (*InsertionPoint)->getDest();
+        } else {
           ++InsertionPoint;
         }
       }
@@ -165,7 +209,10 @@
         Dest->setPreferredRegister(Src, AllowOverlap);
         Src->setPreferredRegister(Dest, AllowOverlap);
       }
-      Insts.insert(InsertionPoint, NewInst);
+      if (CmpInstDest == Operand)
+        Insts.insert(SafeInsertionPoint, NewInst);
+      else
+        Insts.insert(InsertionPoint, NewInst);
       NewInst->updateVars(this);
     }
   }
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index e704ed3..986ff76 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -97,6 +97,11 @@
     DisableGlobals("disable-globals",
                    cl::desc("Disable global initializer translation"));
 
+// This is currently unused, and is a placeholder for lit tests.
+static cl::opt<bool>
+    DisablePhiEdgeSplit("no-phi-edge-split",
+                        cl::desc("Disable edge splitting for Phi lowering"));
+
 static cl::opt<NaClFileFormat> InputFileFormat(
     "bitcode-format", cl::desc("Define format of input file:"),
     cl::values(clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"),
diff --git a/tests_lit/llvm2ice_tests/phi.ll b/tests_lit/llvm2ice_tests/phi.ll
new file mode 100644
index 0000000..ae96063
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/phi.ll
@@ -0,0 +1,60 @@
+; This tests some of the subtleties of Phi lowering.  In particular,
+; it tests that it does the right thing when it tries to enable
+; compare/branch fusing.
+
+; RUN: %llvm2ice -O2 --verbose none --no-phi-edge-split %s \
+; RUN:   | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
+; RUN:   | llvm-objdump -d -symbolize -x86-asm-syntax=intel - | FileCheck %s
+; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
+; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
+; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
+; RUN:                           | FileCheck --check-prefix=DUMP %s
+
+define internal i32 @testPhi1(i32 %arg) {
+entry:
+  %cmp1 = icmp sgt i32 %arg, 0
+  br i1 %cmp1, label %next, label %target
+next:
+  br label %target
+target:
+  %merge = phi i1 [ %cmp1, %entry ], [ false, %next ]
+  %result = zext i1 %merge to i32
+  ret i32 %result
+}
+; Test that compare/branch fusing does not happen, and Phi lowering is
+; put in the right place.
+; CHECK-LABEL: testPhi1
+; CHECK: cmp {{.*}}, 0
+; CHECK: mov {{.*}}, 1
+; CHECK: jg
+; CHECK: mov {{.*}}, 0
+; CHECK: mov [[PHI:.*]],
+; CHECK: cmp {{.*}}, 0
+; CHECK: jne
+; CHECK: :
+; CHECK: mov [[PHI]], 0
+; CHECK: :
+; CHECK: movzx {{.*}}, [[PHI]]
+
+define internal i32 @testPhi2(i32 %arg) {
+entry:
+  %cmp1 = icmp sgt i32 %arg, 0
+  br i1 %cmp1, label %next, label %target
+next:
+  br label %target
+target:
+  %merge = phi i32 [ 12345, %entry ], [ 54321, %next ]
+  ret i32 %merge
+}
+; Test that compare/branch fusing and Phi lowering happens as expected.
+; CHECK-LABEL: testPhi2
+; CHECK: mov {{.*}}, 12345
+; CHECK: cmp {{.*}}, 0
+; CHECK-NEXT: jg
+; CHECK: :
+; CHECK: mov [[PHI:.*]], 54321
+; CHECK: :
+; CHECK: mov {{.*}}, [[PHI]]
+
+; ERRORS-NOT: ICE translation error
+; DUMP-NOT: SZ