Subzero: Fix Phi lowering.
This addresses a TODO and fixes a code generation bug that was causing sprintf and _vfprintf_r to execute incorrectly.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/560773002
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 696a2f0..f6b4a98 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -115,20 +115,62 @@
// critical edges, add the assignments, and lower them. This should
// reduce the amount of shuffling at the end of each block.
void CfgNode::placePhiStores() {
- // Find the insertion point. TODO: After branch/compare fusing is
- // implemented, try not to insert Phi stores between the compare and
- // conditional branch instructions, otherwise the branch/compare
- // pattern matching may fail. However, the branch/compare sequence
- // will have to be broken if the compare result is read (by the
- // assignment) before it is written (by the compare).
+ // Find the insertion point.
InstList::iterator InsertionPoint = Insts.end();
- // Every block must end in a terminator instruction.
+ // Every block must end in a terminator instruction, and therefore
+ // must have at least one instruction, so it's valid to decrement
+ // InsertionPoint (but assert just in case).
assert(InsertionPoint != Insts.begin());
--InsertionPoint;
// Confirm that InsertionPoint is a terminator instruction. Calling
// getTerminatorEdges() on a non-terminator instruction will cause
// an llvm_unreachable().
(void)(*InsertionPoint)->getTerminatorEdges();
+ // SafeInsertionPoint is always immediately before the terminator
+ // instruction. If the block ends in a compare and conditional
+ // branch, it's better to place the Phi store before the compare so
+ // as not to interfere with compare/branch fusing. However, if the
+ // compare instruction's dest operand is the same as the new
+ // assignment statement's source operand, this can't be done due to
+ // data dependences, so we need to fall back to the
+ // SafeInsertionPoint. To illustrate:
+ // ; <label>:95
+ // %97 = load i8* %96, align 1
+ // %98 = icmp ne i8 %97, 0
+ // br i1 %98, label %99, label %2132
+ // ; <label>:99
+ // %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
+ // %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
+ // would be Phi-lowered as:
+ // ; <label>:95
+ // %97 = load i8* %96, align 1
+ // %100_phi = %97 ; can be at InsertionPoint
+ // %98 = icmp ne i8 %97, 0
+ // %101_phi = %98 ; must be at SafeInsertionPoint
+ // br i1 %98, label %99, label %2132
+ // ; <label>:99
+ // %100 = %100_phi
+ // %101 = %101_phi
+ //
+ // TODO(stichnot): It may be possible to bypass this whole
+ // SafeInsertionPoint mechanism. If a source basic block ends in a
+ // conditional branch:
+ // labelSource:
+ // ...
+ // br i1 %foo, label %labelTrue, label %labelFalse
+ // and a branch target has a Phi involving the branch operand:
+ // labelTrue:
+ // %bar = phi i1 [ %foo, %labelSource ], ...
+ // then we actually know the constant i1 value of the Phi operand:
+ // labelTrue:
+ // %bar = phi i1 [ true, %labelSource ], ...
+ // It seems that this optimization should be done by clang or opt,
+ // but we could also do it here.
+ InstList::iterator SafeInsertionPoint = InsertionPoint;
+ // Keep track of the dest variable of a compare instruction, so that
+ // we insert the new instruction at the SafeInsertionPoint if the
+ // compare's dest matches the Phi-lowered assignment's source.
+ Variable *CmpInstDest = NULL;
// If the current insertion point is at a conditional branch
// instruction, and the previous instruction is a compare
// instruction, then we move the insertion point before the compare
@@ -137,8 +179,10 @@
if (!Branch->isUnconditional()) {
if (InsertionPoint != Insts.begin()) {
--InsertionPoint;
- if (!llvm::isa<InstIcmp>(*InsertionPoint) &&
- !llvm::isa<InstFcmp>(*InsertionPoint)) {
+ if (llvm::isa<InstIcmp>(*InsertionPoint) ||
+ llvm::isa<InstFcmp>(*InsertionPoint)) {
+ CmpInstDest = (*InsertionPoint)->getDest();
+ } else {
++InsertionPoint;
}
}
@@ -165,7 +209,10 @@
Dest->setPreferredRegister(Src, AllowOverlap);
Src->setPreferredRegister(Dest, AllowOverlap);
}
- Insts.insert(InsertionPoint, NewInst);
+ if (CmpInstDest == Operand)
+ Insts.insert(SafeInsertionPoint, NewInst);
+ else
+ Insts.insert(InsertionPoint, NewInst);
NewInst->updateVars(this);
}
}
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index e704ed3..986ff76 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -97,6 +97,11 @@
DisableGlobals("disable-globals",
cl::desc("Disable global initializer translation"));
+// This is currently unused, and is a placeholder for lit tests.
+static cl::opt<bool>
+ DisablePhiEdgeSplit("no-phi-edge-split",
+ cl::desc("Disable edge splitting for Phi lowering"));
+
static cl::opt<NaClFileFormat> InputFileFormat(
"bitcode-format", cl::desc("Define format of input file:"),
cl::values(clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"),
diff --git a/tests_lit/llvm2ice_tests/phi.ll b/tests_lit/llvm2ice_tests/phi.ll
new file mode 100644
index 0000000..ae96063
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/phi.ll
@@ -0,0 +1,60 @@
+; This tests some of the subtleties of Phi lowering. In particular,
+; it tests that it does the right thing when it tries to enable
+; compare/branch fusing.
+
+; RUN: %llvm2ice -O2 --verbose none --no-phi-edge-split %s \
+; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
+; RUN: | llvm-objdump -d -symbolize -x86-asm-syntax=intel - | FileCheck %s
+; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
+; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
+; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
+; RUN: | FileCheck --check-prefix=DUMP %s
+
+define internal i32 @testPhi1(i32 %arg) {
+entry:
+ %cmp1 = icmp sgt i32 %arg, 0
+ br i1 %cmp1, label %next, label %target
+next:
+ br label %target
+target:
+ %merge = phi i1 [ %cmp1, %entry ], [ false, %next ]
+ %result = zext i1 %merge to i32
+ ret i32 %result
+}
+; Test that compare/branch fusing does not happen, and Phi lowering is
+; put in the right place.
+; CHECK-LABEL: testPhi1
+; CHECK: cmp {{.*}}, 0
+; CHECK: mov {{.*}}, 1
+; CHECK: jg
+; CHECK: mov {{.*}}, 0
+; CHECK: mov [[PHI:.*]],
+; CHECK: cmp {{.*}}, 0
+; CHECK: jne
+; CHECK: :
+; CHECK: mov [[PHI]], 0
+; CHECK: :
+; CHECK: movzx {{.*}}, [[PHI]]
+
+define internal i32 @testPhi2(i32 %arg) {
+entry:
+ %cmp1 = icmp sgt i32 %arg, 0
+ br i1 %cmp1, label %next, label %target
+next:
+ br label %target
+target:
+ %merge = phi i32 [ 12345, %entry ], [ 54321, %next ]
+ ret i32 %merge
+}
+; Test that compare/branch fusing and Phi lowering happens as expected.
+; CHECK-LABEL: testPhi2
+; CHECK: mov {{.*}}, 12345
+; CHECK: cmp {{.*}}, 0
+; CHECK-NEXT: jg
+; CHECK: :
+; CHECK: mov [[PHI:.*]], 54321
+; CHECK: :
+; CHECK: mov {{.*}}, [[PHI]]
+
+; ERRORS-NOT: ICE translation error
+; DUMP-NOT: SZ