Subzero: Rewrite the pass timing infrastructure.
This makes it much more useful for individual analysis and long-term translation performance tracking.
1. Collect and report timings aggregated across the entire translation, instead of function-by-function. If you really care about a single function, just extract it and translate it separately for analysis.
2. Remove "-verbose time" and just use -timing.
3. Collect two kinds of timings: cumulative and flat. Cumulative measures the total time, even if a callee also times itself. Flat measures only the currently active timer at the top of the stack. The flat times should add up to 100%, but the cumulative times will usually add up to much more than 100%.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/610813002
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 8f51f43..d2c83ab 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -69,14 +69,14 @@
void Cfg::translate() {
if (hasError())
return;
+ static TimerIdT IDtranslate = GlobalContext::getTimerID("translate");
+ TimerMarker T(IDtranslate, getContext());
dump("Initial CFG");
- Timer T_translate;
// The set of translation passes and their order are determined by
// the target.
getTarget()->translate();
- T_translate.printElapsedUs(getContext(), "translate()");
dump("Final output");
}
@@ -88,6 +88,9 @@
}
void Cfg::renumberInstructions() {
+ static TimerIdT IDrenumberInstructions =
+ GlobalContext::getTimerID("renumberInstructions");
+ TimerMarker T(IDrenumberInstructions, getContext());
NextInstNumber = 1;
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->renumberInstructions();
@@ -96,6 +99,8 @@
// placePhiLoads() must be called before placePhiStores().
void Cfg::placePhiLoads() {
+ static TimerIdT IDplacePhiLoads = GlobalContext::getTimerID("placePhiLoads");
+ TimerMarker T(IDplacePhiLoads, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->placePhiLoads();
}
@@ -103,34 +108,48 @@
// placePhiStores() must be called after placePhiLoads().
void Cfg::placePhiStores() {
+ static TimerIdT IDplacePhiStores =
+ GlobalContext::getTimerID("placePhiStores");
+ TimerMarker T(IDplacePhiStores, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->placePhiStores();
}
}
void Cfg::deletePhis() {
+ static TimerIdT IDdeletePhis = GlobalContext::getTimerID("deletePhis");
+ TimerMarker T(IDdeletePhis, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->deletePhis();
}
}
void Cfg::doArgLowering() {
+ static TimerIdT IDdoArgLowering = GlobalContext::getTimerID("doArgLowering");
+ TimerMarker T(IDdoArgLowering, getContext());
getTarget()->lowerArguments();
}
void Cfg::doAddressOpt() {
+ static TimerIdT IDdoAddressOpt = GlobalContext::getTimerID("doAddressOpt");
+ TimerMarker T(IDdoAddressOpt, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->doAddressOpt();
}
}
void Cfg::doNopInsertion() {
+ static TimerIdT IDdoNopInsertion =
+ GlobalContext::getTimerID("doNopInsertion");
+ TimerMarker T(IDdoNopInsertion, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->doNopInsertion();
}
}
void Cfg::genCode() {
+ static TimerIdT IDgenCode = GlobalContext::getTimerID("genCode");
+ TimerMarker T(IDgenCode, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->genCode();
}
@@ -138,6 +157,8 @@
// Compute the stack frame layout.
void Cfg::genFrame() {
+ static TimerIdT IDgenFrame = GlobalContext::getTimerID("genFrame");
+ TimerMarker T(IDgenFrame, getContext());
getTarget()->addProlog(Entry);
// TODO: Consider folding epilog generation into the final
// emission/assembly pass to avoid an extra iteration over the node
@@ -154,6 +175,9 @@
// completely with a single block. It is a quick single pass and
// doesn't need to iterate until convergence.
void Cfg::livenessLightweight() {
+ static TimerIdT IDlivenessLightweight =
+ GlobalContext::getTimerID("livenessLightweight");
+ TimerMarker T(IDlivenessLightweight, getContext());
getVMetadata()->init();
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->livenessLightweight();
@@ -161,6 +185,8 @@
}
void Cfg::liveness(LivenessMode Mode) {
+ static TimerIdT IDliveness = GlobalContext::getTimerID("liveness");
+ TimerMarker T(IDliveness, getContext());
Live.reset(new Liveness(this, Mode));
getVMetadata()->init();
Live->init();
@@ -199,9 +225,10 @@
// Collect timing for just the portion that constructs the live
// range intervals based on the end-of-live-range computation, for a
// finer breakdown of the cost.
- Timer T_liveRange;
// Make a final pass over instructions to delete dead instructions
// and build each Variable's live range.
+ static TimerIdT IDliveRange = GlobalContext::getTimerID("liveRange");
+ TimerMarker T1(IDliveRange, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
(*I)->livenessPostprocess(Mode, getLiveness());
}
@@ -241,7 +268,6 @@
if (Var->getWeight().isInf())
Var->setLiveRangeInfiniteWeight();
}
- T_liveRange.printElapsedUs(getContext(), "live range construction");
dump();
}
}
@@ -249,6 +275,9 @@
// Traverse every Variable of every Inst and verify that it
// appears within the Variable's computed live range.
bool Cfg::validateLiveness() const {
+ static TimerIdT IDvalidateLiveness =
+ GlobalContext::getTimerID("validateLiveness");
+ TimerMarker T(IDvalidateLiveness, getContext());
bool Valid = true;
Ostream &Str = Ctx->getStrDump();
for (NodeList::const_iterator I1 = Nodes.begin(), E1 = Nodes.end(); I1 != E1;
@@ -296,18 +325,21 @@
}
void Cfg::doBranchOpt() {
+ static TimerIdT IDdoBranchOpt = GlobalContext::getTimerID("doBranchOpt");
+ TimerMarker T(IDdoBranchOpt, getContext());
for (NodeList::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
NodeList::iterator NextNode = I;
++NextNode;
- (*I)->doBranchOpt(*NextNode);
+ (*I)->doBranchOpt(NextNode == E ? NULL : *NextNode);
}
}
// ======================== Dump routines ======================== //
void Cfg::emit() {
+ static TimerIdT IDemit = GlobalContext::getTimerID("emit");
+ TimerMarker T(IDemit, getContext());
Ostream &Str = Ctx->getStrEmit();
- Timer T_emit;
if (!Ctx->testAndSetHasEmittedFirstMethod()) {
// Print a helpful command for assembling the output.
// TODO: have the Target emit the header
@@ -339,7 +371,6 @@
(*I)->emit(this);
}
Str << "\n";
- T_emit.printElapsedUs(Ctx, "emit()");
}
// Dumps the IR with an optional introductory message.