Subzero: Add a few performance measurement tools.
--timing-funcs - Produces a sorted list of total time spent translating each function.
--timing-focus=<F> - Turns on the equivalent of --timing for just the function named F. Use '*' to do this for all functions, i.e. get complete timing breakdowns across all functions.
--verbose-focus=<F> - Temporarily turns on --verbose=all while translating the function named F, then restores the previous verbosity.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/620373004
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 1134fdc..cf5a81b 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -25,8 +25,8 @@
Cfg::Cfg(GlobalContext *Ctx)
: Ctx(Ctx), FunctionName(""), ReturnType(IceType_void),
- IsInternalLinkage(false), HasError(false), ErrorMessage(""), Entry(NULL),
- NextInstNumber(1), Live(nullptr),
+ IsInternalLinkage(false), HasError(false), FocusedTiming(false),
+ ErrorMessage(""), Entry(NULL), NextInstNumber(1), Live(nullptr),
Target(TargetLowering::createLowering(Ctx->getTargetArch(), this)),
VMetadata(new VariablesMetadata(this)),
TargetAssembler(
@@ -69,8 +69,15 @@
void Cfg::translate() {
if (hasError())
return;
- static TimerIdT IDtranslate = GlobalContext::getTimerID("translate");
- TimerMarker T(IDtranslate, getContext());
+ VerboseMask OldVerboseMask = getContext()->getVerbose();
+ const IceString &TimingFocusOn = getContext()->getFlags().TimingFocusOn;
+ if (TimingFocusOn == "*" || TimingFocusOn == getFunctionName())
+ setFocusedTiming();
+ bool VerboseFocus =
+ (getContext()->getFlags().VerboseFocusOn == getFunctionName());
+ if (VerboseFocus)
+ getContext()->setVerbose(IceV_All);
+ TimerMarker T(TimerStack::TT_translate, this);
dump("Initial CFG");
@@ -79,6 +86,10 @@
getTarget()->translate();
dump("Final output");
+ if (getFocusedTiming())
+ getContext()->dumpTimers();
+ if (VerboseFocus)
+ getContext()->setVerbose(OldVerboseMask);
}
void Cfg::computePredecessors() {
@@ -87,9 +98,7 @@
}
void Cfg::renumberInstructions() {
- static TimerIdT IDrenumberInstructions =
- GlobalContext::getTimerID("renumberInstructions");
- TimerMarker T(IDrenumberInstructions, getContext());
+ TimerMarker T(TimerStack::TT_renumberInstructions, this);
NextInstNumber = 1;
for (CfgNode *Node : Nodes)
Node->renumberInstructions();
@@ -97,60 +106,50 @@
// placePhiLoads() must be called before placePhiStores().
void Cfg::placePhiLoads() {
- static TimerIdT IDplacePhiLoads = GlobalContext::getTimerID("placePhiLoads");
- TimerMarker T(IDplacePhiLoads, getContext());
+ TimerMarker T(TimerStack::TT_placePhiLoads, this);
for (CfgNode *Node : Nodes)
Node->placePhiLoads();
}
// placePhiStores() must be called after placePhiLoads().
void Cfg::placePhiStores() {
- static TimerIdT IDplacePhiStores =
- GlobalContext::getTimerID("placePhiStores");
- TimerMarker T(IDplacePhiStores, getContext());
+ TimerMarker T(TimerStack::TT_placePhiStores, this);
for (CfgNode *Node : Nodes)
Node->placePhiStores();
}
void Cfg::deletePhis() {
- static TimerIdT IDdeletePhis = GlobalContext::getTimerID("deletePhis");
- TimerMarker T(IDdeletePhis, getContext());
+ TimerMarker T(TimerStack::TT_deletePhis, this);
for (CfgNode *Node : Nodes)
Node->deletePhis();
}
void Cfg::doArgLowering() {
- static TimerIdT IDdoArgLowering = GlobalContext::getTimerID("doArgLowering");
- TimerMarker T(IDdoArgLowering, getContext());
+ TimerMarker T(TimerStack::TT_doArgLowering, this);
getTarget()->lowerArguments();
}
void Cfg::doAddressOpt() {
- static TimerIdT IDdoAddressOpt = GlobalContext::getTimerID("doAddressOpt");
- TimerMarker T(IDdoAddressOpt, getContext());
+ TimerMarker T(TimerStack::TT_doAddressOpt, this);
for (CfgNode *Node : Nodes)
Node->doAddressOpt();
}
void Cfg::doNopInsertion() {
- static TimerIdT IDdoNopInsertion =
- GlobalContext::getTimerID("doNopInsertion");
- TimerMarker T(IDdoNopInsertion, getContext());
+ TimerMarker T(TimerStack::TT_doNopInsertion, this);
for (CfgNode *Node : Nodes)
Node->doNopInsertion();
}
void Cfg::genCode() {
- static TimerIdT IDgenCode = GlobalContext::getTimerID("genCode");
- TimerMarker T(IDgenCode, getContext());
+ TimerMarker T(TimerStack::TT_genCode, this);
for (CfgNode *Node : Nodes)
Node->genCode();
}
// Compute the stack frame layout.
void Cfg::genFrame() {
- static TimerIdT IDgenFrame = GlobalContext::getTimerID("genFrame");
- TimerMarker T(IDgenFrame, getContext());
+ TimerMarker T(TimerStack::TT_genFrame, this);
getTarget()->addProlog(Entry);
// TODO: Consider folding epilog generation into the final
// emission/assembly pass to avoid an extra iteration over the node
@@ -165,17 +164,14 @@
// completely with a single block. It is a quick single pass and
// doesn't need to iterate until convergence.
void Cfg::livenessLightweight() {
- static TimerIdT IDlivenessLightweight =
- GlobalContext::getTimerID("livenessLightweight");
- TimerMarker T(IDlivenessLightweight, getContext());
+ TimerMarker T(TimerStack::TT_livenessLightweight, this);
getVMetadata()->init();
for (CfgNode *Node : Nodes)
Node->livenessLightweight();
}
void Cfg::liveness(LivenessMode Mode) {
- static TimerIdT IDliveness = GlobalContext::getTimerID("liveness");
- TimerMarker T(IDliveness, getContext());
+ TimerMarker T(TimerStack::TT_liveness, this);
Live.reset(new Liveness(this, Mode));
getVMetadata()->init();
Live->init();
@@ -208,8 +204,7 @@
// finer breakdown of the cost.
// Make a final pass over instructions to delete dead instructions
// and build each Variable's live range.
- static TimerIdT IDliveRange = GlobalContext::getTimerID("liveRange");
- TimerMarker T1(IDliveRange, getContext());
+ TimerMarker T1(TimerStack::TT_liveRange, this);
for (CfgNode *Node : Nodes)
Node->livenessPostprocess(Mode, getLiveness());
if (Mode == Liveness_Intervals) {
@@ -255,9 +250,7 @@
// Traverse every Variable of every Inst and verify that it
// appears within the Variable's computed live range.
bool Cfg::validateLiveness() const {
- static TimerIdT IDvalidateLiveness =
- GlobalContext::getTimerID("validateLiveness");
- TimerMarker T(IDvalidateLiveness, getContext());
+ TimerMarker T(TimerStack::TT_validateLiveness, this);
bool Valid = true;
Ostream &Str = Ctx->getStrDump();
for (CfgNode *Node : Nodes) {
@@ -300,8 +293,7 @@
}
void Cfg::doBranchOpt() {
- static TimerIdT IDdoBranchOpt = GlobalContext::getTimerID("doBranchOpt");
- TimerMarker T(IDdoBranchOpt, getContext());
+ TimerMarker T(TimerStack::TT_doBranchOpt, this);
for (auto I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
auto NextNode = I;
++NextNode;
@@ -312,8 +304,7 @@
// ======================== Dump routines ======================== //
void Cfg::emit() {
- static TimerIdT IDemit = GlobalContext::getTimerID("emit");
- TimerMarker T(IDemit, getContext());
+ TimerMarker T(TimerStack::TT_emit, this);
Ostream &Str = Ctx->getStrEmit();
if (!Ctx->testAndSetHasEmittedFirstMethod()) {
// Print a helpful command for assembling the output.
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 4766d47..e2aef46 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -96,6 +96,8 @@
return getContext()->getFlags().UseIntegratedAssembler;
}
bool hasComputedFrame() const;
+ bool getFocusedTiming() const { return FocusedTiming; }
+ void setFocusedTiming() { FocusedTiming = true; }
// Passes over the CFG.
void translate();
@@ -165,6 +167,7 @@
Type ReturnType;
bool IsInternalLinkage;
bool HasError;
+ bool FocusedTiming;
IceString ErrorMessage;
CfgNode *Entry; // entry basic block
NodeList Nodes; // linearized node list; Entry should be first
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 1a386c8..f19552f 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -495,7 +495,7 @@
Str << " // preds = ";
bool First = true;
for (CfgNode *I : InEdges) {
- if (First)
+ if (!First)
Str << ", ";
First = false;
Str << "%" << I->getName();
@@ -540,7 +540,7 @@
Str << " // succs = ";
bool First = true;
for (CfgNode *I : OutEdges) {
- if (First)
+ if (!First)
Str << ", ";
First = false;
Str << "%" << I->getName();
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index d6c232f..f1dec0c 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -25,8 +25,9 @@
: DisableInternal(false), SubzeroTimingEnabled(false),
DisableTranslation(false), DisableGlobals(false),
FunctionSections(false), UseIntegratedAssembler(false),
- UseSandboxing(false), DumpStats(false), DefaultGlobalPrefix(""),
- DefaultFunctionPrefix("") {}
+ UseSandboxing(false), DumpStats(false), TimeEachFunction(false),
+ DefaultGlobalPrefix(""), DefaultFunctionPrefix(""), TimingFocusOn(""),
+ VerboseFocusOn("") {}
bool DisableInternal;
bool SubzeroTimingEnabled;
bool DisableTranslation;
@@ -35,8 +36,11 @@
bool UseIntegratedAssembler;
bool UseSandboxing;
bool DumpStats;
+ bool TimeEachFunction;
IceString DefaultGlobalPrefix;
IceString DefaultFunctionPrefix;
+ IceString TimingFocusOn;
+ IceString VerboseFocusOn;
};
} // end of namespace Ice
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index df64cff..cdb0052 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -59,15 +59,13 @@
// Caller is expected to delete the returned Ice::Cfg object.
Ice::Cfg *convertFunction(const Function *F) {
- static Ice::TimerIdT IDllvmConvert =
- Ice::GlobalContext::getTimerID("llvmConvert");
- Ice::TimerMarker T(IDllvmConvert, Ctx);
VarMap.clear();
NodeMap.clear();
Func = new Ice::Cfg(Ctx);
Func->setFunctionName(F->getName());
Func->setReturnType(convertToIceType(F->getReturnType()));
Func->setInternal(F->hasInternalLinkage());
+ Ice::TimerMarker T(Ice::TimerStack::TT_llvmConvert, Func);
// The initial definition/use of each arg is the entry node.
for (auto ArgI = F->arg_begin(), ArgE = F->arg_end(); ArgI != ArgE;
@@ -617,8 +615,7 @@
namespace Ice {
void Converter::convertToIce() {
- static TimerIdT IDconvertToIce = GlobalContext::getTimerID("convertToIce");
- TimerMarker T(IDconvertToIce, Ctx);
+ TimerMarker T(TimerStack::TT_convertToIce, Ctx);
nameUnnamedGlobalAddresses(Mod);
if (!Ctx->getFlags().DisableGlobals)
convertGlobals(Mod);
@@ -626,13 +623,21 @@
}
void Converter::convertFunctions() {
+ TimerStackIdT StackID = GlobalContext::TSK_Funcs;
for (const Function &I : *Mod) {
if (I.empty())
continue;
+ TimerIdT TimerID = 0;
+ if (Ctx->getFlags().TimeEachFunction) {
+ TimerID = Ctx->getTimerID(StackID, I.getName());
+ Ctx->pushTimer(TimerID, StackID);
+ }
LLVM2ICEConverter FunctionConverter(Ctx, Mod->getContext());
Cfg *Fcn = FunctionConverter.convertFunction(&I);
translateFcn(Fcn);
+ if (Ctx->getFlags().TimeEachFunction)
+ Ctx->popTimer(TimerID, StackID);
}
emitConstants();
diff --git a/src/IceDefs.h b/src/IceDefs.h
index 98bd8af..29ed5d8 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -69,6 +69,7 @@
// numbers are used for representing Variable live ranges.
typedef int32_t InstNumberT;
+typedef uint32_t TimerStackIdT;
typedef uint32_t TimerIdT;
enum LivenessMode {
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 7ea7e5d..80728b0 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -119,7 +119,11 @@
: StrDump(OsDump), StrEmit(OsEmit), VMask(Mask),
ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt),
TestPrefix(TestPrefix), Flags(Flags), HasEmittedFirstMethod(false),
- RNG(""), Timers(new TimerStack("main")) {}
+ RNG("") {
+ // Pre-register built-in stacks in TimerStackKind order (TSK_Default first).
+ newTimerStackID("Total across all functions");
+ newTimerStackID("Per-function summary");
+}
// Scan a string for S[0-9A-Z]*_ patterns and replace them with
// S<num>_ where <num> is the next base-36 value. If a type name
@@ -381,13 +385,27 @@
llvm_unreachable("Unknown type");
}
-TimerIdT GlobalContext::getTimerID(const IceString &Name) {
- return TimerStack::getTimerID(Name);
+TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
+ const IceString &Name) {
+ assert(StackID < Timers.size());
+ return Timers[StackID].getTimerID(Name);
}
-void GlobalContext::pushTimer(TimerIdT ID) { Timers->push(ID); }
+TimerStackIdT GlobalContext::newTimerStackID(const IceString &Name) {
+ TimerStackIdT NewID = Timers.size();
+ Timers.push_back(TimerStack(Name));
+ return NewID;
+}
-void GlobalContext::popTimer(TimerIdT ID) { Timers->pop(ID); }
+void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
+ assert(StackID < Timers.size());
+ Timers[StackID].push(ID);
+}
+
+void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
+ assert(StackID < Timers.size());
+ Timers[StackID].pop(ID);
+}
void GlobalContext::dumpStats(const IceString &Name, bool Final) {
if (Flags.DumpStats) {
@@ -400,6 +418,16 @@
}
}
-void GlobalContext::dumpTimers() { Timers->dump(getStrDump()); }
+void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
+ assert(Timers.size() > StackID);
+ Timers[StackID].dump(getStrDump(), DumpCumulative);
+}
+
+TimerMarker::TimerMarker(TimerIdT ID, const Cfg *Func)
+ : ID(ID), Ctx(Func->getContext()),
+ Active(Func->getFocusedTiming() || Ctx->getFlags().SubzeroTimingEnabled) {
+ if (Active)
+ Ctx->pushTimer(ID);
+}
} // end of namespace Ice
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index 0ffff1c..9968abb 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -23,6 +23,7 @@
#include "IceDefs.h"
#include "IceIntrinsics.h"
#include "IceRNG.h"
+#include "IceTimerTree.h"
#include "IceTypes.h"
namespace Ice {
@@ -71,6 +72,7 @@
// Returns true if any of the specified options in the verbose mask
// are set. If the argument is omitted, it checks if any verbose
// options at all are set.
+ VerboseMask getVerbose() const { return VMask; }
bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
void setVerbose(VerboseMask Mask) { VMask = Mask; }
void addVerbose(VerboseMask Mask) { VMask |= Mask; }
@@ -151,10 +153,19 @@
StatsCumulative.updateFills();
}
- static TimerIdT getTimerID(const IceString &Name);
- void pushTimer(TimerIdT ID);
- void popTimer(TimerIdT ID);
- void dumpTimers();
+ // These are predefined TimerStackIdT values.
+ enum TimerStackKind {
+ TSK_Default = 0,
+ TSK_Funcs,
+ TSK_Num
+ };
+
+ TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
+ TimerStackIdT newTimerStackID(const IceString &Name);
+ void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
+ void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
+ void dumpTimers(TimerStackIdT StackID = TSK_Default,
+ bool DumpCumulative = true);
private:
Ostream *StrDump; // Stream for dumping / diagnostics
@@ -172,7 +183,7 @@
RandomNumberGenerator RNG;
CodeStats StatsFunction;
CodeStats StatsCumulative;
- std::unique_ptr<class TimerStack> Timers;
+ std::vector<TimerStack> Timers;
GlobalContext(const GlobalContext &) = delete;
GlobalContext &operator=(const GlobalContext &) = delete;
@@ -194,6 +205,8 @@
if (Active)
Ctx->pushTimer(ID);
}
+ TimerMarker(TimerIdT ID, const Cfg *Func);
+
~TimerMarker() {
if (Active)
Ctx->popTimer(ID);
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 18b4b44..7150fa9 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -782,7 +782,8 @@
// Unary XMM ops
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Sqrtss::Emitter = {
- &x86::AssemblerX86::sqrtss, &x86::AssemblerX86::sqrtss};
+ &x86::AssemblerX86::sqrtss, &x86::AssemblerX86::sqrtss
+};
// Binary GPR ops
template <>
@@ -824,58 +825,76 @@
// Binary XMM ops
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addss::Emitter = {
- &x86::AssemblerX86::addss, &x86::AssemblerX86::addss};
+ &x86::AssemblerX86::addss, &x86::AssemblerX86::addss
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Addps::Emitter = {
- &x86::AssemblerX86::addps, &x86::AssemblerX86::addps};
+ &x86::AssemblerX86::addps, &x86::AssemblerX86::addps
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divss::Emitter = {
- &x86::AssemblerX86::divss, &x86::AssemblerX86::divss};
+ &x86::AssemblerX86::divss, &x86::AssemblerX86::divss
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Divps::Emitter = {
- &x86::AssemblerX86::divps, &x86::AssemblerX86::divps};
+ &x86::AssemblerX86::divps, &x86::AssemblerX86::divps
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulss::Emitter = {
- &x86::AssemblerX86::mulss, &x86::AssemblerX86::mulss};
+ &x86::AssemblerX86::mulss, &x86::AssemblerX86::mulss
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Mulps::Emitter = {
- &x86::AssemblerX86::mulps, &x86::AssemblerX86::mulps};
+ &x86::AssemblerX86::mulps, &x86::AssemblerX86::mulps
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Padd::Emitter = {
- &x86::AssemblerX86::padd, &x86::AssemblerX86::padd};
+ &x86::AssemblerX86::padd, &x86::AssemblerX86::padd
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pand::Emitter = {
- &x86::AssemblerX86::pand, &x86::AssemblerX86::pand};
+ &x86::AssemblerX86::pand, &x86::AssemblerX86::pand
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pandn::Emitter = {
- &x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn};
+ &x86::AssemblerX86::pandn, &x86::AssemblerX86::pandn
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpeq::Emitter = {
- &x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq};
+ &x86::AssemblerX86::pcmpeq, &x86::AssemblerX86::pcmpeq
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pcmpgt::Emitter = {
- &x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt};
+ &x86::AssemblerX86::pcmpgt, &x86::AssemblerX86::pcmpgt
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmull::Emitter = {
- &x86::AssemblerX86::pmull, &x86::AssemblerX86::pmull};
+ &x86::AssemblerX86::pmull, &x86::AssemblerX86::pmull
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pmuludq::Emitter = {
- &x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq};
+ &x86::AssemblerX86::pmuludq, &x86::AssemblerX86::pmuludq
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Por::Emitter = {
- &x86::AssemblerX86::por, &x86::AssemblerX86::por};
+ &x86::AssemblerX86::por, &x86::AssemblerX86::por
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Psub::Emitter = {
- &x86::AssemblerX86::psub, &x86::AssemblerX86::psub};
+ &x86::AssemblerX86::psub, &x86::AssemblerX86::psub
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Pxor::Emitter = {
- &x86::AssemblerX86::pxor, &x86::AssemblerX86::pxor};
+ &x86::AssemblerX86::pxor, &x86::AssemblerX86::pxor
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subss::Emitter = {
- &x86::AssemblerX86::subss, &x86::AssemblerX86::subss};
+ &x86::AssemblerX86::subss, &x86::AssemblerX86::subss
+};
template <>
const x86::AssemblerX86::XmmEmitterRegOp InstX8632Subps::Emitter = {
- &x86::AssemblerX86::subps, &x86::AssemblerX86::subps};
+ &x86::AssemblerX86::subps, &x86::AssemblerX86::subps
+};
// Binary XMM Shift ops
template <>
@@ -1427,10 +1446,11 @@
const Operand *Src1 = getSrc(1);
Type Ty = Src0->getType();
static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = {
- &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp,
- &x86::AssemblerX86::cmp};
+ &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp
+ };
static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
- &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp};
+ &x86::AssemblerX86::cmp, &x86::AssemblerX86::cmp
+ };
if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
if (SrcVar0->hasReg()) {
emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
@@ -1471,7 +1491,8 @@
const Variable *Src0 = llvm::cast<Variable>(getSrc(0));
Type Ty = Src0->getType();
const static x86::AssemblerX86::XmmEmitterRegOp Emitter = {
- &x86::AssemblerX86::ucomiss, &x86::AssemblerX86::ucomiss};
+ &x86::AssemblerX86::ucomiss, &x86::AssemblerX86::ucomiss
+ };
emitIASRegOpTyXMM(Func, Ty, Src0, getSrc(1), Emitter);
}
@@ -1517,9 +1538,11 @@
Type Ty = Src0->getType();
// The Reg/Addr form of test is not encodeable.
static const x86::AssemblerX86::GPREmitterRegOp RegEmitter = {
- &x86::AssemblerX86::test, NULL, &x86::AssemblerX86::test};
+ &x86::AssemblerX86::test, NULL, &x86::AssemblerX86::test
+ };
static const x86::AssemblerX86::GPREmitterAddrOp AddrEmitter = {
- &x86::AssemblerX86::test, &x86::AssemblerX86::test};
+ &x86::AssemblerX86::test, &x86::AssemblerX86::test
+ };
if (const Variable *SrcVar0 = llvm::dyn_cast<Variable>(Src0)) {
if (SrcVar0->hasReg()) {
emitIASRegOpTyGPR(Func, Ty, SrcVar0, Src1, RegEmitter);
@@ -1770,8 +1793,9 @@
const Variable *Dest = getDest();
const Operand *Src = getSrc(0);
const static x86::AssemblerX86::XmmEmitterMovOps Emitter = {
- &x86::AssemblerX86::movups, &x86::AssemblerX86::movups,
- &x86::AssemblerX86::movups};
+ &x86::AssemblerX86::movups, &x86::AssemblerX86::movups,
+ &x86::AssemblerX86::movups
+ };
emitIASMovlikeXMM(Func, Dest, Src, Emitter);
}
@@ -1794,8 +1818,8 @@
const Variable *Dest = getDest();
const Operand *Src = getSrc(0);
const static x86::AssemblerX86::XmmEmitterMovOps Emitter = {
- &x86::AssemblerX86::movq, &x86::AssemblerX86::movq,
- &x86::AssemblerX86::movq};
+ &x86::AssemblerX86::movq, &x86::AssemblerX86::movq, &x86::AssemblerX86::movq
+ };
emitIASMovlikeXMM(Func, Dest, Src, Emitter);
}
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index c366dd9..c631e80 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -278,8 +278,7 @@
}
void VariablesMetadata::init() {
- static TimerIdT IDvmetadata = GlobalContext::getTimerID("vmetadata");
- TimerMarker T(IDvmetadata, Func->getContext());
+ TimerMarker T(TimerStack::TT_vmetadata, Func);
Metadata.clear();
Metadata.resize(Func->getNumVariables());
@@ -438,7 +437,7 @@
Str << "(weight=" << Weight << ") ";
bool First = true;
for (const RangeElementType &I : Range) {
- if (First)
+ if (!First)
Str << ", ";
First = false;
Str << "[" << I.first << ":" << I.second << ")";
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 69353a1..2d00db0 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -64,8 +64,7 @@
// preparation. Results are assigned to Variable::RegNum for each
// Variable.
void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull) {
- static TimerIdT IDscan = GlobalContext::getTimerID("linearScan");
- TimerMarker T(IDscan, Func->getContext());
+ TimerMarker T(TimerStack::TT_linearScan, Func);
assert(RegMaskFull.any()); // Sanity check
Unhandled.clear();
UnhandledPrecolored.clear();
@@ -86,9 +85,7 @@
// storing Func->getVariables().
const VarList &Vars = Func->getVariables();
{
- static TimerIdT IDinitUnhandled =
- GlobalContext::getTimerID("initUnhandled");
- TimerMarker T(IDinitUnhandled, Func->getContext());
+ TimerMarker T(TimerStack::TT_initUnhandled, Func);
for (Variable *Var : Vars) {
// Explicitly don't consider zero-weight variables, which are
// meant to be spill slots.
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 6dc46b1..bcc6290 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -229,8 +229,7 @@
// registers could potentially be parameterized if we want to restrict
// registers e.g. for performance testing.
void TargetLowering::regAlloc() {
- static TimerIdT IDregAlloc = GlobalContext::getTimerID("regAlloc");
- TimerMarker T(IDregAlloc, Ctx);
+ TimerMarker T(TimerStack::TT_regAlloc, Func);
LinearScan LinearScan(Func);
RegSetMask RegInclude = RegSet_None;
RegSetMask RegExclude = RegSet_None;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 3217141..9cac11d 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -313,9 +313,7 @@
}
void TargetX8632::translateO2() {
- GlobalContext *Context = Func->getContext();
- static TimerIdT IDO2 = GlobalContext::getTimerID("O2");
- TimerMarker T(IDO2, Context);
+ TimerMarker T(TimerStack::TT_O2, Func);
// Lower Phi instructions.
Func->placePhiLoads();
@@ -400,9 +398,7 @@
}
void TargetX8632::translateOm1() {
- GlobalContext *Context = Func->getContext();
- static TimerIdT IDOm1 = GlobalContext::getTimerID("Om1");
- TimerMarker T(IDOm1, Context);
+ TimerMarker T(TimerStack::TT_Om1, Func);
Func->placePhiLoads();
if (Func->hasError())
return;
@@ -4305,8 +4301,7 @@
void TargetX8632::postLower() {
if (Ctx->getOptLevel() != Opt_m1)
return;
- static TimerIdT IDpostLower = GlobalContext::getTimerID("postLower");
- TimerMarker T(IDpostLower, Ctx);
+ TimerMarker T(TimerStack::TT_postLower, Func);
// TODO: Avoid recomputing WhiteList every instruction.
RegSetMask RegInclude = RegSet_All;
RegSetMask RegExclude = RegSet_StackPointer;
diff --git a/src/IceTimerTree.cpp b/src/IceTimerTree.cpp
index 847941f..0cd73dc 100644
--- a/src/IceTimerTree.cpp
+++ b/src/IceTimerTree.cpp
@@ -19,26 +19,28 @@
namespace Ice {
-std::vector<IceString> TimerStack::IDs;
-
-TimerStack::TimerStack(const IceString &TopLevelName)
- : FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp),
+TimerStack::TimerStack(const IceString &Name)
+ : Name(Name), FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp),
StateChangeCount(0), StackTop(0) {
Nodes.resize(1); // Reserve Nodes[0] for the root node.
- push(getTimerID(TopLevelName));
+ IDs.resize(TT__num);
+#define STR(s) #s
+#define X(tag) \
+ IDs[TT_##tag] = STR(tag); \
+ IDsIndex[STR(tag)] = TT_##tag;
+ TIMERTREE_TABLE;
+#undef X
+#undef STR
}
// Returns the unique timer ID for the given Name, creating a new ID
-// if needed. For performance reasons, it's best to make only one
-// call per Name and cache the result, e.g. via a static initializer.
+// if needed.
TimerIdT TimerStack::getTimerID(const IceString &Name) {
- TimerIdT Size = IDs.size();
- for (TimerIdT i = 0; i < Size; ++i) {
- if (IDs[i] == Name)
- return i;
+ if (IDsIndex.find(Name) == IDsIndex.end()) {
+ IDsIndex[Name] = IDs.size();
+ IDs.push_back(Name);
}
- IDs.push_back(Name);
- return Size;
+ return IDsIndex[Name];
}
// Pushes a new marker onto the timer stack.
@@ -112,27 +114,29 @@
} // end of anonymous namespace
-void TimerStack::dump(Ostream &Str) {
+void TimerStack::dump(Ostream &Str, bool DumpCumulative) {
update();
double TotalTime = LastTimestamp - FirstTimestamp;
assert(TotalTime);
- Str << "Cumulative function times:\n";
- DumpMapType CumulativeMap;
- for (TTindex i = 1; i < Nodes.size(); ++i) {
- TTindex Prefix = i;
- IceString Suffix = "";
- while (Prefix) {
- if (Suffix.empty())
- Suffix = IDs[Nodes[Prefix].Interior];
- else
- Suffix = IDs[Nodes[Prefix].Interior] + "." + Suffix;
- assert(Nodes[Prefix].Parent < Prefix);
- Prefix = Nodes[Prefix].Parent;
+ if (DumpCumulative) {
+ Str << Name << " - Cumulative times:\n";
+ DumpMapType CumulativeMap;
+ for (TTindex i = 1; i < Nodes.size(); ++i) {
+ TTindex Prefix = i;
+ IceString Suffix = "";
+ while (Prefix) {
+ if (Suffix.empty())
+ Suffix = IDs[Nodes[Prefix].Interior];
+ else
+ Suffix = IDs[Nodes[Prefix].Interior] + "." + Suffix;
+ assert(Nodes[Prefix].Parent < Prefix);
+ Prefix = Nodes[Prefix].Parent;
+ }
+ CumulativeMap.insert(std::make_pair(Nodes[i].Time, Suffix));
}
- CumulativeMap.insert(std::make_pair(Nodes[i].Time, Suffix));
+ dumpHelper(Str, CumulativeMap, TotalTime);
}
- dumpHelper(Str, CumulativeMap, TotalTime);
- Str << "Flat function times:\n";
+ Str << Name << " - Flat times:\n";
DumpMapType FlatMap;
for (TimerIdT i = 0; i < LeafTimes.size(); ++i) {
FlatMap.insert(std::make_pair(LeafTimes[i], IDs[i]));
diff --git a/src/IceTimerTree.def b/src/IceTimerTree.def
new file mode 100644
index 0000000..5319b1d
--- /dev/null
+++ b/src/IceTimerTree.def
@@ -0,0 +1,49 @@
+//===- subzero/src/IceTimerTree.def - X-macros for timing -------*- C++ -*-===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file lists predefined timing tags. New tags can be added to
+// avoid a runtime string lookup.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICETIMERTREE_DEF
+
+#define TIMERTREE_TABLE \
+ /* enum value */ \
+ X(O2) \
+ X(Om1) \
+ X(convertToIce) \
+ X(deletePhis) \
+ X(doAddressOpt) \
+ X(doArgLowering) \
+ X(doBranchOpt) \
+ X(doNopInsertion) \
+ X(emit) \
+ X(genCode) \
+ X(genFrame) \
+ X(initUnhandled) \
+ X(linearScan) \
+ X(liveRange) \
+ X(liveness) \
+ X(livenessLightweight) \
+ X(llvmConvert) \
+ X(parse) \
+ X(placePhiLoads) \
+ X(placePhiStores) \
+ X(postLower) \
+ X(regAlloc) \
+ X(renumberInstructions) \
+ X(szmain) \
+ X(translate) \
+ X(validateLiveness) \
+ X(vmetadata)
+// Usage: #define X(tag) ..., expand TIMERTREE_TABLE, then #undef X.
+
+#define SUBZERO_SRC_ICETIMERTREE_DEF
+#endif // SUBZERO_SRC_ICETIMERTREE_DEF
diff --git a/src/IceTimerTree.h b/src/IceTimerTree.h
index 029b7f4..289ea2d 100644
--- a/src/IceTimerTree.h
+++ b/src/IceTimerTree.h
@@ -15,6 +15,8 @@
#ifndef SUBZERO_SRC_ICETIMERTREE_H
#define SUBZERO_SRC_ICETIMERTREE_H
+#include "IceTimerTree.def"
+
namespace Ice {
class TimerTreeNode;
@@ -38,23 +40,32 @@
};
class TimerStack {
- TimerStack(const TimerStack &) = delete;
+ // Copy-constructible because GlobalContext stores TimerStack in a vector.
TimerStack &operator=(const TimerStack &) = delete;
public:
- TimerStack(const IceString &TopLevelName);
- static TimerIdT getTimerID(const IceString &Name);
+ enum TimerTag {
+#define X(tag) TT_##tag,
+ TIMERTREE_TABLE
+#undef X
+ TT__num
+ };
+ TimerStack(const IceString &Name);
+ TimerIdT getTimerID(const IceString &Name);
void push(TimerIdT ID);
void pop(TimerIdT ID);
- void dump(Ostream &Str);
+ void dump(Ostream &Str, bool DumpCumulative);
private:
void update();
static double timestamp();
+ const IceString Name;
const double FirstTimestamp;
double LastTimestamp;
uint64_t StateChangeCount;
- static std::vector<IceString> IDs; // indexed by TimerIdT
+ // IDsIndex maps a symbolic timer name to its integer ID.
+ std::map<IceString, TimerIdT> IDsIndex;
+ std::vector<IceString> IDs; // indexed by TimerIdT
std::vector<TimerTreeNode> Nodes; // indexed by TTindex
std::vector<double> LeafTimes; // indexed by TimerIdT
TTindex StackTop;
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index af78e2f..f05835a 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -843,6 +843,11 @@
NextLocalInstIndex(Context->getNumGlobalValueIDs()),
InstIsTerminating(false) {
Func->setFunctionName(LLVMFunc->getName());
+ if (getFlags().TimeEachFunction)
+ getTranslator().getContext()->pushTimer(
+ getTranslator().getContext()->getTimerID(
+ Ice::GlobalContext::TSK_Funcs, Func->getFunctionName()),
+ Ice::GlobalContext::TSK_Funcs);
Func->setReturnType(Context->convertToIceType(LLVMFunc->getReturnType()));
Func->setInternal(LLVMFunc->hasInternalLinkage());
CurrentNode = InstallNextBasicBlock();
@@ -1404,6 +1409,11 @@
// for such parsing errors.
if (Context->getNumErrors() == 0)
getTranslator().translateFcn(Func);
+ if (getFlags().TimeEachFunction)
+ getTranslator().getContext()->popTimer(
+ getTranslator().getContext()->getTimerID(Ice::GlobalContext::TSK_Funcs,
+ Func->getFunctionName()),
+ Ice::GlobalContext::TSK_Funcs);
}
void FunctionParser::ReportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op,
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index f95c02e..f392b7e 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -95,8 +95,22 @@
"timing", cl::desc("Enable breakdown timing of Subzero translation"));
static cl::opt<bool>
- DisableGlobals("disable-globals",
- cl::desc("Disable global initializer translation"));
+TimeEachFunction("timing-funcs",
+ cl::desc("Print total translation time for each function"));
+
+static cl::opt<std::string> TimingFocusOn(
+ "timing-focus",
+ cl::desc("Break down timing for a specific function (use '*' for all)"),
+ cl::init(""));
+
+static cl::opt<std::string> VerboseFocusOn(
+ "verbose-focus",
+ cl::desc("Temporarily enable full verbosity for a specific function"),
+ cl::init(""));
+
+static cl::opt<bool>
+DisableGlobals("disable-globals",
+ cl::desc("Disable global initializer translation"));
// This is currently unused, and is a placeholder for lit tests.
static cl::opt<bool>
@@ -169,13 +183,15 @@
Flags.UseIntegratedAssembler = UseIntegratedAssembler;
Flags.UseSandboxing = UseSandboxing;
Flags.DumpStats = DumpStats;
+ Flags.TimeEachFunction = TimeEachFunction;
Flags.DefaultGlobalPrefix = DefaultGlobalPrefix;
Flags.DefaultFunctionPrefix = DefaultFunctionPrefix;
+ Flags.TimingFocusOn = TimingFocusOn;
+ Flags.VerboseFocusOn = VerboseFocusOn;
Ice::GlobalContext Ctx(Ls, Os, VMask, TargetArch, OptLevel, TestPrefix,
Flags);
- static Ice::TimerIdT IDszmain = Ice::GlobalContext::getTimerID("szmain");
- Ice::TimerMarker T(IDszmain, &Ctx);
+ Ice::TimerMarker T(Ice::TimerStack::TT_szmain, &Ctx);
int ErrorStatus = 0;
if (BuildOnRead) {
@@ -185,8 +201,7 @@
} else {
// Parse the input LLVM IR file into a module.
SMDiagnostic Err;
- static Ice::TimerIdT IDparse = Ice::GlobalContext::getTimerID("parse");
- Ice::TimerMarker T1(IDparse, &Ctx);
+ Ice::TimerMarker T1(Ice::TimerStack::TT_parse, &Ctx);
Module *Mod =
NaClParseIRFile(IRFilename, InputFileFormat, Err, getGlobalContext());
@@ -199,6 +214,10 @@
Converter.convertToIce();
ErrorStatus = Converter.getErrorStatus();
}
+ if (TimeEachFunction) {
+ const bool DumpCumulative = false;
+ Ctx.dumpTimers(Ice::GlobalContext::TSK_Funcs, DumpCumulative);
+ }
if (SubzeroTimingEnabled)
Ctx.dumpTimers();
const bool FinalStats = true;