Subzero: Improve the use of timers.
Several things are done here:
1. Move timer support to be guarded by the ALLOW_TIMERS define, or the BuildDefs::timers() constexpr method.
2. Add a NODUMP build configuration to control whether dump support is built in. So "make -f Makefile.standalone NODUMP=1 NOASSERT=1" is pretty close to a MINIMAL build with timer support.
3. Add some missing timers: alloca analysis, RMW analysis, helper call pre-lowering, load optimization analysis. These omitted pass timings were being rolled up into the "O2" bucket.
4. Add timers around push and pop operations on the translate queue and the emit queue.
5. Refactor the clumsy code to push/pop function timers (as opposed to pass timers), so that it fits into the nice RAII TimerMarker class like the pass timers.
6. It turns out that even with MINIMAL or NODUMP builds, we still construct a longish std::string every time Cfg::dump() is called, even though the string isn't used in MINIMAL/NODUMP mode. The dump() arg might as well be a const char * arg instead.
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4360
R=kschimpf@google.com
Review URL: https://codereview.chromium.org/1784243006 .
diff --git a/Makefile.standalone b/Makefile.standalone
index 3c2350f..0949b66 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -84,12 +84,20 @@
ifdef MINIMAL
NOASSERT = 1
+ NODUMP = 1
OBJDIR := $(OBJDIR)+Min
- BASE_CXX_DEFINES += -DALLOW_DUMP=0 -DALLOW_LLVM_CL=0 -DALLOW_LLVM_IR=0 \
- -DALLOW_LLVM_IR_AS_INPUT=0 -DALLOW_MINIMAL_BUILD=1
+ BASE_CXX_DEFINES += -DALLOW_LLVM_CL=0 -DALLOW_LLVM_IR=0 \
+ -DALLOW_LLVM_IR_AS_INPUT=0 -DALLOW_TIMERS=0 -DALLOW_MINIMAL_BUILD=1
else
- BASE_CXX_DEFINES += -DALLOW_DUMP=1 -DALLOW_LLVM_CL=1 -DALLOW_LLVM_IR=1 \
- -DALLOW_LLVM_IR_AS_INPUT=1 -DALLOW_MINIMAL_BUILD=0
+ BASE_CXX_DEFINES += -DALLOW_LLVM_CL=1 -DALLOW_LLVM_IR=1 \
+ -DALLOW_LLVM_IR_AS_INPUT=1 -DALLOW_TIMERS=1 -DALLOW_MINIMAL_BUILD=0
+endif
+
+ifdef NODUMP
+ OBJDIR := $(OBJDIR)+NoDump
+ BASE_CXX_DEFINES += -DALLOW_DUMP=0
+else
+ BASE_CXX_DEFINES += -DALLOW_DUMP=1
endif
CXX_DEFINES := $(BASE_CXX_DEFINES) -DPNACL_BROWSER_TRANSLATOR=0
diff --git a/src/IceBuildDefs.h b/src/IceBuildDefs.h
index 948ef13..3b32cec 100644
--- a/src/IceBuildDefs.h
+++ b/src/IceBuildDefs.h
@@ -77,6 +77,8 @@
// The ALLOW_* etc. symbols must be #defined to zero or non-zero.
/// Return true if ALLOW_DUMP is defined as a non-zero value
constexpr bool dump() { return ALLOW_DUMP; }
+/// Return true if ALLOW_TIMERS is defined as a non-zero value
+constexpr bool timers() { return ALLOW_TIMERS; }
/// Return true if ALLOW_LLVM_CL is defined as a non-zero value
// TODO(stichnot): this ALLOW_LLVM_CL is a TBD option which will
// allow for replacement of llvm:cl command line processor with a
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index a44e6b3..c3d91e6 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -187,9 +187,6 @@
void Cfg::translate() {
if (hasError())
return;
- // FunctionTimer conditionally pushes/pops a TimerMarker if TimeEachFunction
- // is enabled.
- std::unique_ptr<TimerMarker> FunctionTimer;
if (BuildDefs::dump()) {
const IceString &TimingFocusOn =
getContext()->getFlags().getTimingFocusOn();
@@ -199,15 +196,12 @@
getContext()->resetTimer(GlobalContext::TSK_Default);
getContext()->setTimerName(GlobalContext::TSK_Default, Name);
}
- if (getContext()->getFlags().getTimeEachFunction())
- FunctionTimer.reset(new TimerMarker(
- getContext()->getTimerID(GlobalContext::TSK_Funcs, Name),
- getContext(), GlobalContext::TSK_Funcs));
if (isVerbose(IceV_Status)) {
getContext()->getStrDump() << ">>>Translating "
<< getFunctionNameAndSize() << "\n";
}
}
+ TimerMarker T_func(getContext(), getFunctionName());
TimerMarker T(TimerStack::TT_translate, this);
dump("Initial CFG");
@@ -573,6 +567,7 @@
}
void Cfg::processAllocas(bool SortAndCombine) {
+ TimerMarker _(TimerStack::TT_alloca, this);
const uint32_t StackAlignment = getTarget()->getStackAlignment();
CfgNode *EntryNode = getEntryNode();
// LLVM enforces power of 2 alignment.
@@ -1083,14 +1078,14 @@
}
// Dumps the IR with an optional introductory message.
-void Cfg::dump(const IceString &Message) {
+void Cfg::dump(const char *Message) {
if (!BuildDefs::dump())
return;
if (!isVerbose())
return;
OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrDump();
- if (!Message.empty())
+ if (Message[0])
Str << "================ " << Message << " ================\n";
if (isVerbose(IceV_Mem)) {
Str << "Memory size = " << getTotalMemoryMB() << " MB\n";
diff --git a/src/IceCfg.h b/src/IceCfg.h
index c24319c..d3aabe9 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -227,7 +227,7 @@
void emitIAS();
static void emitTextHeader(const IceString &Name, GlobalContext *Ctx,
const Assembler *Asm);
- void dump(const IceString &Message = "");
+ void dump(const char *Message = "");
/// Allocate data of type T using the per-Cfg allocator.
template <typename T> T *allocate() { return Allocator->Allocate<T>(); }
diff --git a/src/IceCompiler.cpp b/src/IceCompiler.cpp
index 7a0dcbb..ff77533 100644
--- a/src/IceCompiler.cpp
+++ b/src/IceCompiler.cpp
@@ -139,8 +139,8 @@
Ctx.dumpTimers();
if (Ctx.getFlags().getTimeEachFunction()) {
- constexpr bool DumpCumulative = false;
- Ctx.dumpTimers(GlobalContext::TSK_Funcs, DumpCumulative);
+ constexpr bool NoDumpCumulative = false;
+ Ctx.dumpTimers(GlobalContext::TSK_Funcs, NoDumpCumulative);
}
constexpr bool FinalStats = true;
Ctx.dumpStats("_FINAL_", FinalStats);
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index e69fd7a..2440353 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -913,21 +913,12 @@
}
void Converter::convertFunctions() {
- const TimerStackIdT StackID = GlobalContext::TSK_Funcs;
for (const Function &I : *Mod) {
if (I.empty())
continue;
-
- TimerIdT TimerID = 0;
- const bool TimeThisFunction = Ctx->getFlags().getTimeEachFunction();
- if (TimeThisFunction) {
- TimerID = Ctx->getTimerID(StackID, I.getName());
- Ctx->pushTimer(TimerID, StackID);
- }
+ TimerMarker _(Ctx, I.getName());
LLVM2ICEFunctionConverter FunctionConverter(*this);
FunctionConverter.convertFunction(&I);
- if (TimeThisFunction)
- Ctx->popTimer(TimerID, StackID);
}
}
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index d66b553..8f7d3fc 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp
@@ -219,14 +219,8 @@
void ELFObjectWriter::writeFunctionCode(const IceString &FuncName,
bool IsInternal, Assembler *Asm) {
assert(!SectionNumbersAssigned);
+ TimerMarker T_func(&Ctx, FuncName);
TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
- constexpr TimerStackIdT StackID = GlobalContext::TSK_Funcs;
- TimerIdT TimerID = 0;
- bool TimeThisFunction = Ctx.getFlags().getTimeEachFunction();
- if (TimeThisFunction) {
- TimerID = Ctx.getTimerID(StackID, FuncName);
- Ctx.pushTimer(TimerID, StackID);
- }
ELFTextSection *Section = nullptr;
ELFRelocationSection *RelSection = nullptr;
const bool FunctionSections = Ctx.getFlags().getFunctionSections();
@@ -278,8 +272,6 @@
RelSection->addRelocations(OffsetInSection, Asm->fixups(), SymTab);
}
Section->appendData(Str, Asm->getBufferView());
- if (TimeThisFunction)
- Ctx.popTimer(TimerID, StackID);
}
namespace {
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index c96db74..62e5ee7 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -276,7 +276,7 @@
AllThreadContexts.push_back(MyTLS);
ICE_TLS_SET_FIELD(TLS, MyTLS);
// Pre-register built-in stack names.
- if (BuildDefs::dump()) {
+ if (BuildDefs::timers()) {
// TODO(stichnot): There needs to be a strong relationship between
// the newTimerStackID() return values and TSK_Default/TSK_Funcs.
newTimerStackID("Total across all functions");
@@ -807,7 +807,7 @@
}
TimerStackIdT GlobalContext::newTimerStackID(const IceString &Name) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return 0;
auto Timers = getTimers();
TimerStackIdT NewID = Timers->size();
@@ -853,23 +853,31 @@
// such as the use of atomics to modify queue elements.
void GlobalContext::optQueueBlockingPush(std::unique_ptr<Cfg> Func) {
assert(Func);
- OptQ.blockingPush(Func.release());
+ {
+ TimerMarker _(TimerStack::TT_qTransPush, this);
+ OptQ.blockingPush(Func.release());
+ }
if (getFlags().isSequential())
translateFunctions();
}
std::unique_ptr<Cfg> GlobalContext::optQueueBlockingPop() {
+ TimerMarker _(TimerStack::TT_qTransPop, this);
return std::unique_ptr<Cfg>(OptQ.blockingPop());
}
void GlobalContext::emitQueueBlockingPush(EmitterWorkItem *Item) {
assert(Item);
- EmitQ.blockingPush(Item);
+ {
+ TimerMarker _(TimerStack::TT_qEmitPush, this);
+ EmitQ.blockingPush(Item);
+ }
if (getFlags().isSequential())
emitItems();
}
EmitterWorkItem *GlobalContext::emitQueueBlockingPop() {
+ TimerMarker _(TimerStack::TT_qEmitPop, this);
return EmitQ.blockingPop();
}
@@ -884,7 +892,7 @@
}
void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
auto Timers = getTimers();
assert(Timers->size() > StackID);
@@ -895,6 +903,15 @@
ClFlags GlobalContext::Flags;
ClFlagsExtra GlobalContext::ExtraFlags;
+TimerIdT TimerMarker::getTimerIdFromFuncName(GlobalContext *Ctx,
+ const IceString &FuncName) {
+ if (!BuildDefs::timers())
+ return 0;
+ if (!Ctx->getFlags().getTimeEachFunction())
+ return 0;
+ return Ctx->getTimerID(GlobalContext::TSK_Funcs, FuncName);
+}
+
void TimerMarker::push() {
switch (StackID) {
case GlobalContext::TSK_Default:
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index c8d86af..261f87c 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -118,7 +118,7 @@
/// timers, in the same order, with the same names, but initially
/// empty of timing data.
void initInto(TimerList &Dest) const {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
Dest.clear();
for (const TimerStack &Stack : *this) {
@@ -126,7 +126,7 @@
}
}
void mergeFrom(TimerList &Src) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
assert(size() == Src.size());
size_type i = 0;
@@ -381,7 +381,7 @@
}
EmitterThreads.clear();
- if (BuildDefs::dump()) {
+ if (BuildDefs::timers()) {
auto Timers = getTimers();
for (ThreadContext *TLS : AllThreadContexts)
Timers->mergeFrom(TLS->Timers);
@@ -560,25 +560,33 @@
TimerMarker(TimerIdT ID, GlobalContext *Ctx,
TimerStackIdT StackID = GlobalContext::TSK_Default)
: ID(ID), Ctx(Ctx), StackID(StackID) {
- if (BuildDefs::dump())
+ if (BuildDefs::timers())
push();
}
TimerMarker(TimerIdT ID, const Cfg *Func,
TimerStackIdT StackID = GlobalContext::TSK_Default)
: ID(ID), Ctx(nullptr), StackID(StackID) {
// Ctx gets set at the beginning of pushCfg().
- if (BuildDefs::dump())
+ if (BuildDefs::timers())
pushCfg(Func);
}
+ TimerMarker(GlobalContext *Ctx, const IceString &FuncName)
+ : ID(getTimerIdFromFuncName(Ctx, FuncName)), Ctx(Ctx),
+ StackID(GlobalContext::TSK_Funcs) {
+ if (BuildDefs::timers())
+ push();
+ }
~TimerMarker() {
- if (BuildDefs::dump() && Active)
+ if (BuildDefs::timers() && Active)
Ctx->popTimer(ID, StackID);
}
private:
void push();
void pushCfg(const Cfg *Func);
+ static TimerIdT getTimerIdFromFuncName(GlobalContext *Ctx,
+ const IceString &FuncName);
const TimerIdT ID;
GlobalContext *Ctx;
const TimerStackIdT StackID;
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 468883b..1b57750 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -313,6 +313,7 @@
}
void TargetLowering::genTargetHelperCalls() {
+ TimerMarker T(TimerStack::TT_genHelpers, Func);
Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 7707d14..0b5423e 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -615,6 +615,7 @@
}
template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() {
+ TimerMarker _(TimerStack::TT_findRMW, Func);
Func->dump("Before RMW");
if (Func->isVerbose(IceV_RMW))
Func->getContext()->lockStr();
@@ -732,6 +733,7 @@
}
template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() {
+ TimerMarker _(TimerStack::TT_loadOpt, Func);
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
while (!Context.atEnd()) {
diff --git a/src/IceTimerTree.cpp b/src/IceTimerTree.cpp
index a9baf47..1ae4e53 100644
--- a/src/IceTimerTree.cpp
+++ b/src/IceTimerTree.cpp
@@ -32,7 +32,7 @@
TimerStack::TimerStack(const IceString &Name)
: Name(Name), FirstTimestamp(timestamp()), LastTimestamp(FirstTimestamp) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
Nodes.resize(1); // Reserve Nodes[0] for the root node (sentinel).
IDs.resize(TT__num);
@@ -49,7 +49,7 @@
// Returns the unique timer ID for the given Name, creating a new ID if needed.
TimerIdT TimerStack::getTimerID(const IceString &Name) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return 0;
if (IDsIndex.find(Name) == IDsIndex.end()) {
IDsIndex[Name] = IDs.size();
@@ -76,7 +76,7 @@
// Merges two timer stacks, by combining and summing corresponding entries.
// This timer stack is updated from Src.
void TimerStack::mergeFrom(const TimerStack &Src) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
TranslationType Mapping = translateIDsFrom(Src);
TTindex SrcIndex = 0;
@@ -146,7 +146,7 @@
// Pushes a new marker onto the timer stack.
void TimerStack::push(TimerIdT ID) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
constexpr bool UpdateCounts = false;
update(UpdateCounts);
@@ -157,7 +157,7 @@
// Pops the top marker from the timer stack. Validates via assert() that the
// expected marker is popped.
void TimerStack::pop(TimerIdT ID) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
constexpr bool UpdateCounts = true;
update(UpdateCounts);
@@ -174,7 +174,7 @@
// At a state change (e.g. push or pop), updates the flat and cumulative
// timings for everything on the timer stack.
void TimerStack::update(bool UpdateCounts) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
++StateChangeCount;
// Whenever the stack is about to change, we grab the time delta since the
@@ -210,7 +210,7 @@
}
void TimerStack::reset() {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
StateChangeCount = 0;
FirstTimestamp = LastTimestamp = timestamp();
@@ -228,7 +228,7 @@
// Dump the Map items in reverse order of their time contribution.
void dumpHelper(Ostream &Str, const DumpMapType &Map, double TotalTime) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
for (auto &I : reverse_range(Map)) {
char buf[80];
@@ -244,7 +244,7 @@
// MaxVal=5 ==> "[%1lu] "
// MaxVal=9876 ==> "[%4lu] "
void makePrintfFormatString(char *Buf, size_t BufLen, size_t MaxVal) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
int NumDigits = 0;
do {
@@ -257,7 +257,7 @@
} // end of anonymous namespace
void TimerStack::dump(Ostream &Str, bool DumpCumulative) {
- if (!BuildDefs::dump())
+ if (!BuildDefs::timers())
return;
constexpr bool UpdateCounts = true;
update(UpdateCounts);
diff --git a/src/IceTimerTree.def b/src/IceTimerTree.def
index 687275a..340b5b8 100644
--- a/src/IceTimerTree.def
+++ b/src/IceTimerTree.def
@@ -20,6 +20,7 @@
X(O2) \
X(Om1) \
X(advancedPhiLowering) \
+ X(alloca) \
X(computeLoopNestDepth) \
X(convertToIce) \
X(deletePhis) \
@@ -29,14 +30,17 @@
X(doNopInsertion) \
X(emitAsm) \
X(emitGlobalInitializers) \
+ X(findRMW) \
X(genCode) \
X(genFrame) \
+ X(genHelpers) \
X(initUnhandled) \
X(linearScan) \
X(liveRange) \
X(liveness) \
X(livenessLightweight) \
X(llvmConvert) \
+ X(loadOpt) \
X(lowerPhiAssignments) \
X(parse) \
X(parseConstants) \
@@ -49,6 +53,10 @@
X(phiValidation) \
X(placePhiLoads) \
X(placePhiStores) \
+ X(qEmitPop) \
+ X(qEmitPush) \
+ X(qTransPop) \
+ X(qTransPush) \
X(regAlloc) \
X(renumberInstructions) \
X(szmain) \
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index 1b6cadd..698dff6 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -1339,21 +1339,14 @@
NextLocalInstIndex(Context->getNumGlobalIDs()) {}
bool convertFunction() {
- const Ice::TimerStackIdT StackID = Ice::GlobalContext::TSK_Funcs;
- Ice::TimerIdT TimerID = 0;
- const bool TimeThisFunction = getFlags().getTimeEachFunction();
- if (TimeThisFunction) {
- TimerID = getTranslator().getContext()->getTimerID(StackID,
- FuncDecl->getName());
- getTranslator().getContext()->pushTimer(TimerID, StackID);
- }
-
- // Note: The Cfg is created, even when IR generation is disabled. This is
- // done to install a CfgLocalAllocator for various internal containers.
- Func = Ice::Cfg::create(getTranslator().getContext(),
- getTranslator().getNextSequenceNumber());
bool ParserResult;
{
+ Ice::TimerMarker T(getTranslator().getContext(), FuncDecl->getName());
+ // Note: The Cfg is created, even when IR generation is disabled. This is
+ // done to install a CfgLocalAllocator for various internal containers.
+ Func = Ice::Cfg::create(getTranslator().getContext(),
+ getTranslator().getNextSequenceNumber());
+
Ice::CfgLocalAllocatorScope _(Func.get());
// TODO(kschimpf) Clean up API to add a function signature to a CFG.
@@ -1370,12 +1363,6 @@
ParserResult = ParseThisBlock();
- // Temporarily end per-function timing, which will be resumed by the
- // translator function. This is because translation may be done
- // asynchronously in a separate thread.
- if (TimeThisFunction)
- getTranslator().getContext()->popTimer(TimerID, StackID);
-
// Note: Once any errors have been found, we turn off all translation of
// all remaining functions. This allows successive parsing errors to be
// reported, without adding extra checks to the translator for such