Subzero: Fix timers for multithreaded translation.
Now that multithreaded parsing and translation is in place, timer operations have to be made thread-local. After the non-main threads end, their thread-local timer data needs to be merged into the global timer data, which resides in the GlobalContext object. The merge is a bit tricky because the internal timer stack structure is built up dynamically as items are pushed and popped. Two threads may have radically different timing data:
1. The parser thread profile is completely different from a translator thread.
2. For -timing-funcs, two translator threads hold data for entirely different sets of functions.
A bit more tweaking will need to be done to make the timing output fully usable in a multithreaded run. Because of multiple threads, times may add up to >100%. Also, time spent blocked is being "unfairly" attributed to the caller of the blocking operation - we should either count the user time instead of wall-clock time, or add a special timer marker for blocking locking operations.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/878383004
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index f6deda0..53f9a9f 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -81,13 +81,21 @@
void Cfg::translate() {
if (hasError())
return;
+ // FunctionTimer conditionally pushes/pops a TimerMarker if
+ // TimeEachFunction is enabled.
+ std::unique_ptr<TimerMarker> FunctionTimer;
if (ALLOW_DUMP) {
const IceString &TimingFocusOn = getContext()->getFlags().TimingFocusOn;
- if (TimingFocusOn == "*" || TimingFocusOn == getFunctionName()) {
+ const IceString &Name = getFunctionName();
+ if (TimingFocusOn == "*" || TimingFocusOn == Name) {
setFocusedTiming();
getContext()->resetTimer(GlobalContext::TSK_Default);
- getContext()->setTimerName(GlobalContext::TSK_Default, getFunctionName());
+ getContext()->setTimerName(GlobalContext::TSK_Default, Name);
}
+ if (getContext()->getFlags().TimeEachFunction)
+ FunctionTimer.reset(new TimerMarker(
+ getContext()->getTimerID(GlobalContext::TSK_Funcs, Name),
+ getContext(), GlobalContext::TSK_Funcs));
}
TimerMarker T(TimerStack::TT_translate, this);
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 413278f..ca7097a 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -143,8 +143,9 @@
// Create a new ThreadContext for the current thread. No need to
// lock AllThreadContexts at this point since no other threads have
// access yet to this GlobalContext object.
- AllThreadContexts.push_back(new ThreadContext());
- ICE_TLS_SET_FIELD(TLS, AllThreadContexts.back());
+ ThreadContext *MyTLS = new ThreadContext();
+ AllThreadContexts.push_back(MyTLS);
+ ICE_TLS_SET_FIELD(TLS, MyTLS);
// Pre-register built-in stack names.
if (ALLOW_DUMP) {
// TODO(stichnot): There needs to be a strong relationship between
@@ -152,6 +153,7 @@
newTimerStackID("Total across all functions");
newTimerStackID("Per-function summary");
}
+ Timers.initInto(MyTLS->Timers);
if (Flags.UseELFWriter) {
ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
}
@@ -503,38 +505,32 @@
TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
const IceString &Name) {
- auto Timers = getTimers();
+ auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
return Timers->at(StackID).getTimerID(Name);
}
void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
- // TODO(stichnot): Timers are completely broken for multithreading; fix.
- if (getFlags().NumTranslationThreads)
- llvm::report_fatal_error("Timers and multithreading are currently broken");
- auto Timers = getTimers();
+ auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
Timers->at(StackID).push(ID);
}
void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
- // TODO(stichnot): Timers are completely broken for multithreading; fix.
- if (getFlags().NumTranslationThreads)
- llvm::report_fatal_error("Timers and multithreading are currently broken");
- auto Timers = getTimers();
+ auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
Timers->at(StackID).pop(ID);
}
void GlobalContext::resetTimer(TimerStackIdT StackID) {
- auto Timers = getTimers();
+ auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
Timers->at(StackID).reset();
}
void GlobalContext::setTimerName(TimerStackIdT StackID,
const IceString &NewName) {
- auto Timers = getTimers();
+ auto Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
assert(StackID < Timers->size());
Timers->at(StackID).setName(NewName);
}
@@ -560,13 +556,26 @@
Timers->at(StackID).dump(getStrDump(), DumpCumulative);
}
-TimerMarker::TimerMarker(TimerIdT ID, const Cfg *Func)
- : ID(ID), Ctx(Func->getContext()), Active(false) {
- if (ALLOW_DUMP) {
- Active = Func->getFocusedTiming() || Ctx->getFlags().SubzeroTimingEnabled;
- if (Active)
- Ctx->pushTimer(ID);
+void TimerMarker::push() {
+ switch (StackID) {
+ case GlobalContext::TSK_Default:
+ Active = Ctx->getFlags().SubzeroTimingEnabled;
+ break;
+ case GlobalContext::TSK_Funcs:
+ Active = Ctx->getFlags().TimeEachFunction;
+ break;
+ default:
+ break;
}
+ if (Active)
+ Ctx->pushTimer(ID, StackID);
+}
+
+void TimerMarker::pushCfg(const Cfg *Func) {
+ Ctx = Func->getContext();
+ Active = Func->getFocusedTiming() || Ctx->getFlags().SubzeroTimingEnabled;
+ if (Active)
+ Ctx->pushTimer(ID, StackID);
}
ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index ac321e6..04f08a2 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -83,6 +83,35 @@
uint32_t Fills;
};
+ // TimerList is a vector of TimerStack objects, with extra methods
+ // to initialize and merge these vectors.
+ class TimerList : public std::vector<TimerStack> {
+ public:
+ // initInto() initializes a target list of timers based on the
+ // current list. In particular, it creates the same number of
+ // timers, in the same order, with the same names, but initially
+ // empty of timing data.
+ void initInto(TimerList &Dest) const {
+ if (!ALLOW_DUMP)
+ return;
+ Dest.clear();
+ for (const TimerStack &Stack : *this) {
+ Dest.push_back(TimerStack(Stack.getName()));
+ }
+ }
+ void mergeFrom(TimerList &Src) {
+ if (!ALLOW_DUMP)
+ return;
+ assert(size() == Src.size());
+ size_type i = 0;
+ for (TimerStack &Stack : *this) {
+ assert(Stack.getName() == Src[i].getName());
+ Stack.mergeFrom(Src[i]);
+ ++i;
+ }
+ }
+ };
+
// ThreadContext contains thread-local data. This data can be
// combined/reduced as needed after all threads complete.
class ThreadContext {
@@ -92,7 +121,7 @@
public:
ThreadContext() {}
CodeStats StatsFunction;
- std::vector<TimerStack> Timers;
+ TimerList Timers;
};
public:
@@ -211,14 +240,20 @@
// These are predefined TimerStackIdT values.
enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num };
+ // newTimerStackID() creates a new TimerStack in the global space.
+ // It does not affect any TimerStack objects in TLS.
TimerStackIdT newTimerStackID(const IceString &Name);
- TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
- void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
- void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
- void resetTimer(TimerStackIdT StackID);
- void setTimerName(TimerStackIdT StackID, const IceString &NewName);
+ // dumpTimers() dumps the global timer data. As such, one probably
+ // wants to call mergeTimerStacks() as a prerequisite.
void dumpTimers(TimerStackIdT StackID = TSK_Default,
bool DumpCumulative = true);
+ // The following methods affect only the calling thread's TLS timer
+ // data.
+ TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
+ void pushTimer(TimerIdT ID, TimerStackIdT StackID);
+ void popTimer(TimerIdT ID, TimerStackIdT StackID);
+ void resetTimer(TimerStackIdT StackID);
+ void setTimerName(TimerStackIdT StackID, const IceString &NewName);
// Adds a newly parsed and constructed function to the Cfg work
// queue. Notifies any idle workers that a new function is
@@ -235,8 +270,10 @@
void startWorkerThreads() {
size_t NumWorkers = getFlags().NumTranslationThreads;
+ auto Timers = getTimers();
for (size_t i = 0; i < NumWorkers; ++i) {
ThreadContext *WorkerTLS = new ThreadContext();
+ Timers->initInto(WorkerTLS->Timers);
AllThreadContexts.push_back(WorkerTLS);
TranslationThreads.push_back(std::thread(
&GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
@@ -254,6 +291,11 @@
}
TranslationThreads.clear();
// TODO(stichnot): join the emitter thread.
+ if (ALLOW_DUMP) {
+ auto Timers = getTimers();
+ for (ThreadContext *TLS : AllThreadContexts)
+ Timers->mergeFrom(TLS->Timers);
+ }
}
// Translation thread startup routine.
@@ -301,7 +343,7 @@
ICE_CACHELINE_BOUNDARY;
// Managed by getTimers()
GlobalLockType TimerLock;
- std::vector<TimerStack> Timers;
+ TimerList Timers;
ICE_CACHELINE_BOUNDARY;
// StrLock is a global lock on the dump and emit output streams.
@@ -331,8 +373,8 @@
LockedPtr<CodeStats> getStatsCumulative() {
return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);
}
- LockedPtr<std::vector<TimerStack>> getTimers() {
- return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock);
+ LockedPtr<TimerList> getTimers() {
+ return LockedPtr<TimerList>(&Timers, &TimerLock);
}
std::vector<ThreadContext *> AllThreadContexts;
@@ -357,24 +399,31 @@
TimerMarker &operator=(const TimerMarker &) = delete;
public:
- TimerMarker(TimerIdT ID, GlobalContext *Ctx)
- : ID(ID), Ctx(Ctx), Active(false) {
- if (ALLOW_DUMP) {
- Active = Ctx->getFlags().SubzeroTimingEnabled;
- if (Active)
- Ctx->pushTimer(ID);
- }
+ TimerMarker(TimerIdT ID, GlobalContext *Ctx,
+ TimerStackIdT StackID = GlobalContext::TSK_Default)
+ : ID(ID), Ctx(Ctx), StackID(StackID), Active(false) {
+ if (ALLOW_DUMP)
+ push();
}
- TimerMarker(TimerIdT ID, const Cfg *Func);
+ TimerMarker(TimerIdT ID, const Cfg *Func,
+ TimerStackIdT StackID = GlobalContext::TSK_Default)
+ : ID(ID), Ctx(nullptr), StackID(StackID), Active(false) {
+ // Ctx gets set at the beginning of pushCfg().
+ if (ALLOW_DUMP)
+ pushCfg(Func);
+ }
~TimerMarker() {
if (ALLOW_DUMP && Active)
- Ctx->popTimer(ID);
+ Ctx->popTimer(ID, StackID);
}
private:
- TimerIdT ID;
- GlobalContext *const Ctx;
+ void push();
+ void pushCfg(const Cfg *Func);
+ const TimerIdT ID;
+ GlobalContext *Ctx;
+ const TimerStackIdT StackID;
bool Active;
};
diff --git a/src/IceTimerTree.cpp b/src/IceTimerTree.cpp
index 2c912e4..f1366d5 100644
--- a/src/IceTimerTree.cpp
+++ b/src/IceTimerTree.cpp
@@ -24,8 +24,10 @@
StateChangeCount(0), StackTop(0) {
if (!ALLOW_DUMP)
return;
- Nodes.resize(1); // Reserve Nodes[0] for the root node.
+ Nodes.resize(1); // Reserve Nodes[0] for the root node (sentinel).
IDs.resize(TT__num);
+ LeafTimes.resize(TT__num);
+ LeafCounts.resize(TT__num);
#define STR(s) #s
#define X(tag) \
IDs[TT_##tag] = STR(tag); \
@@ -43,29 +45,108 @@
if (IDsIndex.find(Name) == IDsIndex.end()) {
IDsIndex[Name] = IDs.size();
IDs.push_back(Name);
+ LeafTimes.push_back(decltype(LeafTimes)::value_type());
+ LeafCounts.push_back(decltype(LeafCounts)::value_type());
}
return IDsIndex[Name];
}
+// Creates a mapping from TimerIdT (leaf) values in the Src timer
+// stack into TimerIdT values in this timer stack. Creates new
+// entries in this timer stack as needed.
+TimerStack::TranslationType
+TimerStack::translateIDsFrom(const TimerStack &Src) {
+ size_t Size = Src.IDs.size();
+ TranslationType Mapping(Size);
+ for (TimerIdT i = 0; i < Size; ++i) {
+ Mapping[i] = getTimerID(Src.IDs[i]);
+ }
+ return Mapping;
+}
+
+// Merges two timer stacks, by combining and summing corresponding
+// entries. This timer stack is updated from Src.
+void TimerStack::mergeFrom(const TimerStack &Src) {
+ if (!ALLOW_DUMP)
+ return;
+ TranslationType Mapping = translateIDsFrom(Src);
+ TTindex SrcIndex = 0;
+ for (const TimerTreeNode &SrcNode : Src.Nodes) {
+ // The first node is reserved as a sentinel, so avoid it.
+ if (SrcIndex > 0) {
+ // Find the full path to the Src node, translated to path
+ // components corresponding to this timer stack.
+ PathType MyPath = Src.getPath(SrcIndex, Mapping);
+ // Find a node in this timer stack corresponding to the given
+ // path, creating new interior nodes as necessary.
+ TTindex MyIndex = findPath(MyPath);
+ Nodes[MyIndex].Time += SrcNode.Time;
+ Nodes[MyIndex].UpdateCount += SrcNode.UpdateCount;
+ }
+ ++SrcIndex;
+ }
+ for (TimerIdT i = 0; i < Src.LeafTimes.size(); ++i) {
+ LeafTimes[Mapping[i]] += Src.LeafTimes[i];
+ LeafCounts[Mapping[i]] += Src.LeafCounts[i];
+ }
+ StateChangeCount += Src.StateChangeCount;
+}
+
+// Constructs a path consisting of the sequence of leaf values leading
+// to a given node, with the Mapping translation applied to the leaf
+// values. The path ends up being in "reverse" order, i.e. from leaf
+// to root.
+TimerStack::PathType TimerStack::getPath(TTindex Index,
+ const TranslationType &Mapping) const {
+ PathType Path;
+ while (Index) {
+ Path.push_back(Mapping[Nodes[Index].Interior]);
+ assert(Nodes[Index].Parent < Index);
+ Index = Nodes[Index].Parent;
+ }
+ return Path;
+}
+
+// Given a parent node and a leaf ID, returns the index of the
+// parent's child ID, creating a new node for the child as necessary.
+TimerStack::TTindex TimerStack::getChildIndex(TimerStack::TTindex Parent,
+ TimerIdT ID) {
+ if (Nodes[Parent].Children.size() <= ID)
+ Nodes[Parent].Children.resize(ID + 1);
+ if (Nodes[Parent].Children[ID] == 0) {
+ TTindex Size = Nodes.size();
+ Nodes[Parent].Children[ID] = Size;
+ Nodes.resize(Size + 1);
+ Nodes[Size].Parent = Parent;
+ Nodes[Size].Interior = ID;
+ }
+ return Nodes[Parent].Children[ID];
+}
+
+// Finds a node in the timer stack corresponding to the given path,
+// creating new interior nodes as necessary.
+TimerStack::TTindex TimerStack::findPath(const PathType &Path) {
+ TTindex CurIndex = 0;
+ // The path is in reverse order (leaf to root), so it needs to be
+ // followed in reverse.
+ for (TTindex Index : reverse_range(Path)) {
+ CurIndex = getChildIndex(CurIndex, Index);
+ }
+ assert(CurIndex); // shouldn't be the sentinel node
+ return CurIndex;
+}
+
// Pushes a new marker onto the timer stack.
void TimerStack::push(TimerIdT ID) {
if (!ALLOW_DUMP)
return;
const bool UpdateCounts = false;
update(UpdateCounts);
- if (Nodes[StackTop].Children.size() <= ID)
- Nodes[StackTop].Children.resize(ID + 1);
- if (Nodes[StackTop].Children[ID] == 0) {
- TTindex Size = Nodes.size();
- Nodes[StackTop].Children[ID] = Size;
- Nodes.resize(Size + 1);
- Nodes[Size].Parent = StackTop;
- Nodes[Size].Interior = ID;
- }
- StackTop = Nodes[StackTop].Children[ID];
+ StackTop = getChildIndex(StackTop, ID);
+ assert(StackTop);
}
-// Pop the top marker from the timer stack. Validates via assert()
+// Pops the top marker from the timer stack. Validates via assert()
// that the expected marker is popped.
void TimerStack::pop(TimerIdT ID) {
if (!ALLOW_DUMP)
diff --git a/src/IceTimerTree.def b/src/IceTimerTree.def
index dba5ae7..8c955c7 100644
--- a/src/IceTimerTree.def
+++ b/src/IceTimerTree.def
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETIMERTREE_DEF
+#define SUBZERO_SRC_ICETIMERTREE_DEF
#define TIMERTREE_TABLE \
/* enum value */ \
@@ -53,5 +54,4 @@
X(vmetadata)
//#define X(tag)
-#define SUBZERO_SRC_ICETIMERTREE_DEF
#endif // SUBZERO_SRC_ICETIMERTREE_DEF
diff --git a/src/IceTimerTree.h b/src/IceTimerTree.h
index 795bbee..e40a917 100644
--- a/src/IceTimerTree.h
+++ b/src/IceTimerTree.h
@@ -22,8 +22,16 @@
class TimerStack {
TimerStack &operator=(const TimerStack &) = delete;
- // Timer tree index type
+ // Timer tree index type. A variable of this type is used to access
+ // an interior, not-necessarily-leaf node of the tree.
typedef std::vector<class TimerTreeNode>::size_type TTindex;
+ // Representation of a path of leaf values leading to a particular
+ // node. The representation happens to be in "reverse" order,
+ // i.e. from leaf/interior to root, for implementation efficiency.
+ typedef llvm::SmallVector<TTindex, 8> PathType;
+ // Representation of a mapping of leaf node indexes from one timer
+ // stack to another.
+ typedef std::vector<TimerIdT> TranslationType;
// TimerTreeNode represents an interior or leaf node in the call tree.
// It contains a list of children, a pointer to its parent, and the
@@ -54,7 +62,9 @@
TimerStack(const IceString &Name);
TimerStack(const TimerStack &) = default;
TimerIdT getTimerID(const IceString &Name);
+ void mergeFrom(const TimerStack &Src);
void setName(const IceString &NewName) { Name = NewName; }
+ const IceString &getName() const { return Name; }
void push(TimerIdT ID);
void pop(TimerIdT ID);
void reset();
@@ -63,6 +73,10 @@
private:
void update(bool UpdateCounts);
static double timestamp();
+ TranslationType translateIDsFrom(const TimerStack &Src);
+ PathType getPath(TTindex Index, const TranslationType &Mapping) const;
+ TTindex getChildIndex(TTindex Parent, TimerIdT ID);
+ TTindex findPath(const PathType &Path);
IceString Name;
double FirstTimestamp;
double LastTimestamp;
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index 893f912..0c06231 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -1157,7 +1157,7 @@
if (ALLOW_DUMP && getFlags().TimeEachFunction) {
getTranslator().getContext()->popTimer(
getTranslator().getContext()->getTimerID(
- Ice::GlobalContext::TSK_Funcs, Func->getFunctionName()),
+ Ice::GlobalContext::TSK_Funcs, FuncDecl->getName()),
Ice::GlobalContext::TSK_Funcs);
}
}
@@ -1827,6 +1827,7 @@
void FunctionParser::ExitBlock() {
if (isIRGenerationDisabled()) {
popTimerIfTimingEachFunction();
+ delete Func;
return;
}
// Before translating, check for blocks without instructions, and
@@ -1844,13 +1845,16 @@
++Index;
}
Func->computePredecessors();
+ // Temporarily end per-function timing, which will be resumed by the
+ // translator function. This is because translation may be done
+ // asynchronously in a separate thread.
+ popTimerIfTimingEachFunction();
// Note: Once any errors have been found, we turn off all
// translation of all remaining functions. This allows use to see
// multiple errors, without adding extra checks to the translator
// for such parsing errors.
if (Context->getNumErrors() == 0)
getTranslator().translateFcn(Func);
- popTimerIfTimingEachFunction();
}
void FunctionParser::ReportInvalidBinaryOp(Ice::InstArithmetic::OpKind Op,