Subzero: Add locking to prepare for multithreaded translation.
This just gets the locking in place. Actual multithreading will be added later.
Mutexes are added for accessing the GlobalContext allocator, the constant pool, the stats data, and the profiling timers. These are managed via the LockedPtr<> helper. Finer-grained locks on the constant pool may be added later, i.e. a separate lock for each data type.
A vector of pointers to TLS objects is added to GlobalContext. Each new thread will get its own TLS object, whose address is added to the vector. (After threads complete, things like stats can be combined by iterating over the vector.)
The dump/emit streams are guarded by a separate lock, to avoid fine-grained interleaving of output by multiple threads. E.g., lock the streams, emit an entire function, and unlock the streams. This works for dumping too, though dump output for different passes on the same function may be interleaved with that of another thread. There is an OstreamLocker helper class to keep this simple.
CodeStats is made an inner class of GlobalContext (this was missed on a previous CL).
BUG= none
R=jfb@chromium.org, jvoung@chromium.org, kschimpf@google.com
Review URL: https://codereview.chromium.org/848193003
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index e8c74ba..b5cc93b 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -56,6 +56,7 @@
void Cfg::setError(const IceString &Message) {
HasError = true;
ErrorMessage = Message;
+ OstreamLocker L(Ctx);
Ctx->getStrDump() << "ICE translation error: " << ErrorMessage << "\n";
}
@@ -335,6 +336,7 @@
bool Cfg::validateLiveness() const {
TimerMarker T(TimerStack::TT_validateLiveness, this);
bool Valid = true;
+ OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrDump();
for (CfgNode *Node : Nodes) {
Inst *FirstInst = nullptr;
@@ -442,6 +444,7 @@
liveness(Liveness_Basic);
dump("After recomputing liveness for -decorate-asm");
}
+ OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrEmit();
IceString MangledName = getContext()->mangleName(getFunctionName());
emitTextHeader(MangledName);
@@ -454,8 +457,9 @@
TimerMarker T(TimerStack::TT_emit, this);
assert(!Ctx->getFlags().DecorateAsm);
IceString MangledName = getContext()->mangleName(getFunctionName());
- if (!Ctx->getFlags().UseELFWriter)
- emitTextHeader(MangledName);
+ // The emitIAS() routines emit into the internal assembler buffer,
+ // so there's no need to lock the streams until we're ready to call
+ // emitIASBytes().
for (CfgNode *Node : Nodes)
Node->emitIAS(this);
// Now write the function to the file and track.
@@ -464,6 +468,8 @@
Ctx->getObjectWriter()->writeFunctionCode(MangledName, getInternal(),
getAssembler<Assembler>());
} else {
+ OstreamLocker L(Ctx);
+ emitTextHeader(MangledName);
getAssembler<Assembler>()->emitIASBytes(Ctx);
}
}
@@ -474,6 +480,7 @@
return;
if (!Ctx->isVerbose())
return;
+ OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrDump();
if (!Message.empty())
Str << "================ " << Message << " ================\n";
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index cdfdc43..72f5a5a 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -793,13 +793,12 @@
if (GlobalPrefix.empty())
return;
uint32_t NameIndex = 0;
- Ostream &errs = Ctx->getStrDump();
for (auto V = Mod->global_begin(), E = Mod->global_end(); V != E; ++V) {
if (!V->hasName()) {
V->setName(createUnnamedName(GlobalPrefix, NameIndex));
++NameIndex;
} else {
- checkIfUnnamedNameSafe(V->getName(), "global", GlobalPrefix, errs);
+ checkIfUnnamedNameSafe(V->getName(), "global", GlobalPrefix);
}
}
}
@@ -809,13 +808,12 @@
if (FunctionPrefix.empty())
return;
uint32_t NameIndex = 0;
- Ostream &errs = Ctx->getStrDump();
for (Function &F : *Mod) {
if (!F.hasName()) {
F.setName(createUnnamedName(FunctionPrefix, NameIndex));
++NameIndex;
} else {
- checkIfUnnamedNameSafe(F.getName(), "function", FunctionPrefix, errs);
+ checkIfUnnamedNameSafe(F.getName(), "function", FunctionPrefix);
}
}
}
diff --git a/src/IceDefs.h b/src/IceDefs.h
index cd4011b..991e47f 100644
--- a/src/IceDefs.h
+++ b/src/IceDefs.h
@@ -112,7 +112,7 @@
// ends a variable's live range.
typedef std::pair<SizeT, InstNumberT> LiveBeginEndMapEntry;
typedef std::vector<LiveBeginEndMapEntry,
- CfgLocalAllocator<LiveBeginEndMapEntry> > LiveBeginEndMap;
+ CfgLocalAllocator<LiveBeginEndMapEntry>> LiveBeginEndMap;
typedef llvm::BitVector LivenessBV;
typedef uint32_t TimerStackIdT;
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index aaa3275..2b143f7 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -108,7 +108,7 @@
UndefPool Undefs;
};
-void CodeStats::dump(const IceString &Name, Ostream &Str) {
+void GlobalContext::CodeStats::dump(const IceString &Name, Ostream &Str) {
if (!ALLOW_DUMP)
return;
Str << "|" << Name << "|Inst Count |" << InstructionsEmitted << "\n";
@@ -132,8 +132,15 @@
: StrDump(OsDump), StrEmit(OsEmit), VMask(Mask),
ConstPool(new ConstantPool()), Arch(Arch), Opt(Opt),
TestPrefix(TestPrefix), Flags(Flags), RNG(""), ObjectWriter() {
+ // Create a new ThreadContext for the current thread. No need to
+ // lock AllThreadContexts at this point since no other threads have
+ // access yet to this GlobalContext object.
+ AllThreadContexts.push_back(new ThreadContext());
+ TLS = AllThreadContexts.back();
// Pre-register built-in stack names.
if (ALLOW_DUMP) {
+ // TODO(stichnot): There needs to be a strong relationship between
+ // the newTimerStackID() return values and TSK_Default/TSK_Funcs.
newTimerStackID("Total across all functions");
newTimerStackID("Per-function summary");
}
@@ -308,8 +315,13 @@
GlobalContext::~GlobalContext() {
llvm::DeleteContainerPointers(GlobalDeclarations);
+ llvm::DeleteContainerPointers(AllThreadContexts);
}
+// TODO(stichnot): Consider adding thread-local caches of constant
+// pool entries to reduce contention.
+
+// All locking is done by the getConstantInt[0-9]+() target function.
Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) {
switch (Ty) {
case IceType_i1:
@@ -330,44 +342,45 @@
Constant *GlobalContext::getConstantInt1(int8_t ConstantInt1) {
ConstantInt1 &= INT8_C(1);
- return ConstPool->Integers1.getOrAdd(this, ConstantInt1);
+ return getConstPool()->Integers1.getOrAdd(this, ConstantInt1);
}
Constant *GlobalContext::getConstantInt8(int8_t ConstantInt8) {
- return ConstPool->Integers8.getOrAdd(this, ConstantInt8);
+ return getConstPool()->Integers8.getOrAdd(this, ConstantInt8);
}
Constant *GlobalContext::getConstantInt16(int16_t ConstantInt16) {
- return ConstPool->Integers16.getOrAdd(this, ConstantInt16);
+ return getConstPool()->Integers16.getOrAdd(this, ConstantInt16);
}
Constant *GlobalContext::getConstantInt32(int32_t ConstantInt32) {
- return ConstPool->Integers32.getOrAdd(this, ConstantInt32);
+ return getConstPool()->Integers32.getOrAdd(this, ConstantInt32);
}
Constant *GlobalContext::getConstantInt64(int64_t ConstantInt64) {
- return ConstPool->Integers64.getOrAdd(this, ConstantInt64);
+ return getConstPool()->Integers64.getOrAdd(this, ConstantInt64);
}
Constant *GlobalContext::getConstantFloat(float ConstantFloat) {
- return ConstPool->Floats.getOrAdd(this, ConstantFloat);
+ return getConstPool()->Floats.getOrAdd(this, ConstantFloat);
}
Constant *GlobalContext::getConstantDouble(double ConstantDouble) {
- return ConstPool->Doubles.getOrAdd(this, ConstantDouble);
+ return getConstPool()->Doubles.getOrAdd(this, ConstantDouble);
}
Constant *GlobalContext::getConstantSym(RelocOffsetT Offset,
const IceString &Name,
bool SuppressMangling) {
- return ConstPool->Relocatables.getOrAdd(
+ return getConstPool()->Relocatables.getOrAdd(
this, RelocatableTuple(Offset, Name, SuppressMangling));
}
Constant *GlobalContext::getConstantUndef(Type Ty) {
- return ConstPool->Undefs.getOrAdd(this, Ty);
+ return getConstPool()->Undefs.getOrAdd(this, Ty);
}
+// All locking is done by the getConstant*() target function.
Constant *GlobalContext::getConstantZero(Type Ty) {
switch (Ty) {
case IceType_i1:
@@ -403,19 +416,19 @@
llvm_unreachable("Unknown type");
}
-ConstantList GlobalContext::getConstantPool(Type Ty) const {
+ConstantList GlobalContext::getConstantPool(Type Ty) {
switch (Ty) {
case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
- return ConstPool->Integers32.getConstantPool();
+ return getConstPool()->Integers32.getConstantPool();
case IceType_i64:
- return ConstPool->Integers64.getConstantPool();
+ return getConstPool()->Integers64.getConstantPool();
case IceType_f32:
- return ConstPool->Floats.getConstantPool();
+ return getConstPool()->Floats.getConstantPool();
case IceType_f64:
- return ConstPool->Doubles.getConstantPool();
+ return getConstPool()->Doubles.getConstantPool();
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
@@ -435,6 +448,9 @@
llvm_unreachable("Unknown type");
}
+// No locking because only the bitcode parser thread calls it.
+// TODO(stichnot,kschimpf): GlobalContext::GlobalDeclarations actually
+// seems to be unused. If so, remove that field and this method.
FunctionDeclaration *
GlobalContext::newFunctionDeclaration(const FuncSigType *Signature,
unsigned CallingConv, unsigned Linkage,
@@ -446,65 +462,75 @@
return Func;
}
+// No locking because only the bitcode parser thread calls it.
+// TODO(stichnot,kschimpf): GlobalContext::GlobalDeclarations actually
+// seems to be unused. If so, remove that field and this method.
VariableDeclaration *GlobalContext::newVariableDeclaration() {
VariableDeclaration *Var = new VariableDeclaration();
GlobalDeclarations.push_back(Var);
return Var;
}
-TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
- const IceString &Name) {
- assert(StackID < Timers.size());
- return Timers[StackID].getTimerID(Name);
-}
-
TimerStackIdT GlobalContext::newTimerStackID(const IceString &Name) {
if (!ALLOW_DUMP)
return 0;
- TimerStackIdT NewID = Timers.size();
- Timers.push_back(TimerStack(Name));
+ auto Timers = getTimers();
+ TimerStackIdT NewID = Timers->size();
+ Timers->push_back(TimerStack(Name));
return NewID;
}
+TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
+ const IceString &Name) {
+ auto Timers = getTimers();
+ assert(StackID < Timers->size());
+ return Timers->at(StackID).getTimerID(Name);
+}
+
void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
- assert(StackID < Timers.size());
- Timers[StackID].push(ID);
+ auto Timers = getTimers();
+ assert(StackID < Timers->size());
+ Timers->at(StackID).push(ID);
}
void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
- assert(StackID < Timers.size());
- Timers[StackID].pop(ID);
+ auto Timers = getTimers();
+ assert(StackID < Timers->size());
+ Timers->at(StackID).pop(ID);
}
void GlobalContext::resetTimer(TimerStackIdT StackID) {
- assert(StackID < Timers.size());
- Timers[StackID].reset();
+ auto Timers = getTimers();
+ assert(StackID < Timers->size());
+ Timers->at(StackID).reset();
}
void GlobalContext::setTimerName(TimerStackIdT StackID,
const IceString &NewName) {
- assert(StackID < Timers.size());
- Timers[StackID].setName(NewName);
+ auto Timers = getTimers();
+ assert(StackID < Timers->size());
+ Timers->at(StackID).setName(NewName);
}
void GlobalContext::dumpStats(const IceString &Name, bool Final) {
- if (!ALLOW_DUMP)
+ if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
- if (Flags.DumpStats) {
- if (Final) {
- StatsCumulative.dump(Name, getStrDump());
- } else {
- StatsFunction.dump(Name, getStrDump());
- StatsCumulative.dump("_TOTAL_", getStrDump());
- }
+ OstreamLocker OL(this);
+ if (Final) {
+ getStatsCumulative()->dump(Name, getStrDump());
+ } else {
+ TLS->StatsFunction.dump(Name, getStrDump());
+ getStatsCumulative()->dump("_TOTAL_", getStrDump());
}
}
void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
if (!ALLOW_DUMP)
return;
- assert(Timers.size() > StackID);
- Timers[StackID].dump(getStrDump(), DumpCumulative);
+ auto Timers = getTimers();
+ assert(Timers->size() > StackID);
+ OstreamLocker L(this);
+ Timers->at(StackID).dump(getStrDump(), DumpCumulative);
}
TimerMarker::TimerMarker(TimerIdT ID, const Cfg *Func)
@@ -516,4 +542,6 @@
}
}
+thread_local GlobalContext::ThreadContext *GlobalContext::TLS;
+
} // end of namespace Ice
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index cf2478f..ab9c814 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -16,6 +16,7 @@
#define SUBZERO_SRC_ICEGLOBALCONTEXT_H
#include <memory>
+#include <mutex>
#include "IceDefs.h"
#include "IceClFlags.h"
@@ -27,40 +28,74 @@
namespace Ice {
class ClFlags;
+class ConstantPool;
class FuncSigType;
-// This class collects rudimentary statistics during translation.
-class CodeStats {
- CodeStats(const CodeStats &) = delete;
- CodeStats &operator=(const CodeStats &) = default;
+typedef std::mutex GlobalLockType;
+
+// LockedPtr is a way to provide automatically locked access to some object.
+template <typename T> class LockedPtr {
+ LockedPtr() = delete;
+ LockedPtr(const LockedPtr &) = delete;
+ LockedPtr &operator=(const LockedPtr &) = delete;
public:
- CodeStats()
- : InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0),
- Fills(0) {}
- void reset() { *this = CodeStats(); }
- void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; }
- void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; }
- void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; }
- void updateSpills() { ++Spills; }
- void updateFills() { ++Fills; }
- void dump(const IceString &Name, Ostream &Str);
+ LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) {
+ Lock->lock();
+ }
+ LockedPtr(LockedPtr &&Other) : Value(Other.Value), Lock(Other.Lock) {
+ Other.Value = nullptr;
+ Other.Lock = nullptr;
+ }
+ ~LockedPtr() { if (Lock) Lock->unlock(); } // moved-from: Lock is null
+ T *operator->() const { return Value; }
private:
- uint32_t InstructionsEmitted;
- uint32_t RegistersSaved;
- uint32_t FrameBytes;
- uint32_t Spills;
- uint32_t Fills;
+ T *Value;
+ GlobalLockType *Lock;
};
-// TODO: Accesses to all non-const fields of GlobalContext need to
-// be synchronized, especially the constant pool, the allocator, and
-// the output streams.
class GlobalContext {
GlobalContext(const GlobalContext &) = delete;
GlobalContext &operator=(const GlobalContext &) = delete;
+ // CodeStats collects rudimentary statistics during translation.
+ class CodeStats {
+ CodeStats(const CodeStats &) = delete;
+ CodeStats &operator=(const CodeStats &) = default;
+
+ public:
+ CodeStats()
+ : InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0),
+ Fills(0) {}
+ void reset() { *this = CodeStats(); }
+ void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; }
+ void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; }
+ void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; }
+ void updateSpills() { ++Spills; }
+ void updateFills() { ++Fills; }
+ void dump(const IceString &Name, Ostream &Str);
+
+ private:
+ uint32_t InstructionsEmitted;
+ uint32_t RegistersSaved;
+ uint32_t FrameBytes;
+ uint32_t Spills;
+ uint32_t Fills;
+ };
+
+ // ThreadContext contains thread-local data. This data can be
+ // combined/reduced as needed after all threads complete.
+ class ThreadContext {
+ ThreadContext(const ThreadContext &) = delete;
+ ThreadContext &operator=(const ThreadContext &) = delete;
+
+ public:
+ ThreadContext() {}
+ CodeStats StatsFunction;
+ std::vector<TimerStack> Timers;
+ };
+
public:
GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer,
VerboseMask Mask, TargetArch Arch, OptLevel Opt,
@@ -76,6 +111,19 @@
void addVerbose(VerboseMask Mask) { VMask |= Mask; }
void subVerbose(VerboseMask Mask) { VMask &= ~Mask; }
+ // The dump and emit streams need to be used by only one thread at a
+ // time. This is done by exclusively reserving the streams via
+ // lockStr() and unlockStr(). The OstreamLocker class can be used
+ // to conveniently manage this.
+ //
+ // The model is that a thread grabs the stream lock, then does an
+ // arbitrary amount of work during which far-away callees may grab
+ // the stream and do something with it, and finally the thread
+ // releases the stream lock. This allows large chunks of output to
+ // be dumped or emitted without risking interleaving from multiple
+ // threads.
+ void lockStr() { StrLock.lock(); }
+ void unlockStr() { StrLock.unlock(); }
Ostream &getStrDump() { return *StrDump; }
Ostream &getStrEmit() { return *StrEmit; }
@@ -109,7 +157,7 @@
Constant *getConstantZero(Type Ty);
// getConstantPool() returns a copy of the constant pool for
// constants of a given type.
- ConstantList getConstantPool(Type Ty) const;
+ ConstantList getConstantPool(Type Ty);
// Returns a new function declaration, allocated in an internal
// memory pool. Ownership of the function is maintained by this
// class instance.
@@ -129,7 +177,7 @@
}
// Allocate data of type T using the global allocator.
- template <typename T> T *allocate() { return Allocator.Allocate<T>(); }
+ template <typename T> T *allocate() { return getAllocator()->Allocate<T>(); }
const Intrinsics &getIntrinsicsInfo() const { return IntrinsicsInfo; }
@@ -142,38 +190,38 @@
// Reset stats at the beginning of a function.
void resetStats() {
if (ALLOW_DUMP)
- StatsFunction.reset();
+ TLS->StatsFunction.reset();
}
void dumpStats(const IceString &Name, bool Final = false);
void statsUpdateEmitted(uint32_t InstCount) {
- if (!ALLOW_DUMP)
+ if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
- StatsFunction.updateEmitted(InstCount);
- StatsCumulative.updateEmitted(InstCount);
+ TLS->StatsFunction.updateEmitted(InstCount);
+ getStatsCumulative()->updateEmitted(InstCount);
}
void statsUpdateRegistersSaved(uint32_t Num) {
- if (!ALLOW_DUMP)
+ if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
- StatsFunction.updateRegistersSaved(Num);
- StatsCumulative.updateRegistersSaved(Num);
+ TLS->StatsFunction.updateRegistersSaved(Num);
+ getStatsCumulative()->updateRegistersSaved(Num);
}
void statsUpdateFrameBytes(uint32_t Bytes) {
- if (!ALLOW_DUMP)
+ if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
- StatsFunction.updateFrameBytes(Bytes);
- StatsCumulative.updateFrameBytes(Bytes);
+ TLS->StatsFunction.updateFrameBytes(Bytes);
+ getStatsCumulative()->updateFrameBytes(Bytes);
}
void statsUpdateSpills() {
- if (!ALLOW_DUMP)
+ if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
- StatsFunction.updateSpills();
- StatsCumulative.updateSpills();
+ TLS->StatsFunction.updateSpills();
+ getStatsCumulative()->updateSpills();
}
void statsUpdateFills() {
- if (!ALLOW_DUMP)
+ if (!ALLOW_DUMP || !getFlags().DumpStats)
return;
- StatsFunction.updateFills();
- StatsCumulative.updateFills();
+ TLS->StatsFunction.updateFills();
+ getStatsCumulative()->updateFills();
}
// These are predefined TimerStackIdT values.
@@ -183,8 +231,8 @@
TSK_Num
};
- TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
TimerStackIdT newTimerStackID(const IceString &Name);
+ TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
void resetTimer(TimerStackIdT StackID);
@@ -193,12 +241,24 @@
bool DumpCumulative = true);
private:
+ // Try to make sure the mutexes are allocated on separate cache
+ // lines, assuming the maximum cache line size is 64.
+ const static size_t MaxCacheLineSize = 64;
+ alignas(MaxCacheLineSize) GlobalLockType AllocLock;
+ alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock;
+ alignas(MaxCacheLineSize) GlobalLockType StatsLock;
+ alignas(MaxCacheLineSize) GlobalLockType TimerLock;
+
+ // StrLock is a global lock on the dump and emit output streams.
+ typedef std::mutex StrLockType;
+ StrLockType StrLock;
+
Ostream *StrDump; // Stream for dumping / diagnostics
Ostream *StrEmit; // Stream for code emission
ArenaAllocator<> Allocator;
VerboseMask VMask;
- std::unique_ptr<class ConstantPool> ConstPool;
+ std::unique_ptr<ConstantPool> ConstPool;
Intrinsics IntrinsicsInfo;
const TargetArch Arch;
const OptLevel Opt;
@@ -206,11 +266,28 @@
const ClFlags &Flags;
RandomNumberGenerator RNG;
std::unique_ptr<ELFObjectWriter> ObjectWriter;
- CodeStats StatsFunction;
CodeStats StatsCumulative;
std::vector<TimerStack> Timers;
std::vector<GlobalDeclaration *> GlobalDeclarations;
+ LockedPtr<ArenaAllocator<>> getAllocator() {
+ return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
+ }
+ LockedPtr<ConstantPool> getConstPool() {
+ return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock);
+ }
+ LockedPtr<CodeStats> getStatsCumulative() {
+ return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);
+ }
+ LockedPtr<std::vector<TimerStack>> getTimers() {
+ return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock);
+ }
+
+ std::vector<ThreadContext *> AllThreadContexts;
+ // Each thread has its own TLS pointer which is also held in
+ // AllThreadContexts.
+ thread_local static ThreadContext *TLS;
+
// Private helpers for mangleName()
typedef llvm::SmallVector<char, 32> ManglerVector;
void incrementSubstitutions(ManglerVector &OldName) const;
@@ -245,6 +322,22 @@
bool Active;
};
+// Helper class for locking the streams and then automatically
+// unlocking them.
+class OstreamLocker {
+private:
+ OstreamLocker() = delete;
+ OstreamLocker(const OstreamLocker &) = delete;
+ OstreamLocker &operator=(const OstreamLocker &) = delete;
+
+public:
+ explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); }
+ ~OstreamLocker() { Ctx->unlockStr(); }
+
+private:
+ GlobalContext *const Ctx;
+};
+
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H
diff --git a/src/IceOperand.h b/src/IceOperand.h
index 6eb5eb9..3bbc0e3 100644
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -525,7 +525,7 @@
VMK_SingleDefs, // Track uses+defs, but only record single def
VMK_All // Track uses+defs, including full def list
};
-typedef std::vector<const Inst *, CfgLocalAllocator<const Inst *> > InstDefList;
+typedef std::vector<const Inst *, CfgLocalAllocator<const Inst *>> InstDefList;
// VariableTracking tracks the metadata for a single variable. It is
// only meant to be used internally by VariablesMetadata.
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp
index 4ba82a0..5056bfd 100644
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -264,9 +264,11 @@
bool Randomized) {
TimerMarker T(TimerStack::TT_linearScan, Func);
assert(RegMaskFull.any()); // Sanity check
- Ostream &Str = Func->getContext()->getStrDump();
+ GlobalContext *Ctx = Func->getContext();
const bool Verbose =
- ALLOW_DUMP && Func->getContext()->isVerbose(IceV_LinearScan);
+ ALLOW_DUMP && Ctx->isVerbose(IceV_LinearScan);
+ if (Verbose)
+ Ctx->lockStr();
Func->resetCurrentNode();
VariablesMetadata *VMetadata = Func->getVMetadata();
const size_t NumRegisters = RegMaskFull.size();
@@ -300,6 +302,7 @@
Variable *Cur = Unhandled.back();
Unhandled.pop_back();
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "\nConsidering ";
dumpLiveRange(Cur, Func);
Str << "\n";
@@ -318,6 +321,7 @@
// RegNumTmp should have already been set above.
assert(Cur->getRegNumTmp() == RegNum);
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Precoloring ";
dumpLiveRange(Cur, Func);
Str << "\n";
@@ -340,6 +344,7 @@
if (Item->rangeEndsBefore(Cur)) {
// Move Item from Active to Handled list.
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Expiring ";
dumpLiveRange(Item, Func);
Str << "\n";
@@ -349,6 +354,7 @@
} else if (!Item->rangeOverlapsStart(Cur)) {
// Move Item from Active to Inactive list.
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Inactivating ";
dumpLiveRange(Item, Func);
Str << "\n";
@@ -373,6 +379,7 @@
if (Item->rangeEndsBefore(Cur)) {
// Move Item from Inactive to Handled list.
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Expiring ";
dumpLiveRange(Item, Func);
Str << "\n";
@@ -381,6 +388,7 @@
} else if (Item->rangeOverlapsStart(Cur)) {
// Move Item from Inactive to Active list.
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Reactivating ";
dumpLiveRange(Item, Func);
Str << "\n";
@@ -446,6 +454,7 @@
}
}
if (Verbose && Prefer) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Initial Prefer=";
Prefer->dump(Func);
Str << " R=" << PreferReg << " LIVE=" << Prefer->getLiveRange()
@@ -531,6 +540,7 @@
// Print info about physical register availability.
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
for (SizeT i = 0; i < RegMask.size(); ++i) {
if (RegMask[i]) {
Str << Func->getTarget()->getRegName(i, IceType_i32)
@@ -546,6 +556,7 @@
// allowed to overlap with its linked variable.
Cur->setRegNumTmp(PreferReg);
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Preferring ";
dumpLiveRange(Cur, Func);
Str << "\n";
@@ -560,6 +571,7 @@
int32_t RegNum = Free.find_first();
Cur->setRegNumTmp(RegNum);
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Allocating ";
dumpLiveRange(Cur, Func);
Str << "\n";
@@ -613,6 +625,7 @@
Variable *Item = Active[Index];
if (Item->getRegNumTmp() == MinWeightIndex) {
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Evicting ";
dumpLiveRange(Item, Func);
Str << "\n";
@@ -639,6 +652,7 @@
if (Item->getRegNumTmp() == MinWeightIndex &&
Item->rangeOverlaps(Cur)) {
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Evicting ";
dumpLiveRange(Item, Func);
Str << "\n";
@@ -653,6 +667,7 @@
++RegUses[MinWeightIndex];
Active.push_back(Cur);
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
Str << "Allocating ";
dumpLiveRange(Cur, Func);
Str << "\n";
@@ -686,6 +701,7 @@
AssignedRegNum = Permutation[RegNum];
}
if (Verbose) {
+ Ostream &Str = Ctx->getStrDump();
if (!Item->hasRegTmp()) {
Str << "Not assigning ";
Item->dump(Func);
@@ -712,6 +728,9 @@
// Another idea for coalescing stack slots is to initialize the
// Unhandled list with just the unallocated variables, saving time
// but not offering second-chance opportunities.
+
+ if (Verbose)
+ Ctx->unlockStr();
}
// ======================== Dump routines ======================== //
@@ -719,9 +738,9 @@
void LinearScan::dump(Cfg *Func) const {
if (!ALLOW_DUMP)
return;
- Ostream &Str = Func->getContext()->getStrDump();
if (!Func->getContext()->isVerbose(IceV_LinearScan))
return;
+ Ostream &Str = Func->getContext()->getStrDump();
Func->resetCurrentNode();
Str << "**** Current regalloc state:\n";
Str << "++++++ Handled:\n";
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index f1b746f..ea1f013 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -898,6 +898,7 @@
}
if (ALLOW_DUMP && Func->getContext()->isVerbose(IceV_Frame)) {
+ OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "Stack layout:\n";
@@ -1028,6 +1029,7 @@
Writer->writeConstantPool<ConstantFloat>(IceType_f32);
Writer->writeConstantPool<ConstantDouble>(IceType_f64);
} else {
+ OstreamLocker L(Ctx);
emitConstantPool<PoolTypeConverter<float>>();
emitConstantPool<PoolTypeConverter<double>>();
}
@@ -3567,6 +3569,7 @@
return;
if (!Func->getContext()->isVerbose(IceV_AddrOpt))
return;
+ OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "Instruction: ";
Reason->dumpDecorated(Func);
@@ -3738,6 +3741,7 @@
Variable *&Index, uint16_t &Shift, int32_t &Offset) {
Func->resetCurrentNode();
if (Func->getContext()->isVerbose(IceV_AddrOpt)) {
+ OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "\nStarting computeAddressOpt for instruction:\n ";
Instr->dumpDecorated(Func);
@@ -4579,6 +4583,7 @@
assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);
if (Func->getContext()->isVerbose(IceV_Random)) {
+ OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "Register equivalence classes:\n";
for (auto I : EquivalenceClasses) {
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp
index 2a50a41..c0331a8 100644
--- a/src/IceTranslator.cpp
+++ b/src/IceTranslator.cpp
@@ -50,14 +50,15 @@
}
bool Translator::checkIfUnnamedNameSafe(const IceString &Name, const char *Kind,
- const IceString &Prefix,
- Ostream &Stream) {
+ const IceString &Prefix) {
if (Name.find(Prefix) == 0) {
for (size_t i = Prefix.size(); i < Name.size(); ++i) {
if (!isdigit(Name[i])) {
return false;
}
}
+ OstreamLocker L(Ctx);
+ Ostream &Stream = Ctx->getStrDump();
Stream << "Warning : Default " << Kind << " prefix '" << Prefix
<< "' potentially conflicts with name '" << Name << "'.\n";
return true;
@@ -108,6 +109,7 @@
bool DisableTranslation = Ctx->getFlags().DisableTranslation;
const bool DumpGlobalVariables =
ALLOW_DUMP && Ctx->isVerbose() && Ctx->getFlags().VerboseFocusOn.empty();
+ OstreamLocker L(Ctx);
Ostream &Stream = Ctx->getStrDump();
const IceString &TranslateOnly = Ctx->getFlags().TranslateOnly;
for (const Ice::VariableDeclaration *Global : VariableDeclarations) {
diff --git a/src/IceTranslator.h b/src/IceTranslator.h
index 9aa56aa..a0c0409 100644
--- a/src/IceTranslator.h
+++ b/src/IceTranslator.h
@@ -67,7 +67,7 @@
/// Prefix to name unnamed names. Errors are put on Ostream.
/// Returns true if there isn't a potential conflict.
bool checkIfUnnamedNameSafe(const IceString &Name, const char *Kind,
- const IceString &Prefix, Ostream &Stream);
+ const IceString &Prefix);
protected:
GlobalContext *Ctx;
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index beb4e22..a405d8b 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -166,6 +166,11 @@
: NaClBitcodeParser(Cursor), Translator(Translator), Header(Header),
ErrorStatus(ErrorStatus), NumErrors(0), NumFunctionIds(0),
NumFunctionBlocks(0), BlockParser(nullptr) {
+ // Note: This gives the reader uncontrolled access to the dump
+ // stream, which it can then use without locking. TODO(kschimpf):
+ // Consider reworking the LLVM side to use e.g. a callback for
+ // errors.
+ Ice::OstreamLocker L(Translator.getContext());
setErrStream(Translator.getContext()->getStrDump());
}
@@ -2814,8 +2819,7 @@
Decl->setName(Trans.createUnnamedName(Prefix, NameIndex));
++NameIndex;
} else {
- Trans.checkIfUnnamedNameSafe(Decl->getName(), Context, Prefix,
- Trans.getContext()->getStrDump());
+ Trans.checkIfUnnamedNameSafe(Decl->getName(), Context, Prefix);
}
}