Subzero: Basic Block Profiler.
BUG= None
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1147023007.
diff --git a/Makefile.standalone b/Makefile.standalone
index 6df7c40..82eff15 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -276,7 +276,7 @@
$(OBJDIR)/unittest: $(OBJDIR)
@mkdir -p $@
-RT_SRC := runtime/szrt.c runtime/szrt_ll.ll
+RT_SRC := runtime/szrt.c runtime/szrt_ll.ll runtime/szrt_profiler.c
RT_OBJ := build/runtime/szrt_native_x8632.o build/runtime/szrt_sb_x8632.o
runtime: $(RT_OBJ)
@@ -285,7 +285,7 @@
# even in a parallel build.
.INTERMEDIATE: runtime.is.built
$(RT_OBJ): runtime.is.built
-runtime.is.built: $(RT_SRC)
+runtime.is.built: $(RT_SRC) pydir/build-runtime.py
@echo ================ Building Subzero runtime ================
./pydir/build-runtime.py -v --pnacl-root $(PNACL_TOOLCHAIN_ROOT)
diff --git a/pydir/build-runtime.py b/pydir/build-runtime.py
index 81c8ee2..4d0bb41 100755
--- a/pydir/build-runtime.py
+++ b/pydir/build-runtime.py
@@ -23,11 +23,18 @@
'-o', obj
] + extra_args, echo=verbose)
shellcmd(['objcopy',
- '--localize-symbol=nacl_tp_tdb_offset',
- '--localize-symbol=nacl_tp_tls_offset',
+ '--strip-symbol=nacl_tp_tdb_offset',
+ '--strip-symbol=nacl_tp_tls_offset',
obj
], echo=verbose)
+def PartialLink(obj_files, extra_args, lib, verbose):
+ """Partially links a set of obj files into a final obj library."""
+ shellcmd(['ld',
+ '-o', lib,
+ '-r',
+ ] + extra_args + obj_files, echo=verbose)
+
def main():
"""Build the Subzero runtime support library for all architectures.
"""
@@ -72,12 +79,30 @@
], echo=args.verbose)
ll_files = ['{dir}/szrt.ll'.format(dir=tempdir),
'{srcdir}/szrt_ll.ll'.format(srcdir=srcdir)]
- # Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to szrt_native_x8632.o
+
+  # Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to
+  # tempdir/szrt_native_x8632.tmp.o.
Translate(ll_files,
['-mtriple=i686', '-mcpu=pentium4m'],
- '{rtdir}/szrt_native_x8632.o'.format(rtdir=rtdir),
+ '{dir}/szrt_native_x8632.tmp.o'.format(dir=tempdir),
args.verbose)
- # Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to szrt_sb_x8632.o
+  # Compile srcdir/szrt_profiler.c to tempdir/szrt_profiler_native_x8632.o
+ shellcmd(['clang',
+ '-O2',
+ '-target=i686',
+ '-c',
+ '{srcdir}/szrt_profiler.c'.format(srcdir=srcdir),
+ '-o', '{dir}/szrt_profiler_native_x8632.o'.format(dir=tempdir)
+ ], echo=args.verbose)
+  # Partially link both objects into the full rtdir/szrt_native_x8632.o.
+ PartialLink(['{dir}/szrt_native_x8632.tmp.o'.format(dir=tempdir),
+ '{dir}/szrt_profiler_native_x8632.o'.format(dir=tempdir)
+ ], ['-m elf_i386'],
+ '{rtdir}/szrt_native_x8632.o'.format(rtdir=rtdir), args.verbose)
+
+  # Translate tempdir/szrt.ll and srcdir/szrt_ll.ll to szrt_sb_x8632.o
+ # The sandboxed library does not get the profiler helper function as the
+ # binaries are linked with -nostdlib.
Translate(ll_files,
['-mtriple=i686-nacl', '-mcpu=pentium4m'],
'{rtdir}/szrt_sb_x8632.o'.format(rtdir=rtdir),
diff --git a/pydir/szbuild.py b/pydir/szbuild.py
index c962d4c..1e19cf3 100755
--- a/pydir/szbuild.py
+++ b/pydir/szbuild.py
@@ -84,7 +84,10 @@
choices=['obj', 'asm', 'iasm'],
help='Output file type. Default %(default)s.')
argparser.add_argument('--sandbox', dest='sandbox', action='store_true',
- help='Enabled sandboxing in the translator')
+ help='Enable sandboxing in the translator')
+ argparser.add_argument('--enable-block-profile',
+ dest='enable_block_profile', action='store_true',
+ help='Enable basic block profiling.')
argparser.add_argument('--verbose', '-v', dest='verbose',
action='store_true',
help='Display some extra debugging output')
@@ -217,6 +220,8 @@
'-ffunction-sections',
'-fdata-sections'] if hybrid else []) +
(['-sandbox'] if args.sandbox else []) +
+ (['-enable-block-profile'] if
+ args.enable_block_profile and not args.sandbox else []) +
args.sz_args +
[pexe],
echo=args.verbose)
diff --git a/runtime/szrt_profiler.c b/runtime/szrt_profiler.c
new file mode 100644
index 0000000..e31692e
--- /dev/null
+++ b/runtime/szrt_profiler.c
@@ -0,0 +1,59 @@
+#include <stdint.h>
+#include <stdio.h>
+
+// Profiling record for one instrumented basic block: a 64-bit execution
+// counter followed by a pointer to the block's name. The translator emits one
+// such 8-byte-aligned record per block (a zero-initialized 8-byte counter
+// followed by a relocation to the name string).
+struct BlockProfileInfo {
+  uint64_t Counter;
+  const char *const BlockName;
+} __attribute__((aligned(8)));
+
+// Emitted by the translator. Although declared here as a single pointer, the
+// symbol is the first element of a zero-terminated array of pointers to
+// profiling records; __Sz_profile_summary walks it starting from the symbol's
+// own address.
+extern const struct BlockProfileInfo *__Sz_block_profile_info;
+
+static const char SubzeroLogo[] =
+ "\n"
+ "\n"
+ "__________________________________________________________________________"
+ "____________________________\n"
+ " _____/\\\\\\\\\\\\\\\\\\\\\\__________________/"
+ "\\\\\\_______________________________________________________________\n"
+ " "
+ "___/\\\\\\/////////\\\\\\_______________\\/"
+ "\\\\\\_______________________________________________________________\n"
+ " "
+ "__\\//\\\\\\______\\///________________\\/"
+ "\\\\\\_______________________________________________________________\n"
+ " "
+ "___\\////\\\\\\__________/\\\\\\____/\\\\\\_\\/\\\\\\_________/"
+ "\\\\\\\\\\\\\\\\\\\\\\_____/\\\\\\\\\\\\\\\\___/\\\\/\\\\\\\\\\\\\\____/"
+ "\\\\\\\\\\____\n"
+ " "
+ "______\\////\\\\\\______\\/\\\\\\___\\/\\\\\\_\\/\\\\\\\\\\\\\\\\\\__\\///"
+ "////\\\\\\/____/\\\\\\/////\\\\\\_\\/\\\\\\/////\\\\\\_/\\\\\\///"
+ "\\\\\\__\n"
+ " "
+ "_________\\////\\\\\\___\\/\\\\\\___\\/\\\\\\_\\/\\\\\\////\\\\\\______/"
+ "\\\\\\/_____/\\\\\\\\\\\\\\\\\\\\\\__\\/\\\\\\__\\///__/\\\\\\__\\//"
+ "\\\\\\_\n"
+ " "
+ "__/\\\\\\______\\//\\\\\\__\\/\\\\\\___\\/\\\\\\_\\/\\\\\\__\\/\\\\\\____/"
+ "\\\\\\/______\\//\\\\///////___\\/\\\\\\_______\\//\\\\\\__/\\\\\\__\n"
+ " "
+ "_\\///\\\\\\\\\\\\\\\\\\\\\\/___\\//\\\\\\\\\\\\\\\\\\__\\/"
+ "\\\\\\\\\\\\\\\\\\___/\\\\\\\\\\\\\\\\\\\\\\__\\//\\\\\\\\\\\\\\\\\\\\_\\/"
+ "\\\\\\________\\///\\\\\\\\\\/___\n"
+ " "
+ "___\\///////////______\\/////////___\\/////////___\\///////////____\\/////"
+ "/////__\\///___________\\/////_____\n"
+ " "
+ "__________________________________________________________________________"
+ "____________________________\n"
+ "\n"
+ "\n";
+
+// Prints the Subzero logo followed by one "<count>\t<block name>" line for
+// every profiling record, then flushes stdout.
+//
+// __Sz_block_profile_info is declared as a single pointer, but the symbol is
+// the first slot of a NULL-terminated array of record pointers, so iteration
+// starts at the symbol's own address and stops at the first NULL slot.
+void __Sz_profile_summary(void) {
+  printf("%s", SubzeroLogo);
+  for (const struct BlockProfileInfo **curr = &__Sz_block_profile_info;
+       *curr != NULL; ++curr) {
+    // Counter is a uint64_t: print it with an unsigned conversion, and cast so
+    // the argument type matches the conversion exactly.
+    printf("%llu\t%s\n", (unsigned long long)(*curr)->Counter,
+           (*curr)->BlockName);
+  }
+  fflush(stdout);
+}
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 50aa5ee..5c2f9ad 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -18,6 +18,7 @@
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
+#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceLiveness.h"
#include "IceOperand.h"
@@ -75,6 +76,69 @@
// is used for dumping the stack frame location of Variables.
bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
+namespace {
+// Name prefixes for the two globals the profiler creates per basic block: the
+// global holding the block's name string, and the block's profiling record.
+// Cfg::isProfileGlobal() recognizes profiling records by the second prefix.
+constexpr char BlockNameGlobalPrefix[] = ".L$profiler$block_name$";
+constexpr char BlockStatsGlobalPrefix[] = ".L$profiler$block_info$";
+
+// Creates the constant global that stores NodeAsmName as a NUL-terminated
+// string. The global is named BlockNameGlobalPrefix + NodeAsmName.
+VariableDeclaration *nodeNameDeclaration(const IceString &NodeAsmName) {
+  auto *NameDecl = VariableDeclaration::create();
+  NameDecl->setName(BlockNameGlobalPrefix + NodeAsmName);
+  NameDecl->setIsConstant(true);
+
+  // The extra byte covers the terminating '\0' that follows the characters.
+  const auto SizeWithTerminator = NodeAsmName.size() + 1;
+  NameDecl->addInitializer(new VariableDeclaration::DataInitializer(
+      NodeAsmName.data(), SizeWithTerminator));
+
+  // 64-bit alignment is wasteful: 32-bit targets could use 4 bytes.
+  const SizeT NameAlignment = typeWidthInBytes(IceType_i64);
+  NameDecl->setAlignment(NameAlignment);
+  return NameDecl;
+}
+
+// Creates the profiling record for the node named NodeAsmName: an 8-byte
+// zero-initialized execution counter followed by a relocation to the start of
+// NodeNameDeclaration (the global holding the node's name). The record is
+// named BlockStatsGlobalPrefix + NodeAsmName and is 8-byte aligned.
+VariableDeclaration *
+blockProfilingInfoDeclaration(const IceString &NodeAsmName,
+                              VariableDeclaration *NodeNameDeclaration) {
+  auto *InfoDecl = VariableDeclaration::create();
+  InfoDecl->setName(BlockStatsGlobalPrefix + NodeAsmName);
+
+  // Field 1: the execution counter, initially zero.
+  const SizeT CounterSize = typeWidthInBytes(IceType_i64);
+  InfoDecl->addInitializer(
+      new VariableDeclaration::ZeroInitializer(CounterSize));
+
+  // Field 2: pointer to the first byte of the block's name.
+  const RelocOffsetT StartOfName = 0;
+  InfoDecl->addInitializer(new VariableDeclaration::RelocInitializer(
+      NodeNameDeclaration, StartOfName));
+
+  InfoDecl->setAlignment(CounterSize);
+  return InfoDecl;
+}
+
+} // end of anonymous namespace
+
+// Instruments every node of this CFG. For each node it creates a name-string
+// global and a profiling-record global, appends both to GlobalInits (which is
+// allocated here on first use), and asks the node to count its executions in
+// the record.
+void Cfg::profileBlocks() {
+  if (!GlobalInits)
+    GlobalInits.reset(new VariableDeclarationList());
+
+  for (CfgNode *Node : Nodes) {
+    const IceString NodeAsmName = Node->getAsmName();
+
+    VariableDeclaration *NameDecl = nodeNameDeclaration(NodeAsmName);
+    GlobalInits->push_back(NameDecl);
+
+    VariableDeclaration *InfoDecl =
+        blockProfilingInfoDeclaration(NodeAsmName, NameDecl);
+    GlobalInits->push_back(InfoDecl);
+
+    Node->profileExecutionCount(InfoDecl);
+  }
+}
+
+// Returns true when Var's name begins with BlockStatsGlobalPrefix, i.e. when
+// Var is one of the per-block profiling records created by profileBlocks().
+bool Cfg::isProfileGlobal(const VariableDeclaration &Var) {
+  // compare() only inspects the leading prefix-length characters, whereas
+  // find() == 0 keeps scanning the rest of the name when the prefix is absent.
+  constexpr auto PrefixLength = sizeof(BlockStatsGlobalPrefix) - 1;
+  return Var.getName().compare(0, PrefixLength, BlockStatsGlobalPrefix) == 0;
+}
+
+// Inserts a call to the runtime function __Sz_profile_summary (defined in
+// runtime/szrt_profiler.c) as the first instruction of this CFG's entry node.
+// The profiler adds this call to functions that cause the program to exit.
+void Cfg::addCallToProfileSummary() {
+  // The summary function takes no arguments, returns nothing, and is not
+  // emitted as a tail call.
+  constexpr SizeT NoArgs = 0;
+  constexpr Variable *NoDest = nullptr;
+  constexpr bool IsTailCall = false;
+
+  Constant *SummaryFunction =
+      Ctx->getConstantExternSym("__Sz_profile_summary");
+  auto *SummaryCall =
+      InstCall::create(this, NoArgs, NoDest, SummaryFunction, IsTailCall);
+  getEntryNode()->getInsts().push_front(SummaryCall);
+}
+
void Cfg::translate() {
if (hasError())
return;
@@ -99,6 +163,16 @@
dump("Initial CFG");
+ if (getContext()->getFlags().getEnableBlockProfile()) {
+ profileBlocks();
+ // TODO(jpp): this is fragile, at best. Figure out a better way of detecting
+ // exit functions.
+ if (GlobalContext::matchSymbolName(getFunctionName(), "exit")) {
+ addCallToProfileSummary();
+ }
+ dump("Profiled CFG");
+ }
+
// The set of translation passes and their order are determined by
// the target.
getTarget()->translate();
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 8f74d07..056812b 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -128,10 +128,17 @@
return static_cast<T *>(TargetAssembler.get());
}
Assembler *releaseAssembler() { return TargetAssembler.release(); }
+  // Transfers ownership of this CFG's GlobalInits list to the caller; the
+  // member is moved from, so the Cfg no longer owns the list afterwards. The
+  // result may be null: the list is only allocated by profileBlocks().
+  std::unique_ptr<VariableDeclarationList> getGlobalInits() {
+    return std::move(GlobalInits);
+  }
bool hasComputedFrame() const;
bool getFocusedTiming() const { return FocusedTiming; }
void setFocusedTiming() { FocusedTiming = true; }
+ // Returns true if Var is a global variable that is used by the profiling
+ // code.
+ static bool isProfileGlobal(const VariableDeclaration &Var);
+
// Passes over the CFG.
void translate();
// After the CFG is fully constructed, iterate over the nodes and
@@ -188,6 +195,15 @@
private:
Cfg(GlobalContext *Ctx, uint32_t SequenceNumber);
+ // Adds a call to the ProfileSummary runtime function as the first instruction
+ // in this CFG's entry block.
+ void addCallToProfileSummary();
+
+  // Iterates over the basic blocks in this CFG, adding profiling code to each
+  // one of them. The globals that the profiling code needs to be defined are
+  // appended to GlobalInits.
+ void profileBlocks();
+
GlobalContext *Ctx;
uint32_t SequenceNumber; // output order for emission
VerboseMask VMask;
@@ -209,6 +225,8 @@
std::unique_ptr<TargetLowering> Target;
std::unique_ptr<VariablesMetadata> VMetadata;
std::unique_ptr<Assembler> TargetAssembler;
+ // Globals required by this CFG. Mostly used for the profiler's globals.
+ std::unique_ptr<VariableDeclarationList> GlobalInits;
// CurrentNode is maintained during dumping/emitting just for
// validating Variable::DefNode. Normally, a traversal over
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index 87eee0f..68578b9 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -15,6 +15,7 @@
#include "assembler.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
+#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceLiveness.h"
#include "IceOperand.h"
@@ -1243,4 +1244,31 @@
}
}
+// Prepends to this node a call to the llvm.nacl.atomic.rmw.i64 intrinsic that
+// atomically adds 1 to the 64-bit counter identified by Var's name.
+void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
+  constexpr char RMW_I64[] = "llvm.nacl.atomic.rmw.i64";
+
+  GlobalContext *Context = Func->getContext();
+
+  // Look up the intrinsic's description; it is expected to always be found.
+  bool BadIntrinsic = false;
+  const Intrinsics::FullIntrinsicInfo *Info =
+      Context->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
+  assert(!BadIntrinsic);
+  assert(Info != nullptr);
+
+  // The counter is referenced through an external-symbol constant that
+  // carries Var's name, not through Var itself.
+  Operand *RMWI64Name = Context->getConstantExternSym(RMW_I64);
+  Constant *Counter = Context->getConstantExternSym(Var->getName());
+  Constant *AtomicRMWOp = Context->getConstantInt32(Intrinsics::AtomicAdd);
+  Constant *One = Context->getConstantInt64(1);
+  Constant *OrderAcquireRelease =
+      Context->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);
+
+  // The i64 destination variable is not read anywhere in this function.
+  // NOTE(review): create() is passed 5, but only the 4 arguments below are
+  // added -- confirm whether the count is meant to be 4.
+  InstIntrinsicCall *Inst = InstIntrinsicCall::create(
+      Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
+  // Arguments, in order: operation (add), counter, addend, memory ordering.
+  Inst->addArg(AtomicRMWOp);
+  Inst->addArg(Counter);
+  Inst->addArg(One);
+  Inst->addArg(OrderAcquireRelease);
+  // push_front makes the increment the first instruction of the node.
+  Insts.push_front(Inst);
+}
+
} // end of namespace Ice
diff --git a/src/IceCfgNode.h b/src/IceCfgNode.h
index e4fe2f9..473c47e 100644
--- a/src/IceCfgNode.h
+++ b/src/IceCfgNode.h
@@ -91,6 +91,8 @@
void emitIAS(Cfg *Func) const;
void dump(Cfg *Func) const;
+ void profileExecutionCount(VariableDeclaration *Var);
+
private:
CfgNode(Cfg *Func, SizeT LabelIndex);
Cfg *const Func;
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index 203b54e..069c3e3 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -65,6 +65,12 @@
DumpStats("szstats",
cl::desc("Print statistics after translating each function"));
+// -enable-block-profile: off by default. Its value is copied into the ClFlags
+// object through setEnableBlockProfile().
+cl::opt<bool> EnableBlockProfile(
+    "enable-block-profile",
+    cl::desc("If true, instrument basic blocks, and output profiling "
+             "information to stdout at the end of program execution."),
+    cl::init(false));
+
cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"));
@@ -261,6 +267,7 @@
OutFlags.DisableIRGeneration = false;
OutFlags.DisableTranslation = false;
OutFlags.DumpStats = false;
+ OutFlags.EnableBlockProfile = false;
OutFlags.FunctionSections = false;
OutFlags.GenerateUnitTestMessages = false;
OutFlags.PhiEdgeSplit = false;
@@ -311,6 +318,7 @@
OutFlags.setDisableIRGeneration(::DisableIRGeneration);
OutFlags.setDisableTranslation(::DisableTranslation);
OutFlags.setDumpStats(::DumpStats);
+ OutFlags.setEnableBlockProfile(::EnableBlockProfile);
OutFlags.setFunctionSections(::FunctionSections);
OutFlags.setNumTranslationThreads(::NumThreads);
OutFlags.setOptLevel(::OLevel);
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 517c77f..7df6973 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -65,6 +65,9 @@
bool getDumpStats() const { return ALLOW_DUMP && DumpStats; }
void setDumpStats(bool NewValue) { DumpStats = NewValue; }
+  // Whether basic block profiling is enabled; Cfg::translate() instruments
+  // the CFG's blocks when this is true.
+  bool getEnableBlockProfile() const { return EnableBlockProfile; }
+  void setEnableBlockProfile(bool NewValue) { EnableBlockProfile = NewValue; }
+
bool getFunctionSections() const { return FunctionSections; }
void setFunctionSections(bool NewValue) { FunctionSections = NewValue; }
@@ -182,6 +185,7 @@
bool DisableIRGeneration;
bool DisableTranslation;
bool DumpStats;
+ bool EnableBlockProfile;
bool FunctionSections;
bool GenerateUnitTestMessages;
bool PhiEdgeSplit;
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index aab663c..9761dde 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp
@@ -383,9 +383,8 @@
for (VariableDeclaration::Initializer *Init : Var->getInitializers()) {
switch (Init->getKind()) {
case VariableDeclaration::Initializer::DataInitializerKind: {
- const auto Data =
- llvm::cast<VariableDeclaration::DataInitializer>(Init)
- ->getContents();
+ const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(
+ Init)->getContents();
Section->appendData(Str, llvm::StringRef(Data.data(), Data.size()));
break;
}
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 8de57f3..3b54c03 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Timer.h"
#include "IceCfg.h"
+#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
@@ -277,6 +278,7 @@
Cfg::setCurrentCfg(nullptr);
continue; // Func goes out of scope and gets deleted
}
+
Func->translate();
EmitterWorkItem *Item = nullptr;
if (Func->hasError()) {
@@ -285,6 +287,7 @@
getStrError() << "ICE translation error: " << Func->getFunctionName()
<< ": " << Func->getError() << "\n";
Item = new EmitterWorkItem(Func->getSequenceNumber());
+ Item->setGlobalInits(Func->getGlobalInits());
} else {
Func->getAssembler<>()->setInternal(Func->getInternal());
switch (getFlags().getOutFileType()) {
@@ -299,11 +302,15 @@
// Copy relevant fields into Asm before Func is deleted.
Asm->setFunctionName(Func->getFunctionName());
Item = new EmitterWorkItem(Func->getSequenceNumber(), Asm);
+ Item->setGlobalInits(Func->getGlobalInits());
} break;
case FT_Asm:
// The Cfg has not been emitted yet, so stats are not ready
// to be dumped.
+ std::unique_ptr<VariableDeclarationList> GlobalInits =
+ Func->getGlobalInits();
Item = new EmitterWorkItem(Func->getSequenceNumber(), Func.release());
+ Item->setGlobalInits(std::move(GlobalInits));
break;
}
}
@@ -316,6 +323,43 @@
namespace {
+// Builds the constant, externally visible __Sz_block_profile_info global: one
+// pointer (relocation) per profiler-generated global found in Globals,
+// followed by a zero terminator. The __Sz_profile_summary function iterates
+// over this array for printing the profiling counters.
+VariableDeclaration *blockProfileInfo(const VariableDeclarationList &Globals) {
+  VariableDeclaration *InfoArray = VariableDeclaration::create();
+  InfoArray->setAlignment(typeWidthInBytes(IceType_i64));
+  InfoArray->setIsConstant(true);
+
+  // Note: if you change this symbol, make sure to update
+  // runtime/szrt_profiler.c as well.
+  InfoArray->setName("__Sz_block_profile_info");
+  InfoArray->setLinkage(llvm::GlobalValue::ExternalLinkage);
+
+  // Every entry points at the start of a block's profiling record.
+  constexpr RelocOffsetT StartOfRecord = 0;
+  for (const VariableDeclaration *Candidate : Globals) {
+    if (!Cfg::isProfileGlobal(*Candidate))
+      continue;
+    InfoArray->addInitializer(
+        new VariableDeclaration::RelocInitializer(Candidate, StartOfRecord));
+  }
+
+  // Terminate the array with a 64-bit zero entry. For 32-bit architectures
+  // this will waste 4 bytes.
+  const SizeT TerminatorSize = typeWidthInBytes(IceType_i64);
+  InfoArray->addInitializer(
+      new VariableDeclaration::ZeroInitializer(TerminatorSize));
+
+  return InfoArray;
+}
+
+// Appends to Globals the __Sz_block_profile_info array that describes the
+// profiler-generated globals currently in Globals.
+void addBlockProfileInfoArrayToGlobals(VariableDeclarationList *Globals) {
+  // Deliberately a separate statement: the array must be fully built from the
+  // current contents of Globals before the list is extended with it.
+  VariableDeclaration *const InfoArray = blockProfileInfo(*Globals);
+  Globals->push_back(InfoArray);
+}
+
void lowerGlobals(GlobalContext *Ctx,
std::unique_ptr<VariableDeclarationList> VariableDeclarations,
TargetDataLowering *DataLowering) {
@@ -331,6 +375,13 @@
}
if (Ctx->getFlags().getDisableTranslation())
return;
+
+ // There should be no need to emit the block_profile_info array if profiling
+ // is disabled. In practice, given that szrt_profiler.o will always be
+ // embedded in the application, we need to add it. In a non-profiled build
+ // this array will only contain the nullptr terminator.
+ addBlockProfileInfoArrayToGlobals(VariableDeclarations.get());
+
DataLowering->lowerGlobals(std::move(VariableDeclarations));
}
@@ -340,6 +391,13 @@
Pending.resize(Index + 1);
}
+// Appends every declaration pointer held by src to the end of dst. A null src
+// is ignored. src is taken by value, so the source list object itself is
+// destroyed when the call returns.
+void addAllIfNotNull(std::unique_ptr<VariableDeclarationList> src,
+                     VariableDeclarationList *dst) {
+  if (!src)
+    return;
+  dst->insert(dst->end(), src->begin(), src->end());
+}
+
} // end of anonymous namespace
void GlobalContext::emitItems() {
@@ -350,6 +408,8 @@
// the work queue, and if it's not the item we're waiting for, we
// insert it into Pending and repeat. The work item is deleted
// after it is processed.
+ std::unique_ptr<VariableDeclarationList> GlobalInits(
+ new VariableDeclarationList());
std::vector<EmitterWorkItem *> Pending;
uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
while (true) {
@@ -359,7 +419,7 @@
if (RawItem == nullptr)
RawItem = emitQueueBlockingPop();
if (RawItem == nullptr)
- return;
+ break;
uint32_t ItemSeq = RawItem->getSequenceNumber();
if (Threaded && ItemSeq != DesiredSequenceNumber) {
resizePending(Pending, ItemSeq);
@@ -373,10 +433,10 @@
case EmitterWorkItem::WI_Nop:
break;
case EmitterWorkItem::WI_GlobalInits: {
- lowerGlobals(this, Item->getGlobalInits(),
- TargetDataLowering::createLowering(this).get());
+ addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get());
} break;
case EmitterWorkItem::WI_Asm: {
+ addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get());
std::unique_ptr<Assembler> Asm = Item->getAsm();
Asm->alignFunction();
IceString MangledName = mangleName(Asm->getFunctionName());
@@ -398,6 +458,9 @@
case EmitterWorkItem::WI_Cfg: {
if (!ALLOW_DUMP)
llvm::report_fatal_error("WI_Cfg work item created inappropriately");
+
+ addAllIfNotNull(Item->getGlobalInits(), GlobalInits.get());
+
assert(getFlags().getOutFileType() == FT_Asm);
std::unique_ptr<Cfg> Func = Item->getCfg();
// Unfortunately, we have to temporarily install the Cfg in TLS
@@ -410,6 +473,9 @@
} break;
}
}
+
+ lowerGlobals(this, std::move(GlobalInits),
+ TargetDataLowering::createLowering(this).get());
}
// Scan a string for S[0-9A-Z]*_ patterns and replace them with
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index b8b633a..90bb0e3 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -3119,11 +3119,10 @@
Func->setError("Unexpected memory ordering for AtomicRMW");
return;
}
- lowerAtomicRMW(
- Instr->getDest(),
- static_cast<uint32_t>(
- llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
- Instr->getArg(1), Instr->getArg(2));
+ lowerAtomicRMW(Instr->getDest(),
+ static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
+ Instr->getArg(0))->getValue()),
+ Instr->getArg(1), Instr->getArg(2));
return;
case Intrinsics::AtomicStore: {
if (!Intrinsics::isMemoryOrderValid(
diff --git a/src/IceThreading.cpp b/src/IceThreading.cpp
index 5576abb..5cdaaa8 100644
--- a/src/IceThreading.cpp
+++ b/src/IceThreading.cpp
@@ -30,8 +30,15 @@
: Sequence(Seq), Kind(WI_Cfg), GlobalInits(nullptr), Function(nullptr),
RawFunc(F) {}
+// Takes ownership of GloblInits and attaches it to this work item. Only
+// WI_Asm and WI_Cfg items may carry globals this way.
+// NOTE(review): the translation-error path in GlobalContext calls this on an
+// item constructed from a sequence number only -- confirm that item's kind
+// satisfies the assert below.
+void EmitterWorkItem::setGlobalInits(
+    std::unique_ptr<VariableDeclarationList> GloblInits) {
+  const ItemKind ThisKind = getKind();
+  (void)ThisKind; // Only read by the assert.
+  assert(ThisKind == WI_Asm || ThisKind == WI_Cfg);
+  GlobalInits = std::move(GloblInits);
+}
+
std::unique_ptr<VariableDeclarationList> EmitterWorkItem::getGlobalInits() {
- assert(getKind() == WI_GlobalInits);
+ assert(getKind() == WI_GlobalInits || getKind() == WI_Asm ||
+ getKind() == WI_Cfg);
return std::move(GlobalInits);
}
diff --git a/src/IceThreading.h b/src/IceThreading.h
index 9ae3b67..35e1bfb 100644
--- a/src/IceThreading.h
+++ b/src/IceThreading.h
@@ -190,6 +190,7 @@
EmitterWorkItem(uint32_t Seq, Cfg *F);
uint32_t getSequenceNumber() const { return Sequence; }
ItemKind getKind() const { return Kind; }
+ void setGlobalInits(std::unique_ptr<VariableDeclarationList> GloblInits);
std::unique_ptr<VariableDeclarationList> getGlobalInits();
std::unique_ptr<Assembler> getAsm();
std::unique_ptr<Cfg> getCfg();