| //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===// |
| // |
| // The Subzero Code Generator |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// \brief Defines aspects of the compilation that persist across multiple |
| /// functions. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "IceGlobalContext.h" |
| |
| #include "IceCfg.h" |
| #include "IceCfgNode.h" |
| #include "IceClFlags.h" |
| #include "IceDefs.h" |
| #include "IceELFObjectWriter.h" |
| #include "IceGlobalInits.h" |
| #include "IceLiveness.h" |
| #include "IceOperand.h" |
| #include "IceRevision.h" |
| #include "IceTargetLowering.h" |
| #include "IceTimerTree.h" |
| #include "IceTypes.def" |
| #include "IceTypes.h" |
| |
| #ifdef __clang__ |
| #pragma clang diagnostic push |
| #pragma clang diagnostic ignored "-Wunused-parameter" |
| #endif // __clang__ |
| |
| #include "llvm/Support/Timer.h" |
| |
| #ifdef __clang__ |
| #pragma clang diagnostic pop |
| #endif // __clang__ |
| |
| #include <algorithm> // max() |
| |
| namespace std { |
| template <> struct hash<Ice::RelocatableTuple> { |
| size_t operator()(const Ice::RelocatableTuple &Key) const { |
| // Use the relocatable's name, plus the hash of a combination of the number |
| // of OffsetExprs and the known, fixed offset for the reloc. We left shift |
| // the known relocatable by 5 trying to minimize the interaction between the |
| // bits in OffsetExpr.size() and Key.Offset. |
| return hash<Ice::SizeT>()(Key.Name.getID()) + |
| hash<std::size_t>()(Key.OffsetExpr.size() + (Key.Offset << 5)); |
| } |
| }; |
| } // end of namespace std |
| |
| namespace Ice { |
| |
| namespace { |
| |
| // Define the key comparison function for the constant pool's unordered_map, |
| // but only for key types of interest: integer types, floating point types, and |
| // the special RelocatableTuple. |
| template <typename KeyType, class Enable = void> struct KeyCompare {}; |
| |
| template <typename KeyType> |
| struct KeyCompare<KeyType, |
| typename std::enable_if< |
| std::is_integral<KeyType>::value || |
| std::is_same<KeyType, RelocatableTuple>::value>::type> { |
| bool operator()(const KeyType &Value1, const KeyType &Value2) const { |
| return Value1 == Value2; |
| } |
| }; |
| template <typename KeyType> |
| struct KeyCompare<KeyType, typename std::enable_if< |
| std::is_floating_point<KeyType>::value>::type> { |
| bool operator()(const KeyType &Value1, const KeyType &Value2) const { |
| return !memcmp(&Value1, &Value2, sizeof(KeyType)); |
| } |
| }; |
| |
| // Define a key comparison function for sorting the constant pool's values |
| // after they are dumped to a vector. This covers integer types, floating point |
| // types, and ConstantRelocatable values. |
| template <typename ValueType, class Enable = void> struct KeyCompareLess {}; |
| |
| template <typename ValueType> |
| struct KeyCompareLess<ValueType, |
| typename std::enable_if<std::is_floating_point< |
| typename ValueType::PrimType>::value>::type> { |
| bool operator()(const Constant *Const1, const Constant *Const2) const { |
| using CompareType = uint64_t; |
| static_assert(sizeof(typename ValueType::PrimType) <= sizeof(CompareType), |
| "Expected floating-point type of width 64-bit or less"); |
| typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue(); |
| typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue(); |
| // We avoid "V1<V2" because of NaN. |
| // We avoid "memcmp(&V1,&V2,sizeof(V1))<0" which depends on the |
| // endian-ness of the host system running Subzero. |
| // Instead, compare the result of bit_cast to uint64_t. |
| uint64_t I1 = 0, I2 = 0; |
| memcpy(&I1, &V1, sizeof(V1)); |
| memcpy(&I2, &V2, sizeof(V2)); |
| return I1 < I2; |
| } |
| }; |
| template <typename ValueType> |
| struct KeyCompareLess<ValueType, |
| typename std::enable_if<std::is_integral< |
| typename ValueType::PrimType>::value>::type> { |
| bool operator()(const Constant *Const1, const Constant *Const2) const { |
| typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue(); |
| typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue(); |
| return V1 < V2; |
| } |
| }; |
| template <typename ValueType> |
| struct KeyCompareLess< |
| ValueType, typename std::enable_if< |
| std::is_same<ValueType, ConstantRelocatable>::value>::type> { |
| bool operator()(const Constant *Const1, const Constant *Const2) const { |
| auto *V1 = llvm::cast<ValueType>(Const1); |
| auto *V2 = llvm::cast<ValueType>(Const2); |
| if (V1->getName() == V2->getName()) |
| return V1->getOffset() < V2->getOffset(); |
| return V1->getName() < V2->getName(); |
| } |
| }; |
| |
| // TypePool maps constants of type KeyType (e.g. float) to pointers to |
| // type ValueType (e.g. ConstantFloat). |
| template <Type Ty, typename KeyType, typename ValueType> class TypePool { |
| TypePool(const TypePool &) = delete; |
| TypePool &operator=(const TypePool &) = delete; |
| |
| public: |
| TypePool() = default; |
| ValueType *getOrAdd(GlobalContext *Ctx, KeyType Key) { |
| auto Iter = Pool.find(Key); |
| if (Iter != Pool.end()) { |
| Iter->second->updateLookupCount(); |
| return Iter->second; |
| } |
| auto *Result = ValueType::create(Ctx, Ty, Key); |
| Pool[Key] = Result; |
| Result->updateLookupCount(); |
| return Result; |
| } |
| ConstantList getConstantPool() const { |
| ConstantList Constants; |
| Constants.reserve(Pool.size()); |
| for (auto &I : Pool) |
| Constants.push_back(I.second); |
| // The sort (and its KeyCompareLess machinery) is not strictly necessary, |
| // but is desirable for producing output that is deterministic across |
| // unordered_map::iterator implementations. |
| std::sort(Constants.begin(), Constants.end(), KeyCompareLess<ValueType>()); |
| return Constants; |
| } |
| size_t size() const { return Pool.size(); } |
| |
| private: |
| // Use the default hash function, and a custom key comparison function. The |
| // key comparison function for floating point variables can't use the default |
| // == based implementation because of special C++ semantics regarding +0.0, |
| // -0.0, and NaN comparison. However, it's OK to use the default hash for |
| // floating point values because KeyCompare is the final source of truth - in |
| // the worst case a "false" collision must be resolved. |
| using ContainerType = |
| std::unordered_map<KeyType, ValueType *, std::hash<KeyType>, |
| KeyCompare<KeyType>>; |
| ContainerType Pool; |
| }; |
| |
| // UndefPool maps ICE types to the corresponding ConstantUndef values. |
| class UndefPool { |
| UndefPool(const UndefPool &) = delete; |
| UndefPool &operator=(const UndefPool &) = delete; |
| |
| public: |
| UndefPool() : Pool(IceType_NUM) {} |
| |
| ConstantUndef *getOrAdd(GlobalContext *Ctx, Type Ty) { |
| if (Pool[Ty] == nullptr) |
| Pool[Ty] = ConstantUndef::create(Ctx, Ty); |
| return Pool[Ty]; |
| } |
| |
| private: |
| std::vector<ConstantUndef *> Pool; |
| }; |
| |
| } // end of anonymous namespace |
| |
| // The global constant pool bundles individual pools of each type of |
| // interest. |
| class ConstantPool { |
| ConstantPool(const ConstantPool &) = delete; |
| ConstantPool &operator=(const ConstantPool &) = delete; |
| |
| public: |
| ConstantPool() = default; |
| TypePool<IceType_f32, float, ConstantFloat> Floats; |
| TypePool<IceType_f64, double, ConstantDouble> Doubles; |
| TypePool<IceType_i1, int8_t, ConstantInteger32> Integers1; |
| TypePool<IceType_i8, int8_t, ConstantInteger32> Integers8; |
| TypePool<IceType_i16, int16_t, ConstantInteger32> Integers16; |
| TypePool<IceType_i32, int32_t, ConstantInteger32> Integers32; |
| TypePool<IceType_i64, int64_t, ConstantInteger64> Integers64; |
| TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable> Relocatables; |
| TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable> |
| ExternRelocatables; |
| UndefPool Undefs; |
| }; |
| |
| void GlobalContext::waitForWorkerThreads() { |
| if (WaitForWorkerThreadsCalled.exchange(true)) |
| return; |
| optQueueNotifyEnd(); |
| for (std::thread &Worker : TranslationThreads) { |
| Worker.join(); |
| } |
| TranslationThreads.clear(); |
| |
| // Only notify the emit queue to end after all the translation threads have |
| // ended. |
| emitQueueNotifyEnd(); |
| for (std::thread &Worker : EmitterThreads) { |
| Worker.join(); |
| } |
| EmitterThreads.clear(); |
| |
| if (BuildDefs::timers()) { |
| auto Timers = getTimers(); |
| for (ThreadContext *TLS : AllThreadContexts) |
| Timers->mergeFrom(TLS->Timers); |
| } |
| if (BuildDefs::dump()) { |
| // Do a separate loop over AllThreadContexts to avoid holding two locks at |
| // once. |
| auto Stats = getStatsCumulative(); |
| for (ThreadContext *TLS : AllThreadContexts) |
| Stats->add(TLS->StatsCumulative); |
| } |
| } |
| |
| void GlobalContext::CodeStats::dump(const Cfg *Func, GlobalContext *Ctx) { |
| if (!BuildDefs::dump()) |
| return; |
| OstreamLocker _(Ctx); |
| Ostream &Str = Ctx->getStrDump(); |
| const std::string Name = |
| (Func == nullptr ? "_FINAL_" : Func->getFunctionNameAndSize()); |
| #define X(str, tag) \ |
| Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n"; |
| CODESTATS_TABLE |
| #undef X |
| Str << "|" << Name << "|Spills+Fills|" |
| << Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n"; |
| Str << "|" << Name << "|Memory Usage |"; |
| if (const auto MemUsed = static_cast<size_t>( |
| llvm::TimeRecord::getCurrentTime(false).getMemUsed())) { |
| static constexpr size_t _1MB = 1024 * 1024; |
| Str << (MemUsed / _1MB) << " MB"; |
| } else { |
| Str << "(requires '-track-memory')"; |
| } |
| Str << "\n"; |
| Str << "|" << Name << "|CPool Sizes "; |
| { |
| auto Pool = Ctx->getConstPool(); |
| Str << "|f32=" << Pool->Floats.size(); |
| Str << "|f64=" << Pool->Doubles.size(); |
| Str << "|i1=" << Pool->Integers1.size(); |
| Str << "|i8=" << Pool->Integers8.size(); |
| Str << "|i16=" << Pool->Integers16.size(); |
| Str << "|i32=" << Pool->Integers32.size(); |
| Str << "|i64=" << Pool->Integers64.size(); |
| Str << "|Rel=" << Pool->Relocatables.size(); |
| Str << "|ExtRel=" << Pool->ExternRelocatables.size(); |
| } |
| Str << "\n"; |
| if (Func != nullptr) { |
| Str << "|" << Name << "|Cfg Memory |" << Func->getTotalMemoryMB() |
| << " MB\n"; |
| Str << "|" << Name << "|Liveness Memory |" << Func->getLivenessMemoryMB() |
| << " MB\n"; |
| } |
| } |
| |
| namespace { |
| |
| // By default, wake up the main parser thread when the OptQ gets half empty. |
| static constexpr size_t DefaultOptQWakeupSize = GlobalContext::MaxOptQSize >> 1; |
| |
| } // end of anonymous namespace |
| |
| GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError, |
| ELFStreamer *ELFStr) |
| : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(), |
| StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this), |
| ObjectWriter(), |
| OptQWakeupSize(std::max(DefaultOptQWakeupSize, |
| size_t(getFlags().getNumTranslationThreads()))), |
| OptQ(/*Sequential=*/getFlags().isSequential(), |
| /*MaxSize=*/ |
| getFlags().isParseParallel() |
| ? MaxOptQSize |
| : getFlags().getNumTranslationThreads()), |
| // EmitQ is allowed unlimited size. |
| EmitQ(/*Sequential=*/getFlags().isSequential()), |
| DataLowering(TargetDataLowering::createLowering(this)) { |
| assert(OsDump && "OsDump is not defined for GlobalContext"); |
| assert(OsEmit && "OsEmit is not defined for GlobalContext"); |
| assert(OsError && "OsError is not defined for GlobalContext"); |
| // Make sure thread_local fields are properly initialized before any |
| // accesses are made. Do this here instead of at the start of |
| // main() so that all clients (e.g. unit tests) can benefit for |
| // free. |
| GlobalContext::TlsInit(); |
| Cfg::TlsInit(); |
| Liveness::TlsInit(); |
| // Create a new ThreadContext for the current thread. No need to |
| // lock AllThreadContexts at this point since no other threads have |
| // access yet to this GlobalContext object. |
| ThreadContext *MyTLS = new ThreadContext(); |
| AllThreadContexts.push_back(MyTLS); |
| ICE_TLS_SET_FIELD(TLS, MyTLS); |
| // Pre-register built-in stack names. |
| if (BuildDefs::timers()) { |
| // TODO(stichnot): There needs to be a strong relationship between |
| // the newTimerStackID() return values and TSK_Default/TSK_Funcs. |
| newTimerStackID("Total across all functions"); |
| newTimerStackID("Per-function summary"); |
| } |
| Timers.initInto(MyTLS->Timers); |
| switch (getFlags().getOutFileType()) { |
| case FT_Elf: |
| ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr)); |
| break; |
| case FT_Asm: |
| case FT_Iasm: |
| break; |
| } |
| // Cache up front common constants. |
| #define X(tag, sizeLog2, align, elts, elty, str, rcstr) \ |
| ConstZeroForType[IceType_##tag] = getConstantZeroInternal(IceType_##tag); |
| ICETYPE_TABLE; |
| #undef X |
| ConstantTrue = getConstantInt1Internal(1); |
| // Define runtime helper functions. |
| #define X(Tag, Name) \ |
| RuntimeHelperFunc[static_cast<size_t>(RuntimeHelper::H_##Tag)] = \ |
| getConstantExternSym(getGlobalString(Name)); |
| RUNTIME_HELPER_FUNCTIONS_TABLE |
| #undef X |
| |
| TargetLowering::staticInit(this); |
| |
| if (getFlags().getEmitRevision()) { |
| // Embed the Subzero revision into the compiled binary by creating a special |
| // global variable initialized with the revision string. |
| auto *Revision = VariableDeclaration::create(&Globals, true); |
| Revision->setName(this, "__Sz_revision"); |
| Revision->setIsConstant(true); |
| const char *RevisionString = getSubzeroRevision(); |
| Revision->addInitializer(VariableDeclaration::DataInitializer::create( |
| &Globals, RevisionString, 1 + strlen(RevisionString))); |
| Globals.push_back(Revision); |
| } |
| } |
| |
| void GlobalContext::translateFunctionsWrapper(ThreadContext *MyTLS) { |
| ICE_TLS_SET_FIELD(TLS, MyTLS); |
| translateFunctions(); |
| } |
| |
| void GlobalContext::translateFunctions() { |
| TimerMarker Timer(TimerStack::TT_translateFunctions, this); |
| while (std::unique_ptr<OptWorkItem> OptItem = optQueueBlockingPop()) { |
| std::unique_ptr<EmitterWorkItem> Item; |
| auto Func = OptItem->getParsedCfg(); |
| // Install Func in TLS for Cfg-specific container allocators. |
| CfgLocalAllocatorScope _(Func.get()); |
| // Reset per-function stats being accumulated in TLS. |
| resetStats(); |
| // Set verbose level to none if the current function does NOT match the |
| // -verbose-focus command-line option. |
| if (!getFlags().matchVerboseFocusOn(Func->getFunctionName(), |
| Func->getSequenceNumber())) |
| Func->setVerbose(IceV_None); |
| // Disable translation if -notranslate is specified, or if the current |
| // function matches the -translate-only option. If translation is disabled, |
| // just dump the high-level IR and continue. |
| if (getFlags().getDisableTranslation() || |
| !getFlags().matchTranslateOnly(Func->getFunctionName(), |
| Func->getSequenceNumber())) { |
| Func->dump(); |
| // Add a dummy work item as a placeholder. This maintains sequence |
| // numbers so that the emitter thread will emit subsequent functions. |
| Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber()); |
| emitQueueBlockingPush(std::move(Item)); |
| continue; // Func goes out of scope and gets deleted |
| } |
| |
| Func->translate(); |
| if (Func->hasError()) { |
| getErrorStatus()->assign(EC_Translation); |
| OstreamLocker L(this); |
| getStrError() << "ICE translation error: " << Func->getFunctionName() |
| << ": " << Func->getError() << ": " |
| << Func->getFunctionNameAndSize() << "\n"; |
| Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber()); |
| } else { |
| Func->getAssembler<>()->setInternal(Func->getInternal()); |
| switch (getFlags().getOutFileType()) { |
| case FT_Elf: |
| case FT_Iasm: { |
| Func->emitIAS(); |
| // The Cfg has already emitted into the assembly buffer, so |
| // stats have been fully collected into this thread's TLS. |
| // Dump them before TLS is reset for the next Cfg. |
| if (BuildDefs::dump()) |
| dumpStats(Func.get()); |
| auto Asm = Func->releaseAssembler(); |
| // Copy relevant fields into Asm before Func is deleted. |
| Asm->setFunctionName(Func->getFunctionName()); |
| Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(), |
| std::move(Asm)); |
| Item->setGlobalInits(Func->getGlobalInits()); |
| } break; |
| case FT_Asm: |
| // The Cfg has not been emitted yet, so stats are not ready |
| // to be dumped. |
| std::unique_ptr<VariableDeclarationList> GlobalInits = |
| Func->getGlobalInits(); |
| Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(), |
| std::move(Func)); |
| Item->setGlobalInits(std::move(GlobalInits)); |
| break; |
| } |
| } |
| assert(Item != nullptr); |
| emitQueueBlockingPush(std::move(Item)); |
| // The Cfg now gets deleted as Func goes out of scope. |
| } |
| } |
| |
| namespace { |
| |
| // Ensure Pending is large enough that Pending[Index] is valid. |
| void resizePending(std::vector<std::unique_ptr<EmitterWorkItem>> *Pending, |
| uint32_t Index) { |
| if (Index >= Pending->size()) |
| Utils::reserveAndResize(*Pending, Index + 1); |
| } |
| |
| } // end of anonymous namespace |
| |
| // static |
| void GlobalContext::TlsInit() { ICE_TLS_INIT_FIELD(TLS); } |
| |
| void GlobalContext::emitFileHeader() { |
| TimerMarker T1(Ice::TimerStack::TT_emitAsm, this); |
| if (getFlags().getOutFileType() == FT_Elf) { |
| getObjectWriter()->writeInitialELFHeader(); |
| } else { |
| if (!BuildDefs::dump()) { |
| getStrError() << "emitFileHeader for non-ELF"; |
| getErrorStatus()->assign(EC_Translation); |
| } |
| TargetHeaderLowering::createLowering(this)->lower(); |
| } |
| } |
| |
| void GlobalContext::lowerConstants() { DataLowering->lowerConstants(); } |
| |
| void GlobalContext::lowerJumpTables() { DataLowering->lowerJumpTables(); } |
| |
| void GlobalContext::emitTargetRODataSections() { |
| DataLowering->emitTargetRODataSections(); |
| } |
| |
| void GlobalContext::saveBlockInfoPtrs() { |
| for (VariableDeclaration *Global : Globals) { |
| if (Cfg::isProfileGlobal(*Global)) { |
| ProfileBlockInfos.push_back(Global); |
| } |
| } |
| } |
| |
| void GlobalContext::lowerGlobals(const std::string &SectionSuffix) { |
| TimerMarker T(TimerStack::TT_emitGlobalInitializers, this); |
| const bool DumpGlobalVariables = |
| BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit) && |
| getFlags().matchVerboseFocusOn("", 0); |
| if (DumpGlobalVariables) { |
| OstreamLocker L(this); |
| Ostream &Stream = getStrDump(); |
| for (const Ice::VariableDeclaration *Global : Globals) { |
| Global->dump(Stream); |
| } |
| } |
| if (getFlags().getDisableTranslation()) |
| return; |
| |
| saveBlockInfoPtrs(); |
| // If we need to shuffle the layout of global variables, shuffle them now. |
| if (getFlags().getReorderGlobalVariables()) { |
| // Create a random number generator for global variable reordering. |
| RandomNumberGenerator RNG(getFlags().getRandomSeed(), |
| RPE_GlobalVariableReordering); |
| RandomShuffle(Globals.begin(), Globals.end(), |
| [&RNG](int N) { return (uint32_t)RNG.next(N); }); |
| } |
| |
| if (!BuildDefs::minimal() && Instrumentor) |
| Instrumentor->instrumentGlobals(Globals); |
| |
| DataLowering->lowerGlobals(Globals, SectionSuffix); |
| if (ProfileBlockInfos.empty() && DisposeGlobalVariablesAfterLowering) { |
| Globals.clearAndPurge(); |
| } else { |
| Globals.clear(); |
| } |
| } |
| |
| void GlobalContext::lowerProfileData() { |
| // ProfileBlockInfoVarDecl is initialized in the constructor, and will only |
| // ever be nullptr after this method completes. This assertion is a convoluted |
| // way of ensuring lowerProfileData is invoked a single time. |
| assert(ProfileBlockInfoVarDecl == nullptr); |
| |
| auto GlobalVariablePool = getInitializerAllocator(); |
| ProfileBlockInfoVarDecl = |
| VariableDeclaration::createExternal(GlobalVariablePool.get()); |
| ProfileBlockInfoVarDecl->setAlignment(typeWidthInBytes(IceType_i64)); |
| ProfileBlockInfoVarDecl->setIsConstant(true); |
| |
| // Note: if you change this symbol, make sure to update |
| // runtime/szrt_profiler.c as well. |
| ProfileBlockInfoVarDecl->setName(this, "__Sz_block_profile_info"); |
| |
| for (const VariableDeclaration *PBI : ProfileBlockInfos) { |
| if (Cfg::isProfileGlobal(*PBI)) { |
| constexpr RelocOffsetT BlockExecutionCounterOffset = 0; |
| ProfileBlockInfoVarDecl->addInitializer( |
| VariableDeclaration::RelocInitializer::create( |
| GlobalVariablePool.get(), PBI, |
| {RelocOffset::create(this, BlockExecutionCounterOffset)})); |
| } |
| } |
| |
| // This adds a 64-bit sentinel entry to the end of our array. For 32-bit |
| // architectures this will waste 4 bytes. |
| const SizeT Sizeof64BitNullPtr = typeWidthInBytes(IceType_i64); |
| ProfileBlockInfoVarDecl->addInitializer( |
| VariableDeclaration::ZeroInitializer::create(GlobalVariablePool.get(), |
| Sizeof64BitNullPtr)); |
| Globals.push_back(ProfileBlockInfoVarDecl); |
| constexpr char ProfileDataSection[] = "$sz_profiler$"; |
| lowerGlobals(ProfileDataSection); |
| } |
| |
| void GlobalContext::emitterWrapper(ThreadContext *MyTLS) { |
| ICE_TLS_SET_FIELD(TLS, MyTLS); |
| emitItems(); |
| } |
| |
| void GlobalContext::emitItems() { |
| const bool Threaded = !getFlags().isSequential(); |
| // Pending is a vector containing the reassembled, ordered list of |
| // work items. When we're ready for the next item, we first check |
| // whether it's in the Pending list. If not, we take an item from |
| // the work queue, and if it's not the item we're waiting for, we |
| // insert it into Pending and repeat. The work item is deleted |
| // after it is processed. |
| std::vector<std::unique_ptr<EmitterWorkItem>> Pending; |
| uint32_t DesiredSequenceNumber = getFirstSequenceNumber(); |
| uint32_t ShuffleStartIndex = DesiredSequenceNumber; |
| uint32_t ShuffleEndIndex = DesiredSequenceNumber; |
| bool EmitQueueEmpty = false; |
| const uint32_t ShuffleWindowSize = |
| std::max(1u, getFlags().getReorderFunctionsWindowSize()); |
| bool Shuffle = Threaded && getFlags().getReorderFunctions(); |
| // Create a random number generator for function reordering. |
| RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_FunctionReordering); |
| |
| while (!EmitQueueEmpty) { |
| resizePending(&Pending, DesiredSequenceNumber); |
| // See if Pending contains DesiredSequenceNumber. |
| if (Pending[DesiredSequenceNumber] == nullptr) { |
| // We need to fetch an EmitterWorkItem from the queue. |
| auto RawItem = emitQueueBlockingPop(); |
| if (RawItem == nullptr) { |
| // This is the notifier for an empty queue. |
| EmitQueueEmpty = true; |
| } else { |
| // We get an EmitterWorkItem, we need to add it to Pending. |
| uint32_t ItemSeq = RawItem->getSequenceNumber(); |
| if (Threaded && ItemSeq != DesiredSequenceNumber) { |
| // Not the desired one, add it to Pending but do not increase |
| // DesiredSequenceNumber. Continue the loop, do not emit the item. |
| resizePending(&Pending, ItemSeq); |
| Pending[ItemSeq] = std::move(RawItem); |
| continue; |
| } |
| // ItemSeq == DesiredSequenceNumber, we need to check if we should |
| // emit it or not. If !Threaded, we're OK with ItemSeq != |
| // DesiredSequenceNumber. |
| Pending[DesiredSequenceNumber] = std::move(RawItem); |
| } |
| } |
| const auto *CurrentWorkItem = Pending[DesiredSequenceNumber].get(); |
| |
| // We have the desired EmitterWorkItem or nullptr as the end notifier. |
| // If the emitter queue is not empty, increase DesiredSequenceNumber and |
| // ShuffleEndIndex. |
| if (!EmitQueueEmpty) { |
| DesiredSequenceNumber++; |
| ShuffleEndIndex++; |
| } |
| |
| if (Shuffle) { |
| // Continue fetching EmitterWorkItem if function reordering is turned on, |
| // and emit queue is not empty, and the number of consecutive pending |
| // items is smaller than the window size, and RawItem is not a |
| // WI_GlobalInits kind. Emit WI_GlobalInits kind block first to avoid |
| // holding an arbitrarily large GlobalDeclarationList. |
| if (!EmitQueueEmpty && |
| ShuffleEndIndex - ShuffleStartIndex < ShuffleWindowSize && |
| CurrentWorkItem->getKind() != EmitterWorkItem::WI_GlobalInits) |
| continue; |
| |
| // Emit the EmitterWorkItem between Pending[ShuffleStartIndex] to |
| // Pending[ShuffleEndIndex]. If function reordering turned on, shuffle the |
| // pending items from Pending[ShuffleStartIndex] to |
| // Pending[ShuffleEndIndex]. |
| RandomShuffle(Pending.begin() + ShuffleStartIndex, |
| Pending.begin() + ShuffleEndIndex, |
| [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); }); |
| } |
| |
| // Emit the item from ShuffleStartIndex to ShuffleEndIndex. |
| for (uint32_t I = ShuffleStartIndex; I < ShuffleEndIndex; I++) { |
| std::unique_ptr<EmitterWorkItem> Item = std::move(Pending[I]); |
| |
| switch (Item->getKind()) { |
| case EmitterWorkItem::WI_Nop: |
| break; |
| case EmitterWorkItem::WI_GlobalInits: { |
| accumulateGlobals(Item->getGlobalInits()); |
| } break; |
| case EmitterWorkItem::WI_Asm: { |
| lowerGlobalsIfNoCodeHasBeenSeen(); |
| accumulateGlobals(Item->getGlobalInits()); |
| |
| std::unique_ptr<Assembler> Asm = Item->getAsm(); |
| Asm->alignFunction(); |
| GlobalString Name = Asm->getFunctionName(); |
| switch (getFlags().getOutFileType()) { |
| case FT_Elf: |
| getObjectWriter()->writeFunctionCode(Name, Asm->getInternal(), |
| Asm.get()); |
| break; |
| case FT_Iasm: { |
| OstreamLocker L(this); |
| Cfg::emitTextHeader(Name, this, Asm.get()); |
| Asm->emitIASBytes(this); |
| } break; |
| case FT_Asm: |
| llvm::report_fatal_error("Unexpected FT_Asm"); |
| break; |
| } |
| } break; |
| case EmitterWorkItem::WI_Cfg: { |
| if (!BuildDefs::dump()) |
| llvm::report_fatal_error("WI_Cfg work item created inappropriately"); |
| lowerGlobalsIfNoCodeHasBeenSeen(); |
| accumulateGlobals(Item->getGlobalInits()); |
| |
| assert(getFlags().getOutFileType() == FT_Asm); |
| std::unique_ptr<Cfg> Func = Item->getCfg(); |
| // Unfortunately, we have to temporarily install the Cfg in TLS |
| // because Variable::asType() uses the allocator to create the |
| // differently-typed copy. |
| CfgLocalAllocatorScope _(Func.get()); |
| Func->emit(); |
| dumpStats(Func.get()); |
| } break; |
| } |
| } |
| // Update the start index for next shuffling queue |
| ShuffleStartIndex = ShuffleEndIndex; |
| } |
| |
| // In case there are no code to be generated, we invoke the conditional |
| // lowerGlobals again -- this is a no-op if code has been emitted. |
| lowerGlobalsIfNoCodeHasBeenSeen(); |
| } |
| |
| GlobalContext::~GlobalContext() { |
| llvm::DeleteContainerPointers(AllThreadContexts); |
| LockedPtr<DestructorArray> Dtors = getDestructors(); |
| // Destructors are invoked in the opposite object construction order. |
| for (const auto &Dtor : reverse_range(*Dtors)) |
| Dtor(); |
| } |
| |
| void GlobalContext::dumpStrings() { |
| if (!getFlags().getDumpStrings()) |
| return; |
| OstreamLocker _(this); |
| Ostream &Str = getStrDump(); |
| Str << "GlobalContext strings:\n"; |
| getStrings()->dump(Str); |
| } |
| |
| void GlobalContext::dumpConstantLookupCounts() { |
| if (!BuildDefs::dump()) |
| return; |
| const bool DumpCounts = (getFlags().getVerbose() & IceV_ConstPoolStats) && |
| getFlags().matchVerboseFocusOn("", 0); |
| if (!DumpCounts) |
| return; |
| |
| OstreamLocker _(this); |
| Ostream &Str = getStrDump(); |
| Str << "Constant pool use stats: count+value+type\n"; |
| #define X(WhichPool) \ |
| for (auto *C : getConstPool()->WhichPool.getConstantPool()) { \ |
| Str << C->getLookupCount() << " "; \ |
| C->dump(Str); \ |
| Str << " " << C->getType() << "\n"; \ |
| } |
| X(Integers1); |
| X(Integers8); |
| X(Integers16); |
| X(Integers32); |
| X(Integers64); |
| X(Floats); |
| X(Doubles); |
| X(Relocatables); |
| X(ExternRelocatables); |
| #undef X |
| } |
| |
| // TODO(stichnot): Consider adding thread-local caches of constant pool entries |
| // to reduce contention. |
| |
| // All locking is done by the getConstantInt[0-9]+() target function. |
| Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) { |
| switch (Ty) { |
| case IceType_i1: |
| return getConstantInt1(Value); |
| case IceType_i8: |
| return getConstantInt8(Value); |
| case IceType_i16: |
| return getConstantInt16(Value); |
| case IceType_i32: |
| return getConstantInt32(Value); |
| case IceType_i64: |
| return getConstantInt64(Value); |
| default: |
| llvm_unreachable("Bad integer type for getConstant"); |
| } |
| return nullptr; |
| } |
| |
| Constant *GlobalContext::getConstantInt1Internal(int8_t ConstantInt1) { |
| ConstantInt1 &= INT8_C(1); |
| return getConstPool()->Integers1.getOrAdd(this, ConstantInt1); |
| } |
| |
| Constant *GlobalContext::getConstantInt8Internal(int8_t ConstantInt8) { |
| return getConstPool()->Integers8.getOrAdd(this, ConstantInt8); |
| } |
| |
| Constant *GlobalContext::getConstantInt16Internal(int16_t ConstantInt16) { |
| return getConstPool()->Integers16.getOrAdd(this, ConstantInt16); |
| } |
| |
| Constant *GlobalContext::getConstantInt32Internal(int32_t ConstantInt32) { |
| return getConstPool()->Integers32.getOrAdd(this, ConstantInt32); |
| } |
| |
| Constant *GlobalContext::getConstantInt64Internal(int64_t ConstantInt64) { |
| return getConstPool()->Integers64.getOrAdd(this, ConstantInt64); |
| } |
| |
| Constant *GlobalContext::getConstantFloat(float ConstantFloat) { |
| return getConstPool()->Floats.getOrAdd(this, ConstantFloat); |
| } |
| |
| Constant *GlobalContext::getConstantDouble(double ConstantDouble) { |
| return getConstPool()->Doubles.getOrAdd(this, ConstantDouble); |
| } |
| |
| Constant *GlobalContext::getConstantSymWithEmitString( |
| const RelocOffsetT Offset, const RelocOffsetArray &OffsetExpr, |
| GlobalString Name, const std::string &EmitString) { |
| return getConstPool()->Relocatables.getOrAdd( |
| this, RelocatableTuple(Offset, OffsetExpr, Name, EmitString)); |
| } |
| |
| Constant *GlobalContext::getConstantSym(RelocOffsetT Offset, |
| GlobalString Name) { |
| constexpr char EmptyEmitString[] = ""; |
| return getConstantSymWithEmitString(Offset, {}, Name, EmptyEmitString); |
| } |
| |
| Constant *GlobalContext::getConstantExternSym(GlobalString Name) { |
| constexpr RelocOffsetT Offset = 0; |
| return getConstPool()->ExternRelocatables.getOrAdd( |
| this, RelocatableTuple(Offset, {}, Name)); |
| } |
| |
| Constant *GlobalContext::getConstantUndef(Type Ty) { |
| return getConstPool()->Undefs.getOrAdd(this, Ty); |
| } |
| |
| Constant *GlobalContext::getConstantZero(Type Ty) { |
| Constant *Zero = ConstZeroForType[Ty]; |
| if (Zero == nullptr) |
| llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty)); |
| return Zero; |
| } |
| |
| // All locking is done by the getConstant*() target function. |
| Constant *GlobalContext::getConstantZeroInternal(Type Ty) { |
| switch (Ty) { |
| case IceType_i1: |
| return getConstantInt1Internal(0); |
| case IceType_i8: |
| return getConstantInt8Internal(0); |
| case IceType_i16: |
| return getConstantInt16Internal(0); |
| case IceType_i32: |
| return getConstantInt32Internal(0); |
| case IceType_i64: |
| return getConstantInt64Internal(0); |
| case IceType_f32: |
| return getConstantFloat(0); |
| case IceType_f64: |
| return getConstantDouble(0); |
| default: |
| return nullptr; |
| } |
| } |
| |
| ConstantList GlobalContext::getConstantPool(Type Ty) { |
| switch (Ty) { |
| case IceType_i1: |
| case IceType_i8: |
| return getConstPool()->Integers8.getConstantPool(); |
| case IceType_i16: |
| return getConstPool()->Integers16.getConstantPool(); |
| case IceType_i32: |
| return getConstPool()->Integers32.getConstantPool(); |
| case IceType_i64: |
| return getConstPool()->Integers64.getConstantPool(); |
| case IceType_f32: |
| return getConstPool()->Floats.getConstantPool(); |
| case IceType_f64: |
| return getConstPool()->Doubles.getConstantPool(); |
| case IceType_v4i1: |
| case IceType_v8i1: |
| case IceType_v16i1: |
| case IceType_v16i8: |
| case IceType_v8i16: |
| case IceType_v4i32: |
| case IceType_v4f32: |
| llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty)); |
| break; |
| case IceType_void: |
| case IceType_NUM: |
| break; |
| } |
| llvm_unreachable("Unknown type"); |
| } |
| |
| ConstantList GlobalContext::getConstantExternSyms() { |
| return getConstPool()->ExternRelocatables.getConstantPool(); |
| } |
| |
| GlobalString GlobalContext::getGlobalString(const std::string &Name) { |
| return GlobalString::createWithString(this, Name); |
| } |
| |
| JumpTableDataList GlobalContext::getJumpTables() { |
| JumpTableDataList JumpTables(*getJumpTableList()); |
| // Make order deterministic by sorting into functions and then ID of the jump |
| // table within that function. |
| std::sort(JumpTables.begin(), JumpTables.end(), |
| [](const JumpTableData &A, const JumpTableData &B) { |
| if (A.getFunctionName() != B.getFunctionName()) |
| return A.getFunctionName() < B.getFunctionName(); |
| return A.getId() < B.getId(); |
| }); |
| |
| if (getFlags().getReorderPooledConstants()) { |
| // If reorder-pooled-constants option is set to true, we also shuffle the |
| // jump tables before emitting them. |
| |
| // Create a random number generator for jump tables reordering, considering |
| // jump tables as pooled constants. |
| RandomNumberGenerator RNG(getFlags().getRandomSeed(), |
| RPE_PooledConstantReordering); |
| RandomShuffle(JumpTables.begin(), JumpTables.end(), |
| [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); }); |
| } |
| return JumpTables; |
| } |
| |
| void GlobalContext::addJumpTableData(JumpTableData JumpTable) { |
| getJumpTableList()->emplace_back(std::move(JumpTable)); |
| } |
| |
| TimerStackIdT GlobalContext::newTimerStackID(const std::string &Name) { |
| if (!BuildDefs::timers()) |
| return 0; |
| auto Timers = getTimers(); |
| TimerStackIdT NewID = Timers->size(); |
| Timers->push_back(TimerStack(Name)); |
| return NewID; |
| } |
| |
| TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID, |
| const std::string &Name) { |
| auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers; |
| assert(StackID < Timers->size()); |
| return Timers->at(StackID).getTimerID(Name); |
| } |
| |
| void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) { |
| auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers; |
| assert(StackID < Timers->size()); |
| Timers->at(StackID).push(ID); |
| } |
| |
| void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) { |
| auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers; |
| assert(StackID < Timers->size()); |
| Timers->at(StackID).pop(ID); |
| } |
| |
| void GlobalContext::resetTimer(TimerStackIdT StackID) { |
| auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers; |
| assert(StackID < Timers->size()); |
| Timers->at(StackID).reset(); |
| } |
| |
| std::string GlobalContext::getTimerName(TimerStackIdT StackID) { |
| auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers; |
| assert(StackID < Timers->size()); |
| return Timers->at(StackID).getName(); |
| } |
| |
| void GlobalContext::setTimerName(TimerStackIdT StackID, |
| const std::string &NewName) { |
| auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers; |
| assert(StackID < Timers->size()); |
| Timers->at(StackID).setName(NewName); |
| } |
| |
| // Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the |
| // interface to take and transfer ownership, but they internally store the raw |
| // Cfg pointer in the work queue. This allows e.g. future queue optimizations |
| // such as the use of atomics to modify queue elements. |
| void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) { |
| assert(Item); |
| { |
| TimerMarker _(TimerStack::TT_qTransPush, this); |
| OptQ.blockingPush(std::move(Item)); |
| } |
| if (getFlags().isSequential()) |
| translateFunctions(); |
| } |
| |
| std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() { |
| TimerMarker _(TimerStack::TT_qTransPop, this); |
| return OptQ.blockingPop(OptQWakeupSize); |
| } |
| |
| void GlobalContext::emitQueueBlockingPush( |
| std::unique_ptr<EmitterWorkItem> Item) { |
| assert(Item); |
| { |
| TimerMarker _(TimerStack::TT_qEmitPush, this); |
| EmitQ.blockingPush(std::move(Item)); |
| } |
| if (getFlags().isSequential()) |
| emitItems(); |
| } |
| |
| std::unique_ptr<EmitterWorkItem> GlobalContext::emitQueueBlockingPop() { |
| TimerMarker _(TimerStack::TT_qEmitPop, this); |
| return EmitQ.blockingPop(); |
| } |
| |
| void GlobalContext::initParserThread() { |
| ThreadContext *Tls = new ThreadContext(); |
| auto Timers = getTimers(); |
| Timers->initInto(Tls->Timers); |
| AllThreadContexts.push_back(Tls); |
| ICE_TLS_SET_FIELD(TLS, Tls); |
| } |
| |
| void GlobalContext::startWorkerThreads() { |
| size_t NumWorkers = getFlags().getNumTranslationThreads(); |
| auto Timers = getTimers(); |
| for (size_t i = 0; i < NumWorkers; ++i) { |
| ThreadContext *WorkerTLS = new ThreadContext(); |
| Timers->initInto(WorkerTLS->Timers); |
| AllThreadContexts.push_back(WorkerTLS); |
| TranslationThreads.push_back(std::thread( |
| &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); |
| } |
| if (NumWorkers) { |
| ThreadContext *WorkerTLS = new ThreadContext(); |
| Timers->initInto(WorkerTLS->Timers); |
| AllThreadContexts.push_back(WorkerTLS); |
| EmitterThreads.push_back( |
| std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS)); |
| } |
| } |
| |
| void GlobalContext::resetStats() { |
| if (BuildDefs::dump()) |
| ICE_TLS_GET_FIELD(TLS)->StatsFunction.reset(); |
| } |
| |
| void GlobalContext::dumpStats(const Cfg *Func) { |
| if (!getFlags().getDumpStats()) |
| return; |
| if (Func == nullptr) { |
| getStatsCumulative()->dump(Func, this); |
| } else { |
| ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Func, this); |
| } |
| } |
| |
| void GlobalContext::statsUpdateEmitted(uint32_t InstCount) { |
| if (!getFlags().getDumpStats()) |
| return; |
| ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS); |
| Tls->StatsFunction.update(CodeStats::CS_InstCount, InstCount); |
| Tls->StatsCumulative.update(CodeStats::CS_InstCount, InstCount); |
| } |
| |
| void GlobalContext::statsUpdateRegistersSaved(uint32_t Num) { |
| if (!getFlags().getDumpStats()) |
| return; |
| ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS); |
| Tls->StatsFunction.update(CodeStats::CS_RegsSaved, Num); |
| Tls->StatsCumulative.update(CodeStats::CS_RegsSaved, Num); |
| } |
| |
| void GlobalContext::statsUpdateFrameBytes(uint32_t Bytes) { |
| if (!getFlags().getDumpStats()) |
| return; |
| ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS); |
| Tls->StatsFunction.update(CodeStats::CS_FrameByte, Bytes); |
| Tls->StatsCumulative.update(CodeStats::CS_FrameByte, Bytes); |
| } |
| |
| void GlobalContext::statsUpdateSpills() { |
| if (!getFlags().getDumpStats()) |
| return; |
| ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS); |
| Tls->StatsFunction.update(CodeStats::CS_NumSpills); |
| Tls->StatsCumulative.update(CodeStats::CS_NumSpills); |
| } |
| |
| void GlobalContext::statsUpdateFills() { |
| if (!getFlags().getDumpStats()) |
| return; |
| ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS); |
| Tls->StatsFunction.update(CodeStats::CS_NumFills); |
| Tls->StatsCumulative.update(CodeStats::CS_NumFills); |
| } |
| |
| void GlobalContext::statsUpdateRPImms() { |
| if (!getFlags().getDumpStats()) |
| return; |
| ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS); |
| Tls->StatsFunction.update(CodeStats::CS_NumRPImms); |
| Tls->StatsCumulative.update(CodeStats::CS_NumRPImms); |
| } |
| |
| void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) { |
| if (!BuildDefs::timers()) |
| return; |
| auto Timers = getTimers(); |
| assert(Timers->size() > StackID); |
| OstreamLocker L(this); |
| Timers->at(StackID).dump(getStrDump(), DumpCumulative); |
| } |
| |
| void GlobalContext::dumpLocalTimers(const std::string &TimerNameOverride, |
| TimerStackIdT StackID, |
| bool DumpCumulative) { |
| if (!BuildDefs::timers()) |
| return; |
| auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers; |
| assert(Timers->size() > StackID); |
| // Temporarily override the thread-local timer name with the given name. |
| // Don't do it permanently because the final timer merge at the end expects |
| // the thread-local timer names to be the same as the global timer name. |
| auto OrigName = getTimerName(StackID); |
| setTimerName(StackID, TimerNameOverride); |
| { |
| OstreamLocker _(this); |
| Timers->at(StackID).dump(getStrDump(), DumpCumulative); |
| } |
| setTimerName(StackID, OrigName); |
| } |
| |
| LockedPtr<StringPool> |
| GlobalStringPoolTraits::getStrings(const GlobalContext *PoolOwner) { |
| return PoolOwner->getStrings(); |
| } |
| |
| TimerIdT TimerMarker::getTimerIdFromFuncName(GlobalContext *Ctx, |
| const std::string &FuncName) { |
| if (!BuildDefs::timers()) |
| return 0; |
| if (!getFlags().getTimeEachFunction()) |
| return 0; |
| return Ctx->getTimerID(GlobalContext::TSK_Funcs, FuncName); |
| } |
| |
| void TimerMarker::push() { |
| switch (StackID) { |
| case GlobalContext::TSK_Default: |
| Active = getFlags().getSubzeroTimingEnabled() || |
| !getFlags().getTimingFocusOnString().empty(); |
| break; |
| case GlobalContext::TSK_Funcs: |
| Active = getFlags().getTimeEachFunction(); |
| break; |
| default: |
| break; |
| } |
| if (Active) |
| Ctx->pushTimer(ID, StackID); |
| } |
| |
| void TimerMarker::pushCfg(const Cfg *Func) { |
| Ctx = Func->getContext(); |
| Active = Func->getFocusedTiming() || getFlags().getSubzeroTimingEnabled(); |
| if (Active) |
| Ctx->pushTimer(ID, StackID); |
| } |
| |
// Defines the GlobalContext::TLS field (of type GlobalContext::ThreadContext *)
// that the ICE_TLS_GET_FIELD / ICE_TLS_SET_FIELD uses in this file refer to.
// NOTE(review): presumably pairs with a matching declaration macro in the
// class definition - confirm in IceGlobalContext.h.
ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);
| |
| } // end of namespace Ice |