Fix timing of parseFunctions.

The previous implementation was charging about 24% more time that it
should to the function parser. The cause was that the time to "queue" the
parsed functions, and the time to emit the assembled code (again
including "queue" time) was not accounted for. About 15% was going to
queuing costs, and 7% to emitting the ELF file.

This CL adds timing of function translateFunctions, which captures most
of the queueing costs, and timing for each of the major ELF emission
functions (emitELF). This allows the corresponding costs to be better
bucketed, and not charged to the time it takes to parse functions in
bitcode files.

Bug=None

R=jpp@chromium.org, stichnot@chromium.org

Review URL: https://codereview.chromium.org/1775603002 .
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index ef8fe90..a44e6b3 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -1027,7 +1027,7 @@
 void Cfg::emit() {
   if (!BuildDefs::dump())
     return;
-  TimerMarker T(TimerStack::TT_emit, this);
+  TimerMarker T(TimerStack::TT_emitAsm, this);
   if (Ctx->getFlags().getDecorateAsm()) {
     renumberInstructions();
     getVMetadata()->init(VMK_Uses);
@@ -1061,7 +1061,7 @@
 }
 
 void Cfg::emitIAS() {
-  TimerMarker T(TimerStack::TT_emit, this);
+  TimerMarker T(TimerStack::TT_emitAsm, this);
   // The emitIAS() routines emit into the internal assembler buffer, so there's
   // no need to lock the streams.
   deleteJumpTableInsts();
diff --git a/src/IceCompiler.cpp b/src/IceCompiler.cpp
index aaaa848..7a0dcbb 100644
--- a/src/IceCompiler.cpp
+++ b/src/IceCompiler.cpp
@@ -129,7 +129,7 @@
     Ctx.lowerJumpTables();
 
     if (Ctx.getFlags().getOutFileType() == FT_Elf) {
-      TimerMarker T1(Ice::TimerStack::TT_emit, &Ctx);
+      TimerMarker T1(Ice::TimerStack::TT_emitAsm, &Ctx);
       Ctx.getObjectWriter()->setUndefinedSyms(Ctx.getConstantExternSyms());
       Ctx.getObjectWriter()->writeNonUserSections();
     }
diff --git a/src/IceELFObjectWriter.cpp b/src/IceELFObjectWriter.cpp
index 22f812d..fd1150e 100644
--- a/src/IceELFObjectWriter.cpp
+++ b/src/IceELFObjectWriter.cpp
@@ -219,6 +219,14 @@
 void ELFObjectWriter::writeFunctionCode(const IceString &FuncName,
                                         bool IsInternal, Assembler *Asm) {
   assert(!SectionNumbersAssigned);
+  TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
+  constexpr TimerStackIdT StackID = GlobalContext::TSK_Funcs;
+  TimerIdT TimerID = 0;
+  bool TimeThisFunction = Ctx.getFlags().getTimeEachFunction();
+  if (TimeThisFunction) {
+    TimerID = Ctx.getTimerID(StackID, FuncName);
+    Ctx.pushTimer(TimerID, StackID);
+  }
   ELFTextSection *Section = nullptr;
   ELFRelocationSection *RelSection = nullptr;
   const bool FunctionSections = Ctx.getFlags().getFunctionSections();
@@ -270,6 +278,8 @@
     RelSection->addRelocations(OffsetInSection, Asm->fixups());
   }
   Section->appendData(Str, Asm->getBufferView());
+  if (TimeThisFunction)
+    Ctx.popTimer(TimerID, StackID);
 }
 
 namespace {
@@ -303,6 +313,7 @@
                                        FixupKind RelocationKind,
                                        const IceString &SectionSuffix,
                                        bool IsPIC) {
+  TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
   assert(!SectionNumbersAssigned);
   VariableDeclarationList VarsBySection[ELFObjectWriter::NumSectionTypes];
   for (auto &SectionList : VarsBySection)
@@ -439,6 +450,7 @@
 }
 
 void ELFObjectWriter::writeInitialELFHeader() {
+  TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
   assert(!SectionNumbersAssigned);
   constexpr Elf64_Off DummySHOffset = 0;
   constexpr SizeT DummySHStrIndex = 0;
@@ -500,6 +512,7 @@
 }
 
 template <typename ConstType> void ELFObjectWriter::writeConstantPool(Type Ty) {
+  TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
   ConstantList Pool = Ctx.getConstantPool(Ty);
   if (Pool.empty()) {
     return;
@@ -576,6 +589,7 @@
 
 void ELFObjectWriter::writeJumpTable(const JumpTableData &JT,
                                      FixupKind RelocationKind, bool IsPIC) {
+  TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
   ELFDataSection *Section;
   ELFRelocationSection *RelSection;
   const Elf64_Xword PointerSize = typeWidthInBytes(getPointerType());
@@ -611,6 +625,7 @@
 }
 
 void ELFObjectWriter::setUndefinedSyms(const ConstantList &UndefSyms) {
+  TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
   for (const Constant *S : UndefSyms) {
     const auto *Sym = llvm::cast<ConstantRelocatable>(S);
     const IceString &Name = Sym->getName();
@@ -642,6 +657,8 @@
 }
 
 void ELFObjectWriter::writeNonUserSections() {
+  TimerMarker Timer(TimerStack::TT_writeELF, &Ctx);
+
   // Write out the shstrtab now that all sections are known.
   ShStrTab->doLayout();
   ShStrTab->setSize(ShStrTab->getSectionDataSize());
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 4e8dc6b..15cc40d 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -307,6 +307,7 @@
 }
 
 void GlobalContext::translateFunctions() {
+  TimerMarker Timer(TimerStack::TT_translateFunctions, this);
   while (std::unique_ptr<Cfg> Func = optQueueBlockingPop()) {
     // Install Func in TLS for Cfg-specific container allocators.
     CfgLocalAllocatorScope _(Func.get());
@@ -380,7 +381,7 @@
 } // end of anonymous namespace
 
 void GlobalContext::emitFileHeader() {
-  TimerMarker T1(Ice::TimerStack::TT_emit, this);
+  TimerMarker T1(Ice::TimerStack::TT_emitAsm, this);
   if (getFlags().getOutFileType() == FT_Elf) {
     getObjectWriter()->writeInitialELFHeader();
   } else {
diff --git a/src/IceTimerTree.def b/src/IceTimerTree.def
index 6e12219..687275a 100644
--- a/src/IceTimerTree.def
+++ b/src/IceTimerTree.def
@@ -27,7 +27,7 @@
   X(doArgLowering)             \
   X(doBranchOpt)               \
   X(doNopInsertion)            \
-  X(emit)                      \
+  X(emitAsm)                   \
   X(emitGlobalInitializers)    \
   X(genCode)                   \
   X(genFrame)                  \
@@ -53,8 +53,10 @@
   X(renumberInstructions)      \
   X(szmain)                    \
   X(translate)                 \
+  X(translateFunctions)        \
   X(validateLiveness)          \
-  X(vmetadata)
+  X(vmetadata)                 \
+  X(writeELF)
 //#define X(tag)
 
 #endif // SUBZERO_SRC_ICETIMERTREE_DEF