Subzero: Add rudimentary statistics on generated code.

The following are collected:
- Number of machine instructions emitted
- Number of registers saved/restored in prolog/epilog
- Number of stack frame bytes (non-alloca) allocated
- Number of "spills", or stores to stack slots
- Number of "fills", or loads/operations from stack slots
- Fill+Spill count (sum of above two)

These are somewhat reasonable approximations of code quality, and the primary intention is to compare before-and-after when trying out an optimization.

The statistics are dumped after translating each function.  Per-function and cumulative statistics are collected.  The output lines have a prefix that is easy to filter.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/580633002
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index f6b4a98..4de2f57 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -483,6 +483,21 @@
     if (Inst->isRedundantAssign())
       continue;
     (*I)->emit(Func);
+    // Update emitted instruction count, plus spill/fill counts for
+    // Variable operands without a physical register.
+    if (uint32_t Count = (*I)->getEmitInstCount()) {
+      Func->getContext()->statsUpdateEmitted(Count);
+      if (Variable *Dest = (*I)->getDest()) {
+        if (!Dest->hasReg()) // Register-less result: a store, i.e. a spill.
+          Func->getContext()->statsUpdateSpills();
+      }
+      for (SizeT S = 0; S < (*I)->getSrcSize(); ++S) {
+        if (Variable *Src = llvm::dyn_cast<Variable>((*I)->getSrc(S))) {
+          if (!Src->hasReg()) // Register-less operand: a load, i.e. a fill.
+            Func->getContext()->statsUpdateFills();
+        }
+      }
+    }
   }
 }
 
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 858b7c1..2d14a93 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -24,14 +24,15 @@
   ClFlags()
       : DisableInternal(false), SubzeroTimingEnabled(false),
         DisableTranslation(false), DisableGlobals(false),
-        FunctionSections(false), UseSandboxing(false), DefaultGlobalPrefix(""),
-        DefaultFunctionPrefix("") {}
+        FunctionSections(false), UseSandboxing(false), DumpStats(false),
+        DefaultGlobalPrefix(""), DefaultFunctionPrefix("") {}
   bool DisableInternal;
   bool SubzeroTimingEnabled;
   bool DisableTranslation;
   bool DisableGlobals;
   bool FunctionSections;
   bool UseSandboxing;
+  bool DumpStats;
   IceString DefaultGlobalPrefix;
   IceString DefaultFunctionPrefix;
 };
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 805a342..47c7aa8 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -384,6 +384,13 @@
   llvm_unreachable("Unknown type");
 }
 
+void GlobalContext::dumpStats(const IceString &Name) {
+  if (!Flags.DumpStats) // Statistics are printed only when requested.
+    return;
+  StatsFunction.dump(Name, getStrDump());
+  StatsCumulative.dump("_TOTAL_", getStrDump());
+}
+
 void Timer::printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const {
   if (Ctx->isVerbose(IceV_Timing)) {
     // Prefixing with '#' allows timing strings to be included
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index e5b39d2..da4b6a1 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -28,6 +28,35 @@
 
 class ClFlags;
 
+// Counters for rudimentary code statistics gathered during translation.
+class CodeStats {
+public:
+  CodeStats()
+      : InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0),
+        Fills(0) {}
+  void reset() { *this = CodeStats(); } // Zero every counter.
+  void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; }
+  void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; }
+  void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; }
+  void updateSpills() { ++Spills; }
+  void updateFills() { ++Fills; }
+  void dump(const IceString &Name, Ostream &Str) const {
+    Str << "|" << Name << "|Inst Count  |" << InstructionsEmitted << "\n";
+    Str << "|" << Name << "|Regs Saved  |" << RegistersSaved << "\n";
+    Str << "|" << Name << "|Frame Bytes |" << FrameBytes << "\n";
+    Str << "|" << Name << "|Spills      |" << Spills << "\n";
+    Str << "|" << Name << "|Fills       |" << Fills << "\n";
+    Str << "|" << Name << "|Spills+Fills|" << Spills + Fills << "\n";
+  }
+
+private:
+  uint32_t InstructionsEmitted; // Native instructions emitted.
+  uint32_t RegistersSaved;      // Registers saved in the prolog.
+  uint32_t FrameBytes;          // Stack frame bytes allocated.
+  uint32_t Spills;              // Stores to stack slots.
+  uint32_t Fills;               // Loads from stack slots.
+};
+
 // TODO: Accesses to all non-const fields of GlobalContext need to
 // be synchronized, especially the constant pool, the allocator, and
 // the output streams.
@@ -101,6 +130,30 @@
   // translation.
   RandomNumberGenerator &getRNG() { return RNG; }
 
+  // Clear only the per-function counters; cumulative ones are untouched.
+  void resetStats() { StatsFunction.reset(); }
+  void dumpStats(const IceString &Name);
+  void statsUpdateEmitted(uint32_t InstCount) {
+    StatsCumulative.updateEmitted(InstCount);
+    StatsFunction.updateEmitted(InstCount);
+  }
+  void statsUpdateRegistersSaved(uint32_t Num) {
+    StatsCumulative.updateRegistersSaved(Num);
+    StatsFunction.updateRegistersSaved(Num);
+  }
+  void statsUpdateFrameBytes(uint32_t Bytes) {
+    StatsCumulative.updateFrameBytes(Bytes);
+    StatsFunction.updateFrameBytes(Bytes);
+  }
+  void statsUpdateSpills() {
+    StatsCumulative.updateSpills();
+    StatsFunction.updateSpills();
+  }
+  void statsUpdateFills() {
+    StatsCumulative.updateFills();
+    StatsFunction.updateFills();
+  }
+
 private:
   Ostream *StrDump; // Stream for dumping / diagnostics
   Ostream *StrEmit; // Stream for code emission
@@ -115,6 +168,8 @@
   const ClFlags &Flags;
   bool HasEmittedFirstMethod;
   RandomNumberGenerator RNG;
+  CodeStats StatsFunction;
+  CodeStats StatsCumulative;
   GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION;
   GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION;
 
diff --git a/src/IceInst.h b/src/IceInst.h
index 484e10f..18c38dd 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -101,6 +101,12 @@
   void livenessLightweight(llvm::BitVector &Live);
   void liveness(InstNumberT InstNumber, llvm::BitVector &Live,
                 Liveness *Liveness, const CfgNode *Node);
+
+  // Get the number of native instructions that this instruction
+  // ultimately emits (used for the emitted-instruction statistic).
+  // By default, high-level instructions don't result in any native
+  // instructions, and a target-specific instruction results in one.
+  virtual uint32_t getEmitInstCount() const { return 0; }
   virtual void emit(const Cfg *Func) const;
   virtual void dump(const Cfg *Func) const;
   virtual void dumpExtras(const Cfg *Func) const;
@@ -744,6 +750,7 @@
 // instructions.
 class InstTarget : public Inst {
 public:
+  virtual uint32_t getEmitInstCount() const { return 1; }
   virtual void emit(const Cfg *Func) const = 0;
   virtual void dump(const Cfg *Func) const;
   virtual void dumpExtras(const Cfg *Func) const;
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index ed538f4..f0558db 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -290,6 +290,7 @@
   static InstX8632Label *create(Cfg *Func, TargetX8632 *Target) {
     return new (Func->allocate<InstX8632Label>()) InstX8632Label(Func, Target);
   }
+  virtual uint32_t getEmitInstCount() const { return 0; }
   IceString getName(const Cfg *Func) const;
   virtual void emit(const Cfg *Func) const;
   virtual void dump(const Cfg *Func) const;
@@ -324,7 +325,7 @@
         InstX8632Br(Func, Target, NULL, NULL, Condition);
   }
   // Create a conditional intra-block branch (or unconditional, if
-  // Condition==None) to a label in the current block.
+  // Condition==Br_None) to a label in the current block.
   static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
                              BrCond Condition) {
     return new (Func->allocate<InstX8632Br>())
@@ -332,6 +333,15 @@
   }
   CfgNode *getTargetTrue() const { return TargetTrue; }
   CfgNode *getTargetFalse() const { return TargetFalse; }
+  virtual uint32_t getEmitInstCount() const {
+    // Two instructions only for a conditional branch without a Label
+    // that has a false target; every other form counts as one.
+    const bool NeedsSecondBranch =
+        !Label && Condition != Br_None && getTargetFalse();
+    if (NeedsSecondBranch)
+      return 2;
+    return 1;
+  }
   virtual void emit(const Cfg *Func) const;
   virtual void dump(const Cfg *Func) const;
   static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index d74249e..464a2e8 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -606,6 +606,11 @@
     } else {
       _mov(Arg, Mem);
     }
+    // This argument-copying instruction uses an explicit
+    // OperandX8632Mem operand instead of a Variable, so its
+    // fill-from-stack operation has to be tracked separately for
+    // statistics.
+    Ctx->statsUpdateFills();
   }
 }
 
@@ -746,13 +751,16 @@
   SpillAreaSizeBytes += GlobalsSize;
 
   // Add push instructions for preserved registers.
+  uint32_t NumCallee = 0;
   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
     if (CalleeSaves[i] && RegsUsed[i]) {
+      ++NumCallee;
       PreservedRegsSizeBytes += 4;
       const bool SuppressStackAdjustment = true;
       _push(getPhysicalRegister(i), SuppressStackAdjustment);
     }
   }
+  Ctx->statsUpdateRegistersSaved(NumCallee);
 
   // Generate "push ebp; mov ebp, esp"
   if (IsEbpBasedFrame) {
@@ -800,6 +808,7 @@
   if (SpillAreaSizeBytes)
     _sub(getPhysicalRegister(Reg_esp),
          Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
+  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
 
   resetStackAdjustment();
 
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp
index 05332b4..0636b50 100644
--- a/src/IceTranslator.cpp
+++ b/src/IceTranslator.cpp
@@ -71,6 +71,7 @@
 }
 
 void Translator::translateFcn(Cfg *Fcn) {
+  Ctx->resetStats();
   Func.reset(Fcn);
   if (Ctx->getFlags().DisableInternal)
     Func->setInternal(false);
@@ -95,6 +96,7 @@
       std::cerr << "[Subzero timing] Emit function " << Func->getFunctionName()
                 << ": " << TEmit.getElapsedSec() << " sec\n";
     }
+    Ctx->dumpStats(Func->getFunctionName());
   }
 }
 
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index ce700b7..3cb13b3 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -102,6 +102,10 @@
     DisablePhiEdgeSplit("no-phi-edge-split",
                         cl::desc("Disable edge splitting for Phi lowering"));
 
+static cl::opt<bool>
+DumpStats("stats",
+          cl::desc("Print statistics after translating each function"));
+
 static cl::opt<NaClFileFormat> InputFileFormat(
     "bitcode-format", cl::desc("Define format of input file:"),
     cl::values(clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"),
@@ -155,6 +159,7 @@
   Flags.DisableGlobals = DisableGlobals;
   Flags.FunctionSections = FunctionSections;
   Flags.UseSandboxing = UseSandboxing;
+  Flags.DumpStats = DumpStats;
   Flags.DefaultGlobalPrefix = DefaultGlobalPrefix;
   Flags.DefaultFunctionPrefix = DefaultFunctionPrefix;