Subzero: Add rudimentary statistics on generated code.
The following are collected:
- Number of machine instructions emitted
- Number of registers saved/restored in prolog/epilog
- Number of stack frame bytes (non-alloca) allocated
- Number of "spills", or stores to stack slots
- Number of "fills", or loads/operations from stack slots
- Fill+Spill count (sum of the above two)
These are somewhat reasonable approximations of code quality, and the primary intention is to compare before-and-after when trying out an optimization.
The statistics are dumped after translating each function. Per-function and cumulative statistics are collected. The output lines have a prefix that is easy to filter.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/580633002
diff --git a/src/IceCfgNode.cpp b/src/IceCfgNode.cpp
index f6b4a98..4de2f57 100644
--- a/src/IceCfgNode.cpp
+++ b/src/IceCfgNode.cpp
@@ -483,6 +483,21 @@
if (Inst->isRedundantAssign())
continue;
(*I)->emit(Func);
+ // Update the emitted instruction count, plus the spill/fill counts
+ // for Variable operands without a physical register. Instructions
+ // that emit no native instruction (count of 0) are left out of the
+ // statistics altogether.
+ if (uint32_t Count = (*I)->getEmitInstCount()) {
+   Func->getContext()->statsUpdateEmitted(Count);
+   // A destination without a register is written by this instruction,
+   // i.e. it is a store to a stack slot: count it as a spill.
+   if (Variable *Dest = (*I)->getDest()) {
+     if (!Dest->hasReg())
+       Func->getContext()->statsUpdateSpills();
+   }
+   // A source without a register is read by this instruction, i.e. it
+   // is a load/operation from a stack slot: count it as a fill.
+   for (SizeT S = 0; S < (*I)->getSrcSize(); ++S) {
+     if (Variable *Src = llvm::dyn_cast<Variable>((*I)->getSrc(S))) {
+       if (!Src->hasReg())
+         Func->getContext()->statsUpdateFills();
+     }
+   }
+ }
}
}
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 858b7c1..2d14a93 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -24,14 +24,15 @@
ClFlags()
: DisableInternal(false), SubzeroTimingEnabled(false),
DisableTranslation(false), DisableGlobals(false),
- FunctionSections(false), UseSandboxing(false), DefaultGlobalPrefix(""),
- DefaultFunctionPrefix("") {}
+ FunctionSections(false), UseSandboxing(false), DumpStats(false),
+ DefaultGlobalPrefix(""), DefaultFunctionPrefix("") {}
bool DisableInternal;
bool SubzeroTimingEnabled;
bool DisableTranslation;
bool DisableGlobals;
bool FunctionSections;
bool UseSandboxing;
+ bool DumpStats;
IceString DefaultGlobalPrefix;
IceString DefaultFunctionPrefix;
};
diff --git a/src/IceGlobalContext.cpp b/src/IceGlobalContext.cpp
index 805a342..47c7aa8 100644
--- a/src/IceGlobalContext.cpp
+++ b/src/IceGlobalContext.cpp
@@ -384,6 +384,13 @@
llvm_unreachable("Unknown type");
}
+// Print the statistics of the function called Name, followed by the
+// running totals labeled "_TOTAL_", to the stream returned by
+// getStrDump(). Does nothing unless the DumpStats flag is set.
+void GlobalContext::dumpStats(const IceString &Name) {
+  if (!Flags.DumpStats)
+    return;
+  StatsFunction.dump(Name, getStrDump());
+  StatsCumulative.dump("_TOTAL_", getStrDump());
+}
+
void Timer::printElapsedUs(GlobalContext *Ctx, const IceString &Tag) const {
if (Ctx->isVerbose(IceV_Timing)) {
// Prefixing with '#' allows timing strings to be included
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index e5b39d2..da4b6a1 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -28,6 +28,35 @@
class ClFlags;
+// This class collects rudimentary statistics during translation. All
+// counters start at zero and are accumulated through the update*()
+// methods until reset() is called.
+class CodeStats {
+public:
+  CodeStats()
+      : InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0),
+        Fills(0) {}
+  // Set every counter back to zero.
+  void reset() { *this = CodeStats(); }
+  // Add InstCount to the number of machine instructions emitted.
+  void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; }
+  // Add Num to the number of registers saved.
+  void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; }
+  // Add Bytes to the number of stack frame bytes allocated.
+  void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; }
+  // Record one spill (a store to a stack slot).
+  void updateSpills() { ++Spills; }
+  // Record one fill (a load/operation from a stack slot).
+  void updateFills() { ++Fills; }
+  // Write one line per statistic to Str. Every line starts with '|'
+  // followed by Name, so the output is easy to filter. The last line
+  // is the sum of the spill and fill counters. Dumping only reads the
+  // counters, hence the method is const.
+  void dump(const IceString &Name, Ostream &Str) const {
+    Str << "|" << Name << "|Inst Count |" << InstructionsEmitted << "\n";
+    Str << "|" << Name << "|Regs Saved |" << RegistersSaved << "\n";
+    Str << "|" << Name << "|Frame Bytes |" << FrameBytes << "\n";
+    Str << "|" << Name << "|Spills |" << Spills << "\n";
+    Str << "|" << Name << "|Fills |" << Fills << "\n";
+    Str << "|" << Name << "|Spills+Fills|" << Spills + Fills << "\n";
+  }
+
+private:
+  uint32_t InstructionsEmitted; // Machine instructions emitted
+  uint32_t RegistersSaved;      // Registers saved
+  uint32_t FrameBytes;          // Stack frame bytes allocated
+  uint32_t Spills;              // Stores to stack slots
+  uint32_t Fills;               // Loads/operations from stack slots
+};
+
// TODO: Accesses to all non-const fields of GlobalContext need to
// be synchronized, especially the constant pool, the allocator, and
// the output streams.
@@ -101,6 +130,30 @@
// translation.
RandomNumberGenerator &getRNG() { return RNG; }
+ // Statistics interface. Each statsUpdate*() method below applies the
+ // same update to both the per-function counters (StatsFunction) and
+ // the running totals (StatsCumulative).
+ //
+ // Reset stats at the beginning of a function. Only the per-function
+ // counters are cleared; the cumulative totals are left untouched.
+ void resetStats() { StatsFunction.reset(); }
+ // Dump the per-function statistics, labeled with Name, followed by
+ // the cumulative statistics. Prints nothing unless the DumpStats
+ // flag is set.
+ void dumpStats(const IceString &Name);
+ // Add InstCount to the emitted machine instruction counts.
+ void statsUpdateEmitted(uint32_t InstCount) {
+ StatsFunction.updateEmitted(InstCount);
+ StatsCumulative.updateEmitted(InstCount);
+ }
+ // Add Num to the saved register counts.
+ void statsUpdateRegistersSaved(uint32_t Num) {
+ StatsFunction.updateRegistersSaved(Num);
+ StatsCumulative.updateRegistersSaved(Num);
+ }
+ // Add Bytes to the stack frame byte counts.
+ void statsUpdateFrameBytes(uint32_t Bytes) {
+ StatsFunction.updateFrameBytes(Bytes);
+ StatsCumulative.updateFrameBytes(Bytes);
+ }
+ // Record a single spill.
+ void statsUpdateSpills() {
+ StatsFunction.updateSpills();
+ StatsCumulative.updateSpills();
+ }
+ // Record a single fill.
+ void statsUpdateFills() {
+ StatsFunction.updateFills();
+ StatsCumulative.updateFills();
+ }
+
private:
Ostream *StrDump; // Stream for dumping / diagnostics
Ostream *StrEmit; // Stream for code emission
@@ -115,6 +168,8 @@
const ClFlags &Flags;
bool HasEmittedFirstMethod;
RandomNumberGenerator RNG;
+ CodeStats StatsFunction;
+ CodeStats StatsCumulative;
GlobalContext(const GlobalContext &) LLVM_DELETED_FUNCTION;
GlobalContext &operator=(const GlobalContext &) LLVM_DELETED_FUNCTION;
diff --git a/src/IceInst.h b/src/IceInst.h
index 484e10f..18c38dd 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -101,6 +101,12 @@
void livenessLightweight(llvm::BitVector &Live);
void liveness(InstNumberT InstNumber, llvm::BitVector &Live,
Liveness *Liveness, const CfgNode *Node);
+
+ // Get the number of native instructions that this instruction
+ // ultimately emits. By default, high-level instructions don't
+ // result in any native instructions, and a target-specific
+ // instruction results in a single native instruction. The emission
+ // loop in IceCfgNode.cpp adds this count to the code statistics and
+ // skips the fill/spill accounting for an instruction whose count is 0.
+ virtual uint32_t getEmitInstCount() const { return 0; }
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
virtual void dumpExtras(const Cfg *Func) const;
@@ -744,6 +750,7 @@
// instructions.
class InstTarget : public Inst {
public:
+ // A target-specific instruction emits one native instruction by
+ // default; subclasses override this when that is not the case.
+ virtual uint32_t getEmitInstCount() const { return 1; }
virtual void emit(const Cfg *Func) const = 0;
virtual void dump(const Cfg *Func) const;
virtual void dumpExtras(const Cfg *Func) const;
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index ed538f4..f0558db 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -290,6 +290,7 @@
static InstX8632Label *create(Cfg *Func, TargetX8632 *Target) {
return new (Func->allocate<InstX8632Label>()) InstX8632Label(Func, Target);
}
+ // A label does not count as an emitted native instruction.
+ virtual uint32_t getEmitInstCount() const { return 0; }
IceString getName(const Cfg *Func) const;
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
@@ -324,7 +325,7 @@
InstX8632Br(Func, Target, NULL, NULL, Condition);
}
// Create a conditional intra-block branch (or unconditional, if
- // Condition==None) to a label in the current block.
+ // Condition==Br_None) to a label in the current block.
static InstX8632Br *create(Cfg *Func, InstX8632Label *Label,
BrCond Condition) {
return new (Func->allocate<InstX8632Br>())
@@ -332,6 +333,15 @@
}
CfgNode *getTargetTrue() const { return TargetTrue; }
CfgNode *getTargetFalse() const { return TargetFalse; }
+ // Number of native instructions emitted for this branch: 2 for a
+ // conditional branch that has no label and does have a false target,
+ // and 1 for every other form (intra-block branch to a label,
+ // unconditional branch, or conditional branch without a false
+ // target).
+ virtual uint32_t getEmitInstCount() const {
+   const bool NeedsSecondBranch =
+       !Label && Condition != Br_None && getTargetFalse();
+   return NeedsSecondBranch ? 2 : 1;
+ }
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return isClassof(Inst, Br); }
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index d74249e..464a2e8 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -606,6 +606,11 @@
} else {
_mov(Arg, Mem);
}
+ // This argument-copying instruction uses an explicit
+ // OperandX8632Mem operand instead of a Variable, so its
+ // fill-from-stack operation has to be tracked separately for
+ // statistics.
+ Ctx->statsUpdateFills();
}
}
@@ -746,13 +751,16 @@
SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers.
+ uint32_t NumCallee = 0;
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
+ ++NumCallee;
PreservedRegsSizeBytes += 4;
const bool SuppressStackAdjustment = true;
_push(getPhysicalRegister(i), SuppressStackAdjustment);
}
}
+ Ctx->statsUpdateRegistersSaved(NumCallee);
// Generate "push ebp; mov ebp, esp"
if (IsEbpBasedFrame) {
@@ -800,6 +808,7 @@
if (SpillAreaSizeBytes)
_sub(getPhysicalRegister(Reg_esp),
Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
+ Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
resetStackAdjustment();
diff --git a/src/IceTranslator.cpp b/src/IceTranslator.cpp
index 05332b4..0636b50 100644
--- a/src/IceTranslator.cpp
+++ b/src/IceTranslator.cpp
@@ -71,6 +71,7 @@
}
void Translator::translateFcn(Cfg *Fcn) {
+ Ctx->resetStats();
Func.reset(Fcn);
if (Ctx->getFlags().DisableInternal)
Func->setInternal(false);
@@ -95,6 +96,7 @@
std::cerr << "[Subzero timing] Emit function " << Func->getFunctionName()
<< ": " << TEmit.getElapsedSec() << " sec\n";
}
+ Ctx->dumpStats(Func->getFunctionName());
}
}
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index ce700b7..3cb13b3 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -102,6 +102,10 @@
DisablePhiEdgeSplit("no-phi-edge-split",
cl::desc("Disable edge splitting for Phi lowering"));
+// -stats: print the collected code statistics after each function is
+// translated. The value is copied into Flags.DumpStats elsewhere in
+// this file.
+static cl::opt<bool>
+DumpStats("stats",
+ cl::desc("Print statistics after translating each function"));
+
static cl::opt<NaClFileFormat> InputFileFormat(
"bitcode-format", cl::desc("Define format of input file:"),
cl::values(clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"),
@@ -155,6 +159,7 @@
Flags.DisableGlobals = DisableGlobals;
Flags.FunctionSections = FunctionSections;
Flags.UseSandboxing = UseSandboxing;
+ Flags.DumpStats = DumpStats;
Flags.DefaultGlobalPrefix = DefaultGlobalPrefix;
Flags.DefaultFunctionPrefix = DefaultFunctionPrefix;