Introduce the ability to insert IACA (Intel Architecture Code Analyzer) marks.
The IACA marks identify the code which should be analyzed with IACA.
The generated binaries are not executable due to the marks. This feature should
only be used during development when analyzing generated code, so it is protected
behind the --allow-iaca-marks flag.
ScopedIacaMark is a helper class which opens a mark and closes it at the end
of the scope. This is useful when there are many returns, as you don't have to
write `_iaca_end()` before them all.
BUG=
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1260093003.
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index ccbfeb8..9d872d2 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -833,6 +833,12 @@
void xchg(Type Ty, const typename Traits::Address &address,
typename Traits::GPRRegister reg);
+ /// \name Intel Architecture Code Analyzer (IACA) markers.
+ /// @{
+ void iaca_start(); ///< Emits the IACA start-marker byte sequence.
+ void iaca_end(); ///< Emits the IACA end-marker byte sequence.
+ /// @}
+
void emitSegmentOverride(uint8_t prefix);
intptr_t preferredLoopAlignment() { return 16; }
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index 9a439c2..f785756 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -3112,6 +3112,43 @@
emitOperand(gprEncoding(reg), addr);
}
+template <class Machine> void AssemblerX86Base<Machine>::iaca_start() {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ emitUint8(0x0F); // IACA start marker, part 1: raw bytes 0x0F 0x0B.
+ emitUint8(0x0B);
+ // Part 2: load the start-marker value 111 into ebx as a 32-bit immediate.
+ // mov $111, ebx
+ constexpr typename Traits::GPRRegister dst =
+ Traits::GPRRegister::Encoded_Reg_ebx;
+ constexpr Type Ty = IceType_i32;
+ emitRexB(Ty, dst);
+ emitUint8(0xB8 + gprEncoding(dst)); // 0xB8 plus the register's encoding.
+ emitImmediate(Ty, Immediate(111));
+ // Part 3: raw bytes 0x64 0x67 0x90 close the start marker.
+ emitUint8(0x64);
+ emitUint8(0x67);
+ emitUint8(0x90);
+}
+
+template <class Machine> void AssemblerX86Base<Machine>::iaca_end() {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
+ // IACA end marker, part 1: load the end-marker value 222 into ebx.
+ // mov $222, ebx
+ constexpr typename Traits::GPRRegister dst =
+ Traits::GPRRegister::Encoded_Reg_ebx;
+ constexpr Type Ty = IceType_i32;
+ emitRexB(Ty, dst);
+ emitUint8(0xB8 + gprEncoding(dst)); // 0xB8 plus the register's encoding.
+ emitImmediate(Ty, Immediate(222));
+ // Part 2: raw bytes 0x64 0x67 0x90.
+ emitUint8(0x64);
+ emitUint8(0x67);
+ emitUint8(0x90);
+ // Part 3: raw bytes 0x0F 0x0B come last here (iaca_start() emits them first).
+ emitUint8(0x0F);
+ emitUint8(0x0B);
+}
+
template <class Machine>
void AssemblerX86Base<Machine>::emitSegmentOverride(uint8_t prefix) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
diff --git a/src/IceClFlags.cpp b/src/IceClFlags.cpp
index aa4b4b6..ec7f3bb 100644
--- a/src/IceClFlags.cpp
+++ b/src/IceClFlags.cpp
@@ -34,6 +34,12 @@
cl::desc("Allow error recovery when reading PNaCl bitcode."),
cl::init(false));
+cl::opt<bool> AllowIacaMarks( // Command-line switch "allow-iaca-marks".
+ "allow-iaca-marks",
+ cl::desc("Allow IACA (Intel Architecture Code Analyzer) marks to be "
+ "inserted. These binaries are not executable."),
+ cl::init(false)); // Initialized to false.
+
// This is currently needed by crosstest.py.
cl::opt<bool> AllowUninitializedGlobals(
"allow-uninitialized-globals",
@@ -341,6 +347,7 @@
void ClFlags::resetClFlags(ClFlags &OutFlags) {
// bool fields
OutFlags.AllowErrorRecovery = false;
+ OutFlags.AllowIacaMarks = false;
OutFlags.AllowUninitializedGlobals = false;
OutFlags.DataSections = false;
OutFlags.DecorateAsm = false;
@@ -398,6 +405,7 @@
}
OutFlags.setAllowErrorRecovery(::AllowErrorRecovery);
+ OutFlags.setAllowIacaMarks(::AllowIacaMarks);
OutFlags.setAllowUninitializedGlobals(::AllowUninitializedGlobals);
OutFlags.setDataSections(::DataSections);
OutFlags.setDecorateAsm(::DecorateAsm);
diff --git a/src/IceClFlags.h b/src/IceClFlags.h
index 2698236..2ce3b4e 100644
--- a/src/IceClFlags.h
+++ b/src/IceClFlags.h
@@ -39,6 +39,9 @@
bool getAllowErrorRecovery() const { return AllowErrorRecovery; }
void setAllowErrorRecovery(bool NewValue) { AllowErrorRecovery = NewValue; }
+ bool getAllowIacaMarks() const { return AllowIacaMarks; } // Reads the flag.
+ void setAllowIacaMarks(bool NewValue) { AllowIacaMarks = NewValue; } // Sets it.
+
bool getAllowUninitializedGlobals() const {
return AllowUninitializedGlobals;
}
@@ -229,6 +232,7 @@
private:
bool AllowErrorRecovery;
+ bool AllowIacaMarks;
bool AllowUninitializedGlobals;
bool DataSections;
bool DecorateAsm;
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 4cfcfd4..6d39005 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -132,7 +132,12 @@
Xadd,
Xchg,
Xor,
- XorRMW
+ XorRMW,
+
+ /// Intel Architecture Code Analyzer markers. These are not executable so
+ /// must only be used for analysis.
+ IacaStart,
+ IacaEnd
};
static const char *getWidthString(Type Ty);
@@ -2694,6 +2699,53 @@
InstX86Xchg(Cfg *Func, Operand *Dest, Variable *Source);
};
+/// Pseudo-instruction for the Intel Architecture Code Analyzer (IACA) start
+/// marker. It is not an executable instruction; use it only for analysis.
+template <class Machine>
+class InstX86IacaStart final : public InstX86Base<Machine> {
+ InstX86IacaStart() = delete;
+ InstX86IacaStart(const InstX86IacaStart &) = delete;
+ InstX86IacaStart &operator=(const InstX86IacaStart &) = delete;
+ // Instances are obtained through create(); the constructor is private.
+public:
+ static InstX86IacaStart *create(Cfg *Func) {
+ return new (Func->allocate<InstX86IacaStart>()) InstX86IacaStart(Func);
+ }
+ void emit(const Cfg *Func) const override; ///< Marker as assembly text.
+ void emitIAS(const Cfg *Func) const override; ///< Marker via the assembler.
+ void dump(const Cfg *Func) const override; ///< Prints "IACA_START".
+ static bool classof(const Inst *Inst) { // Kind check against IacaStart.
+ return InstX86Base<Machine>::isClassof(Inst,
+ InstX86Base<Machine>::IacaStart);
+ }
+
+private:
+ InstX86IacaStart(Cfg *Func);
+};
+
+/// Pseudo-instruction for the Intel Architecture Code Analyzer (IACA) end
+/// marker. It is not an executable instruction; use it only for analysis.
+template <class Machine>
+class InstX86IacaEnd final : public InstX86Base<Machine> {
+ InstX86IacaEnd() = delete;
+ InstX86IacaEnd(const InstX86IacaEnd &) = delete;
+ InstX86IacaEnd &operator=(const InstX86IacaEnd &) = delete;
+ // Instances are obtained through create(); the constructor is private.
+public:
+ static InstX86IacaEnd *create(Cfg *Func) {
+ return new (Func->allocate<InstX86IacaEnd>()) InstX86IacaEnd(Func);
+ }
+ void emit(const Cfg *Func) const override; ///< Marker as assembly text.
+ void emitIAS(const Cfg *Func) const override; ///< Marker via the assembler.
+ void dump(const Cfg *Func) const override; ///< Prints "IACA_END".
+ static bool classof(const Inst *Inst) { // Kind check against IacaEnd.
+ return InstX86Base<Machine>::isClassof(Inst, InstX86Base<Machine>::IacaEnd);
+ }
+
+private:
+ InstX86IacaEnd(Cfg *Func);
+};
+
/// struct Insts is a template that can be used to instantiate all the X86
/// instructions for a target with a simple
///
@@ -2798,6 +2850,9 @@
using Setcc = InstX86Setcc<Machine>;
using Xadd = InstX86Xadd<Machine>;
using Xchg = InstX86Xchg<Machine>;
+
+ using IacaStart = InstX86IacaStart<Machine>;
+ using IacaEnd = InstX86IacaEnd<Machine>;
};
/// X86 Instructions have static data (particularly, opcodes and instruction
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 2956558..34417cf 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -367,6 +367,18 @@
this->addSource(Source);
}
+template <class Machine> // Constructs an instruction of kind IacaStart.
+InstX86IacaStart<Machine>::InstX86IacaStart(Cfg *Func)
+ : InstX86Base<Machine>(Func, InstX86Base<Machine>::IacaStart, 0, nullptr) {
+ assert(Func->getContext()->getFlags().getAllowIacaMarks()); // Flag must be on.
+}
+
+template <class Machine> // Constructs an instruction of kind IacaEnd.
+InstX86IacaEnd<Machine>::InstX86IacaEnd(Cfg *Func)
+ : InstX86Base<Machine>(Func, InstX86Base<Machine>::IacaEnd, 0, nullptr) {
+ assert(Func->getContext()->getFlags().getAllowIacaMarks()); // Flag must be on.
+}
+
// ======================== Dump routines ======================== //
template <class Machine>
@@ -3156,6 +3168,58 @@
this->dumpSources(Func);
}
+template <class Machine> // Writes the start marker as assembly text.
+void InstX86IacaStart<Machine>::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump()) // Emits nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ Str << "\t# IACA_START\n" // Same pieces, in order, as Assembler::iaca_start().
+ << "\t.byte 0x0F, 0x0B\n"
+ << "\tmovl\t$111, %ebx\n"
+ << "\t.byte 0x64, 0x67, 0x90"; // No trailing newline is written here.
+}
+
+template <class Machine> // Emits the start marker through Func's assembler.
+void InstX86IacaStart<Machine>::emitIAS(const Cfg *Func) const {
+ typename InstX86Base<Machine>::Traits::Assembler *Asm =
+ Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+ Asm->iaca_start(); // The assembler writes the marker bytes.
+}
+
+template <class Machine> // Prints "IACA_START" to the context's dump stream.
+void InstX86IacaStart<Machine>::dump(const Cfg *Func) const {
+ if (!BuildDefs::dump()) // Prints nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "IACA_START";
+}
+
+template <class Machine> // Writes the end marker as assembly text.
+void InstX86IacaEnd<Machine>::emit(const Cfg *Func) const {
+ if (!BuildDefs::dump()) // Emits nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ Str << "\t# IACA_END\n" // Same pieces, in order, as Assembler::iaca_end().
+ << "\tmovl\t$222, %ebx\n"
+ << "\t.byte 0x64, 0x67, 0x90\n"
+ << "\t.byte 0x0F, 0x0B"; // No trailing newline is written here.
+}
+
+template <class Machine> // Emits the end marker through Func's assembler.
+void InstX86IacaEnd<Machine>::emitIAS(const Cfg *Func) const {
+ typename InstX86Base<Machine>::Traits::Assembler *Asm =
+ Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+ Asm->iaca_end(); // The assembler writes the marker bytes.
+}
+
+template <class Machine> // Prints "IACA_END" to the context's dump stream.
+void InstX86IacaEnd<Machine>::dump(const Cfg *Func) const {
+ if (!BuildDefs::dump()) // Prints nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "IACA_END";
+}
+
} // end of namespace X86Internal
} // end of namespace Ice
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index dbbb747..40966a2 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -578,6 +578,37 @@
Context.insert(Traits::Insts::XorRMW::create(Func, DestSrc0, Src1));
}
+ void _iaca_start() { // Inserts an IacaStart instruction into Context.
+ if (!BuildDefs::minimal()) // Does nothing when BuildDefs::minimal() is true.
+ Context.insert(Traits::Insts::IacaStart::create(Func));
+ }
+ void _iaca_end() { // Inserts an IacaEnd instruction into Context.
+ if (!BuildDefs::minimal()) // Does nothing when BuildDefs::minimal() is true.
+ Context.insert(Traits::Insts::IacaEnd::create(Func));
+ }
+
+ /// Wraps IACA marks around the code generated by the current scope: the
+ /// constructor inserts the start mark; end() or the destructor inserts the end.
+ class ScopedIacaMark {
+ ScopedIacaMark(const ScopedIacaMark &) = delete;
+ ScopedIacaMark &operator=(const ScopedIacaMark &) = delete;
+ // The constructor is explicit so a pointer never converts into a mark silently.
+ public:
+ explicit ScopedIacaMark(TargetX86Base *Lowering) : Lowering(Lowering) {
+ Lowering->_iaca_start(); // Open the mark as soon as the object exists.
+ }
+ ~ScopedIacaMark() { end(); } // No-op if end() was already called.
+ void end() {
+ if (!Lowering) // Already ended; never insert a second end mark.
+ return;
+ Lowering->_iaca_end();
+ Lowering = nullptr;
+ }
+
+ private:
+ TargetX86Base *Lowering; // Set to nullptr once the end mark is inserted.
+ };
+
bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1);
void findRMW();