Subzero: Add Non-SFI support for x86-32.
The basic model is that each translated function begins with a special "GotVar = getIP" instruction, and each ConstantRelocatable reference is changed to GotVar+ConstantRelocatable@GOTOFF (assuming GotVar is legalized into a physical register). The getIP instruction is late-lowered into:
call __Sz_getIP_<reg>
add <reg>, $_GLOBAL_OFFSET_TABLE_
mov GotVar, <reg>
Note that _GLOBAL_OFFSET_TABLE_ gets a special relocation type.
The register allocator takes GotVar uses into account, giving appropriate weight toward register allocation.
If there are no uses of GotVar, the getIP instruction gets naturally dead-code eliminated. Special treatment is needed to prevent this elimination when the only GotVar uses are for (floating point) constant pool values from Phi instructions, since the Phi lowering with its GotVar legalization happens after the main round of register allocation.
The x86 mem operand now has an IsPIC field to indicate whether it has been PIC-legalized. Mem operands are sometimes legalized more than once, and this IsPIC field keeps GotVar from being added more than once.
We have to limit the aggressiveness of address mode inference, to make sure a register slot is left for the GotVar.
The Subzero runtime has new asm files to implement all possible __Sz_getIP_<reg> helpers.
The szbuild.py script and the spec2k version support Non-SFI builds. Running spec2k depends on a pending change to the spec2k run_all.sh script.
Read-only data sections need to be named .data.rel.ro instead of .rodata because of PIC rules.
Most cross tests are working, but there is a problem with vector types that does not appear to be Subzero-related, so most vector tests are disabled for now.
Still to do:
* Fix "--nonsfi --filetype=iasm". The llvm-mc assembler doesn't properly apply the _GLOBAL_OFFSET_TABLE_ relocation in iasm mode. Maybe I can find a different syntactic trick that works, or use hybrid iasm for this limited case.
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4327
R=jpp@chromium.org
Review URL: https://codereview.chromium.org/1506653002 .
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 96ed422..addb599 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -24,6 +24,7 @@
#include "IceInst.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"
+#include "IceTargetLoweringX86Base.h"
namespace Ice {
@@ -57,6 +58,10 @@
}
template <typename TraitsType>
+InstImpl<TraitsType>::InstX86GetIP::InstX86GetIP(Cfg *Func, Variable *Dest) // Builds a GetIP instruction whose only operand is its destination.
+ : InstX86Base(Func, InstX86Base::GetIP, 0, Dest) {} // Forwards kind GetIP and Dest; the literal 0 is presumably the source-operand count -- TODO confirm against InstX86Base.
+
+template <typename TraitsType>
InstImpl<TraitsType>::InstX86Mul::InstX86Mul(Cfg *Func, Variable *Dest,
Variable *Source1,
Operand *Source2)
@@ -391,6 +396,55 @@
}
template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86GetIP::emit(const Cfg *Func) const { // Textual emission: writes a single "addl $_GLOBAL_OFFSET_TABLE_, <dest>" line; no call is written here.
+ if (!BuildDefs::dump()) // Emits nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(this->getDest()->hasReg()); // The destination must have a register assigned.
+ Str << "\t"
+ "addl\t$_GLOBAL_OFFSET_TABLE_, ";
+ this->getDest()->emit(Func); // The destination is printed after the comma, completing the addl.
+}
+
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86GetIP::emitIAS(const Cfg *Func) const { // Assembler-API emission: an add of an immediate carrying an FK_GotPC fixup on _GLOBAL_OFFSET_TABLE_ into the destination register.
+ if (Func->getContext()->getFlags().getOutFileType() == FT_Iasm) { // The FT_Iasm output type is rejected with a fatal error.
+ // TODO(stichnot): Find a workaround for llvm-mc's inability to handle
+ // something like ".long _GLOBAL_OFFSET_TABLE_ + ." . One possibility is to
+ // just use hybrid iasm output for this add instruction.
+ llvm::report_fatal_error(
+ "Iasm support for _GLOBAL_OFFSET_TABLE_ not implemented");
+ }
+ Assembler *Asm = Func->getAssembler<Assembler>();
+ assert(this->getDest()->hasReg()); // The destination must have a register assigned.
+ GPRRegister Reg = Traits::getEncodedGPR(this->getDest()->getRegNum());
+ Constant *GlobalOffsetTable =
+ Func->getContext()->getConstantExternSym("_GLOBAL_OFFSET_TABLE_");
+ AssemblerFixup *Fixup = Asm->createFixup(Traits::FK_GotPC, GlobalOffsetTable);
+ intptr_t OrigPos = Asm->getBufferSize(); // Buffer size before the add is emitted; used both for sizing and for the rollback.
+ constexpr int32_t TempDisp = 0; // Placeholder immediate value for the sizing pass.
+ constexpr int32_t ImmediateWidth = 4; // Presumably the byte width of the add's immediate field -- TODO confirm.
+ // Emit the add instruction once, in a preliminary fashion, to find its total
+ // size. TODO(stichnot): IceType_i32 should really be something that
+ // represents the target's pointer type.
+ Asm->add(IceType_i32, Reg, AssemblerImmediate(TempDisp, Fixup));
+ const int32_t Disp = Asm->getBufferSize() - OrigPos - ImmediateWidth; // Bytes emitted by the add minus ImmediateWidth; presumably the offset of the immediate within the instruction.
+ // Now roll back and emit the add instruction again, this time with the
+ // correct displacement.
+ Asm->setBufferSize(OrigPos); // Rewind to OrigPos so the second add is written over the first.
+ Asm->add(IceType_i32, Reg, AssemblerImmediate(Disp, Fixup)); // NOTE(review): the same Fixup is passed to both add() calls -- confirm the assembler does not record it twice.
+}
+
+template <typename TraitsType>
+void InstImpl<TraitsType>::InstX86GetIP::dump(const Cfg *Func) const { // Dump form: the destination followed by " = call getIP".
+ if (!BuildDefs::dump()) // Prints nothing when BuildDefs::dump() is false.
+ return;
+ Ostream &Str = Func->getContext()->getStrDump();
+ this->getDest()->dump(Func); // The destination is dumped first, then the fixed text below.
+ Str << " = call getIP";
+}
+
+template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Label::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
@@ -679,7 +733,8 @@
} else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
(Asm->*(Emitter.GPRImm))(Ty, VarReg, AssemblerImmediate(Imm->getValue()));
} else if (const auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
- AssemblerFixup *Fixup = Asm->createFixup(Traits::RelFixup, Reloc);
+ AssemblerFixup *Fixup =
+ Asm->createFixup(Traits::TargetLowering::getAbsFixup(), Reloc);
(Asm->*(Emitter.GPRImm))(Ty, VarReg,
AssemblerImmediate(Reloc->getOffset(), Fixup));
} else if (const auto *Split = llvm::dyn_cast<VariableSplit>(Src)) {
@@ -703,7 +758,8 @@
} else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
(Asm->*(Emitter.AddrImm))(Ty, Addr, AssemblerImmediate(Imm->getValue()));
} else if (const auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
- AssemblerFixup *Fixup = Asm->createFixup(Traits::RelFixup, Reloc);
+ AssemblerFixup *Fixup =
+ Asm->createFixup(Traits::TargetLowering::getAbsFixup(), Reloc);
(Asm->*(Emitter.AddrImm))(Ty, Addr,
AssemblerImmediate(Reloc->getOffset(), Fixup));
} else {