ExecutionEngine: add preliminary support for COFF ARM64
It was originally committed into LLVM: https://reviews.llvm.org/D69434
This CL aims to continue the work on Swiftshader for Windows on ARM64.
Bug: chromium:893460
Change-Id: Ib0de220c1d332f2a16d2275a5f3aef4d1a53ee33
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/38453
Reviewed-by: Alexis Hétu <sugoi@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/third_party/llvm-7.0/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/third_party/llvm-7.0/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 1c54ad6..23cde06 100644
--- a/third_party/llvm-7.0/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/third_party/llvm-7.0/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "RuntimeDyldCOFF.h"
+#include "Targets/RuntimeDyldCOFFAArch64.h"
#include "Targets/RuntimeDyldCOFFI386.h"
#include "Targets/RuntimeDyldCOFFThumb.h"
#include "Targets/RuntimeDyldCOFFX86_64.h"
@@ -56,6 +57,8 @@
return make_unique<RuntimeDyldCOFFThumb>(MemMgr, Resolver);
case Triple::x86_64:
return make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver);
+ case Triple::aarch64:
+ return make_unique<RuntimeDyldCOFFAArch64>(MemMgr, Resolver);
}
}
diff --git a/third_party/llvm-7.0/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h b/third_party/llvm-7.0/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
new file mode 100644
index 0000000..9f2e224
--- /dev/null
+++ b/third_party/llvm-7.0/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
@@ -0,0 +1,365 @@
+//===-- RuntimeDyldCOFFAArch64.h --- COFF/AArch64 specific code ---*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF AArch64 support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFAARCH64_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFAARCH64_H
+
+#include "../RuntimeDyldCOFF.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Endian.h"
+
+#define DEBUG_TYPE "dyld"
+
+using namespace llvm::support::endian;
+
+namespace llvm {
+
+// This relocation type is used for handling long branch instruction
+// throught the Stub.
+enum InternalRelocationType : unsigned {
+ INTERNAL_REL_ARM64_LONG_BRANCH26 = 0x111,
+};
+
+static void add16(uint8_t *p, int16_t v) { write16le(p, read16le(p) + v); }
+static void or32le(void *P, int32_t V) { write32le(P, read32le(P) | V); }
+
+static void write32AArch64Imm(uint8_t *T, uint64_t imm, uint32_t rangeLimit) {
+ uint32_t orig = read32le(T);
+ orig &= ~(0xFFF << 10);
+ write32le(T, orig | ((imm & (0xFFF >> rangeLimit)) << 10));
+}
+
+static void write32AArch64Ldr(uint8_t *T, uint64_t imm) {
+ uint32_t orig = read32le(T);
+ uint32_t size = orig >> 30;
+ // 0x04000000 indicates SIMD/FP registers
+ // 0x00800000 indicates 128 bit
+ if ((orig & 0x04800000) == 0x04800000)
+ size += 4;
+ if ((imm & ((1 << size) - 1)) != 0)
+ assert(0 && "misaligned ldr/str offset");
+ write32AArch64Imm(T, imm >> size, size);
+}
+
+static void write32AArch64Addr(void *T, uint64_t s, uint64_t p, int shift) {
+ uint64_t Imm = (s >> shift) - (p >> shift);
+ uint32_t ImmLo = (Imm & 0x3) << 29;
+ uint32_t ImmHi = (Imm & 0x1FFFFC) << 3;
+ uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3);
+ write32le(T, (read32le(T) & ~Mask) | ImmLo | ImmHi);
+}
+
+class RuntimeDyldCOFFAArch64 : public RuntimeDyldCOFF {
+
+private:
+ // When a module is loaded we save the SectionID of the unwind
+ // sections in a table until we receive a request to register all
+ // unregisteredEH frame sections with the memory manager.
+ SmallVector<SID, 2> UnregisteredEHFrameSections;
+ SmallVector<SID, 2> RegisteredEHFrameSections;
+ uint64_t ImageBase;
+
+ // Fake an __ImageBase pointer by returning the section with the lowest adress
+ uint64_t getImageBase() {
+ if (!ImageBase) {
+ ImageBase = std::numeric_limits<uint64_t>::max();
+ for (const SectionEntry &Section : Sections)
+ // The Sections list may contain sections that weren't loaded for
+ // whatever reason: they may be debug sections, and ProcessAllSections
+ // is false, or they may be sections that contain 0 bytes. If the
+ // section isn't loaded, the load address will be 0, and it should not
+ // be included in the ImageBase calculation.
+ if (Section.getLoadAddress() != 0)
+ ImageBase = std::min(ImageBase, Section.getLoadAddress());
+ }
+ return ImageBase;
+ }
+
+public:
+ RuntimeDyldCOFFAArch64(RuntimeDyld::MemoryManager &MM,
+ JITSymbolResolver &Resolver)
+ : RuntimeDyldCOFF(MM, Resolver), ImageBase(0) {}
+
+ unsigned getStubAlignment() override { return 8; }
+
+ unsigned getMaxStubSize() override { return 20; }
+
+ std::tuple<uint64_t, uint64_t, uint64_t>
+ generateRelocationStub(unsigned SectionID, StringRef TargetName,
+ uint64_t Offset, uint64_t RelType, uint64_t Addend,
+ StubMap &Stubs) {
+ uintptr_t StubOffset;
+ SectionEntry &Section = Sections[SectionID];
+
+ RelocationValueRef OriginalRelValueRef;
+ OriginalRelValueRef.SectionID = SectionID;
+ OriginalRelValueRef.Offset = Offset;
+ OriginalRelValueRef.Addend = Addend;
+ OriginalRelValueRef.SymbolName = TargetName.data();
+
+ auto Stub = Stubs.find(OriginalRelValueRef);
+ if (Stub == Stubs.end()) {
+ LLVM_DEBUG(dbgs() << " Create a new stub function for "
+ << TargetName.data() << "\n");
+
+ StubOffset = Section.getStubOffset();
+ Stubs[OriginalRelValueRef] = StubOffset;
+ createStubFunction(Section.getAddressWithOffset(StubOffset));
+ Section.advanceStubOffset(getMaxStubSize());
+ } else {
+ LLVM_DEBUG(dbgs() << " Stub function found for " << TargetName.data()
+ << "\n");
+ StubOffset = Stub->second;
+ }
+
+ // Resolve original relocation to stub function.
+ const RelocationEntry RE(SectionID, Offset, RelType, Addend);
+ resolveRelocation(RE, Section.getLoadAddressWithOffset(StubOffset));
+
+ // adjust relocation info so resolution writes to the stub function
+ // Here an internal relocation type is used for resolving long branch via
+ // stub instruction.
+ Addend = 0;
+ Offset = StubOffset;
+ RelType = INTERNAL_REL_ARM64_LONG_BRANCH26;
+
+ return std::make_tuple(Offset, RelType, Addend);
+ }
+
+ Expected<object::relocation_iterator>
+ processRelocationRef(unsigned SectionID, object::relocation_iterator RelI,
+ const object::ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ StubMap &Stubs) override {
+
+ auto Symbol = RelI->getSymbol();
+ if (Symbol == Obj.symbol_end())
+ report_fatal_error("Unknown symbol in relocation");
+
+ Expected<StringRef> TargetNameOrErr = Symbol->getName();
+ if (!TargetNameOrErr)
+ return TargetNameOrErr.takeError();
+ StringRef TargetName = *TargetNameOrErr;
+
+ auto SectionOrErr = Symbol->getSection();
+ if (!SectionOrErr)
+ return SectionOrErr.takeError();
+ auto Section = *SectionOrErr;
+
+ uint64_t RelType = RelI->getType();
+ uint64_t Offset = RelI->getOffset();
+
+ // If there is no section, this must be an external reference.
+ const bool IsExtern = Section == Obj.section_end();
+
+ // Determine the Addend used to adjust the relocation value.
+ uint64_t Addend = 0;
+ SectionEntry &AddendSection = Sections[SectionID];
+ uintptr_t ObjTarget = AddendSection.getObjAddress() + Offset;
+ uint8_t *Displacement = (uint8_t *)ObjTarget;
+
+ switch (RelType) {
+ case COFF::IMAGE_REL_ARM64_ADDR32:
+ case COFF::IMAGE_REL_ARM64_ADDR32NB:
+ case COFF::IMAGE_REL_ARM64_REL32:
+ case COFF::IMAGE_REL_ARM64_SECREL:
+ Addend = read32le(Displacement);
+ break;
+ case COFF::IMAGE_REL_ARM64_BRANCH26: {
+ uint32_t orig = read32le(Displacement);
+ Addend = (orig & 0x03FFFFFF) << 2;
+
+ if (IsExtern)
+ std::tie(Offset, RelType, Addend) = generateRelocationStub(
+ SectionID, TargetName, Offset, RelType, Addend, Stubs);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH19: {
+ uint32_t orig = read32le(Displacement);
+ Addend = (orig & 0x00FFFFE0) >> 3;
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH14: {
+ uint32_t orig = read32le(Displacement);
+ Addend = (orig & 0x000FFFE0) >> 3;
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_REL21:
+ case COFF::IMAGE_REL_ARM64_PAGEBASE_REL21: {
+ uint32_t orig = read32le(Displacement);
+ Addend = ((orig >> 29) & 0x3) | ((orig >> 3) & 0x1FFFFC);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_PAGEOFFSET_12L:
+ case COFF::IMAGE_REL_ARM64_PAGEOFFSET_12A: {
+ uint32_t orig = read32le(Displacement);
+ Addend = ((orig >> 10) & 0xFFF);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_ADDR64: {
+ Addend = read64le(Displacement);
+ }
+ default:
+ break;
+ }
+
+#if !defined(NDEBUG)
+ SmallString<32> RelTypeName;
+ RelI->getTypeName(RelTypeName);
+
+ LLVM_DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset
+ << " RelType: " << RelTypeName << " TargetName: "
+ << TargetName << " Addend " << Addend << "\n");
+#endif
+
+ unsigned TargetSectionID = -1;
+ if (IsExtern) {
+ RelocationEntry RE(SectionID, Offset, RelType, Addend);
+ addRelocationForSymbol(RE, TargetName);
+ } else {
+ if (auto TargetSectionIDOrErr = findOrEmitSection(
+ Obj, *Section, Section->isText(), ObjSectionToID)) {
+ TargetSectionID = *TargetSectionIDOrErr;
+ } else
+ return TargetSectionIDOrErr.takeError();
+
+ uint64_t TargetOffset = getSymbolOffset(*Symbol);
+ RelocationEntry RE(SectionID, Offset, RelType, TargetOffset + Addend);
+ addRelocationForSection(RE, TargetSectionID);
+ }
+ return ++RelI;
+ }
+
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ const auto Section = Sections[RE.SectionID];
+ uint8_t *Target = Section.getAddressWithOffset(RE.Offset);
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset);
+
+ switch (RE.RelType) {
+ default:
+ llvm_unreachable("unsupported relocation type");
+ break;
+ case COFF::IMAGE_REL_ARM64_ABSOLUTE: {
+ // This relocation is ignored.
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_PAGEBASE_REL21: {
+ // The page base of the target, for ADRP instruction.
+ Value += RE.Addend;
+ write32AArch64Addr(Target, Value, FinalAddress, 12);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_REL21: {
+ // The 12-bit relative displacement to the target, for instruction ADR
+ Value += RE.Addend;
+ write32AArch64Addr(Target, Value, FinalAddress, 0);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_PAGEOFFSET_12A: {
+ // The 12-bit page offset of the target,
+ // for instructions ADD/ADDS (immediate) with zero shift.
+ Value += RE.Addend;
+ write32AArch64Imm(Target, Value & 0xFFF, 0);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_PAGEOFFSET_12L: {
+ // The 12-bit page offset of the target,
+ // for instruction LDR (indexed, unsigned immediate).
+ Value += RE.Addend;
+ write32AArch64Ldr(Target, Value & 0xFFF);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_ADDR32: {
+ // The 32-bit VA of the target.
+ uint32_t VA = Value + RE.Addend;
+ write32le(Target, VA);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_ADDR32NB: {
+ // The target's 32-bit RVA.
+ uint64_t RVA = Value + RE.Addend - getImageBase();
+ write32le(Target, RVA);
+ break;
+ }
+ case INTERNAL_REL_ARM64_LONG_BRANCH26: {
+ // Encode the immadiate value for generated Stub instruction (MOVZ)
+ or32le(Target + 12, ((Value + RE.Addend) & 0xFFFF) << 5);
+ or32le(Target + 8, ((Value + RE.Addend) & 0xFFFF0000) >> 11);
+ or32le(Target + 4, ((Value + RE.Addend) & 0xFFFF00000000) >> 27);
+ or32le(Target + 0, ((Value + RE.Addend) & 0xFFFF000000000000) >> 43);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH26: {
+ // The 26-bit relative displacement to the target, for B and BL
+ // instructions.
+ uint64_t PCRelVal = Value + RE.Addend - FinalAddress;
+ assert(isInt<28>(PCRelVal) && "Branch target is out of range.");
+ write32le(Target, (read32le(Target) & ~(0x03FFFFFF)) |
+ (PCRelVal & 0x0FFFFFFC) >> 2);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH19: {
+ // The 19-bit offset to the relocation target,
+ // for conditional B instruction.
+ uint64_t PCRelVal = Value + RE.Addend - FinalAddress;
+ assert(isInt<21>(PCRelVal) && "Branch target is out of range.");
+ write32le(Target, (read32le(Target) & ~(0x00FFFFE0)) |
+ (PCRelVal & 0x001FFFFC) << 3);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_BRANCH14: {
+ // The 14-bit offset to the relocation target,
+ // for instructions TBZ and TBNZ.
+ uint64_t PCRelVal = Value + RE.Addend - FinalAddress;
+ assert(isInt<16>(PCRelVal) && "Branch target is out of range.");
+ write32le(Target, (read32le(Target) & ~(0x000FFFE0)) |
+ (PCRelVal & 0x0000FFFC) << 3);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_ADDR64: {
+ // The 64-bit VA of the relocation target.
+ write64le(Target, Value + RE.Addend);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_SECTION: {
+ // 16-bit section index of the section that contains the target.
+ assert(static_cast<uint32_t>(RE.SectionID) <= UINT16_MAX &&
+ "relocation overflow");
+ add16(Target, RE.SectionID);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_SECREL: {
+ // 32-bit offset of the target from the beginning of its section.
+ assert(static_cast<int64_t>(RE.Addend) <= INT32_MAX &&
+ "Relocation overflow");
+ assert(static_cast<int64_t>(RE.Addend) >= INT32_MIN &&
+ "Relocation underflow");
+ write32le(Target, RE.Addend);
+ break;
+ }
+ case COFF::IMAGE_REL_ARM64_REL32: {
+ // The 32-bit relative address from the byte following the relocation.
+ uint64_t Result = Value - FinalAddress - 4;
+ write32le(Target, Result + RE.Addend);
+ break;
+ }
+ }
+ }
+
+ void registerEHFrames() override {}
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/third_party/llvm-7.0/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/third_party/llvm-7.0/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 120d713..fdd3efe 100644
--- a/third_party/llvm-7.0/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/third_party/llvm-7.0/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -223,7 +223,10 @@
// The default MCJIT memory managers make no guarantees about where they can
// find an executable page; JITed code needs to be able to refer to globals
// no matter how far away they are.
- if (JIT)
+ // We should set the CodeModel::Small for Windows ARM64 in JIT mode,
+ // since with large code model LLVM generating 4 MOV instructions, and
+ // Windows doesn't support relocating these long branch (4 MOVs).
+ if (JIT && !TT.isOSWindows())
return CodeModel::Large;
return CodeModel::Small;
}
diff --git a/third_party/llvm-7.0/llvm/test/ExecutionEngine/RuntimeDyld/AArch64/COFF_AArch64.s b/third_party/llvm-7.0/llvm/test/ExecutionEngine/RuntimeDyld/AArch64/COFF_AArch64.s
new file mode 100644
index 0000000..467f99b
--- /dev/null
+++ b/third_party/llvm-7.0/llvm/test/ExecutionEngine/RuntimeDyld/AArch64/COFF_AArch64.s
@@ -0,0 +1,152 @@
+// RUN: llvm-mc -triple aarch64-windows -filetype obj -o %t.obj %s
+// RUN: llvm-rtdyld -triple aarch64-windows -dummy-extern dummy=0x79563413 -dummy-extern dummyA=0x78566879 -target-addr-start=40960000000000 -verify -check %s %t.obj
+
+ .text
+ .def _bnamed
+ .scl 2
+ .type 32
+ .endef
+
+ .globl _bnamed
+ .align 2
+_bnamed:
+ ret
+
+ .def _foo
+ .scl 2
+ .type 32
+ .endef
+ .globl _foo
+ .align 2
+_foo:
+ movz w0, #0
+ ret
+
+ .globl _test_adr_relocation
+ .align 2
+
+# IMAGE_REL_ARM64_REL21
+# rtdyld-check: decode_operand(adr1, 1) = (_const[20:0] - adr1[20:0])
+_test_adr_relocation:
+adr1:
+ adr x0, _const
+ ret
+
+ .globl _test_branch26_reloc
+ .align 2
+
+# IMAGE_REL_ARM64_BRANCH26, test long branch
+# rtdyld-check: decode_operand(brel, 0)[25:0] = (stub_addr(COFF_AArch64.s.tmp.obj/.text, dummy) - brel)[27:2]
+_test_branch26_reloc:
+brel:
+ b dummy
+ ret
+
+ .globl _test_branch19_reloc
+ .align 2
+
+# IMAGE_REL_ARM64_BRANCH19
+# rtdyld-check: decode_operand(bcond, 1)[18:0] = (_foo - bcond)[20:2]
+_test_branch19_reloc:
+ mov x0, #3
+ cmp x0, #2
+bcond:
+ bne _foo
+ ret
+
+ .globl _test_branch14_reloc
+ .align 2
+
+# IMAGE_REL_ARM64_BRANCH14
+# rtdyld-check: decode_operand(tbz_branch, 2)[13:0] = (_bnamed - tbz_branch)[15:2]
+_test_branch14_reloc:
+ mov x1, #0
+tbz_branch:
+ tbz x1, #0, _bnamed
+ ret
+
+ .globl _test_adrp_ldr_reloc
+ .align 2
+
+# IMAGE_REL_ARM64_PAGEBASE_REL21
+# rtdyld-check: decode_operand(adrp1, 1) = (_const[32:12] - adrp1[32:12])
+_test_adrp_ldr_reloc:
+adrp1:
+ adrp x0, _const
+
+# IMAGE_REL_ARM64_PAGEOFFSET_12L
+# rtdyld-check: decode_operand(ldr1, 2) = _const[11:3]
+ldr1:
+ ldr x0, [x0, #:lo12:_const]
+ ret
+
+ .globl _test_add_reloc
+ .align 2
+
+# IMAGE_REL_ARM64_PAGEOFFSET_12A
+# rtdyld-check: decode_operand(add1, 2) = (tgt+4)[11:0]
+_test_add_reloc:
+add1:
+ add x0, x0, tgt@PAGEOFF+4
+ ret
+
+ .section .data
+ .globl _test_addr64_reloc
+ .align 2
+
+# IMAGE_REL_ARM64_ADDR64
+# rtdyld-check: *{8}addr64 = tgt+4
+_test_addr64_reloc:
+addr64:
+ .quad tgt+4
+
+# IMAGE_REL_ARM64_ADDR32
+# rtdyld-check: *{4}_test_addr32_reloc = 0x78566879
+_test_addr32_reloc:
+ .long dummyA
+
+ .globl _relocations
+ .align 2
+
+# IMAGE_REL_ARM64_ADDR32NB, RVA of the target
+# rtdyld-check: *{4}_relocations = _foo - 40960000000000
+_relocations:
+ .long _foo@IMGREL
+
+# IMAGE_REL_ARM64_ADDR32NB
+# rtdyld-check: *{4}imgrel2 = _string - 40960000000000+5
+imgrel2:
+ .long _string@IMGREL+5
+
+# IMAGE_REL_ARM64_SECTION
+# rtdyld-check: *{2}secindex = 1
+secindex:
+ .secidx _test_addr32_reloc
+
+# IMAGE_REL_ARM64_SECREL
+# rtdyld-check: *{4}secrel = string - section_addr(COFF_AArch64.s.tmp.obj, .data)
+secrel:
+ .secrel32 string
+
+ .globl _const
+ .align 3
+_const:
+ .quad 4614256650576692846
+
+tgt:
+ .word 1
+ .word 2
+ .word 3
+ .word 4
+ .word 5
+
+ .globl string
+ .align 2
+string:
+ .asciz "Hello World\n"
+
+ .section .rdata,"dr"
+ .globl _string
+ .align 2
+_string:
+ .asciz "Hello World\n"