Subzero: Support multiple fixups in one instruction.
The integrated assembler assumed there was at most one fixup per instruction. For x86, there can actually be two fixups, in any instruction that allows a memory operand and an immediate operand at the same time. With the now-default -build-on-read flag, this happens in spec2k; the smallest function I found where it happens is perm_tree_cons in 176.gcc.
This changes the textual emission of integrated assembler code to allow for multiple consecutive fixups in a single instruction.
BUG= none
R=jvoung@chromium.org
Review URL: https://codereview.chromium.org/693393002
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 081658f..f5e4550 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -349,13 +349,8 @@
GlobalContext *Ctx = Func->getContext();
Ostream &Str = Ctx->getStrEmit();
intptr_t EndPosition = Asm->GetPosition();
- intptr_t LastFixupLoc = -1;
- AssemblerFixup *LastFixup = NULL;
- if (Asm->GetLatestFixup()) {
- LastFixup = Asm->GetLatestFixup();
- LastFixupLoc = LastFixup->position();
- }
- if (LastFixupLoc < StartPosition) {
+ AssemblerFixup *LastFixup = Asm->GetLatestFixup(StartPosition);
+ if (LastFixup == NULL) {
// The fixup doesn't apply to this current block.
for (intptr_t i = StartPosition; i < EndPosition; ++i) {
Str << "\t.byte 0x";
@@ -364,25 +359,32 @@
}
return;
}
+ intptr_t LastFixupLoc = LastFixup->position();
const intptr_t FixupSize = 4;
- assert(LastFixupLoc + FixupSize <= EndPosition);
// The fixup does apply to this current block.
for (intptr_t i = StartPosition; i < LastFixupLoc; ++i) {
Str << "\t.byte 0x";
Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
Str << "\n";
}
- Str << "\t.long ";
- const ConstantRelocatable *Reloc = LastFixup->value();
- if (Reloc->getSuppressMangling())
- Str << Reloc->getName();
- else
- Str << Ctx->mangleName(Reloc->getName());
- if (LastFixup->value()->getOffset()) {
- Str << " + " << LastFixup->value()->getOffset();
+ while (LastFixup) {
+ Str << "\t.long ";
+ const ConstantRelocatable *Reloc = LastFixup->value();
+ if (Reloc->getSuppressMangling())
+ Str << Reloc->getName();
+ else
+ Str << Ctx->mangleName(Reloc->getName());
+ if (LastFixup->value()->getOffset()) {
+ Str << " + " << LastFixup->value()->getOffset();
+ }
+ Str << "\n";
+ LastFixupLoc += FixupSize;
+ assert(LastFixupLoc <= EndPosition);
+ LastFixup = Asm->GetLatestFixup(LastFixupLoc);
+ // Assume multi-fixups are adjacent in the instruction encoding.
+ assert(LastFixup == NULL || LastFixup->position() == LastFixupLoc);
}
- Str << "\n";
- for (intptr_t i = LastFixupLoc + FixupSize; i < EndPosition; ++i) {
+ for (intptr_t i = LastFixupLoc; i < EndPosition; ++i) {
Str << "\t.byte 0x";
Str.write_hex(Asm->LoadBuffer<uint8_t>(i));
Str << "\n";
diff --git a/src/assembler.cpp b/src/assembler.cpp
index 6f2ec81..6099d71 100644
--- a/src/assembler.cpp
+++ b/src/assembler.cpp
@@ -74,10 +74,16 @@
AssemblerBuffer::~AssemblerBuffer() {}
-AssemblerFixup *AssemblerBuffer::GetLatestFixup() const {
- if (fixups_.empty())
- return NULL;
- return fixups_.back();
+// Returns the first (lowest-position) fixup at or after the given position,
+// or NULL if there is none. Assumes fixups were added in increasing order.
+AssemblerFixup *AssemblerBuffer::GetLatestFixup(intptr_t position) const {
+ AssemblerFixup *latest_fixup = NULL;
+ for (auto I = fixups_.rbegin(), E = fixups_.rend(); I != E; ++I) {
+ if ((*I)->position() < position)
+ return latest_fixup;
+ latest_fixup = *I;
+ }
+ return latest_fixup;
}
void AssemblerBuffer::ProcessFixups(const MemoryRegion &region) {
diff --git a/src/assembler.h b/src/assembler.h
index d3cd9bf..dcb83ed 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -163,7 +163,7 @@
intptr_t GetPosition() const { return cursor_ - contents_; }
// For bringup only.
- AssemblerFixup *GetLatestFixup() const;
+ AssemblerFixup *GetLatestFixup(intptr_t position) const;
private:
// The limit is set to kMinimumGap bytes before the end of the data area.
diff --git a/src/assembler_ia32.h b/src/assembler_ia32.h
index b1079d1..f393f71 100644
--- a/src/assembler_ia32.h
+++ b/src/assembler_ia32.h
@@ -830,7 +830,9 @@
template <typename T> T LoadBuffer(intptr_t position) const {
return buffer_.Load<T>(position);
}
- AssemblerFixup *GetLatestFixup() const { return buffer_.GetLatestFixup(); }
+ AssemblerFixup *GetLatestFixup(intptr_t position) const {
+ return buffer_.GetLatestFixup(position);
+ }
private:
inline void EmitUint8(uint8_t value);
diff --git a/tests_lit/llvm2ice_tests/ias-multi-reloc.ll b/tests_lit/llvm2ice_tests/ias-multi-reloc.ll
new file mode 100644
index 0000000..4e25c66
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/ias-multi-reloc.ll
@@ -0,0 +1,61 @@
+; Tests the integrated assembler for instructions with multiple
+; relocations.
+
+; RUN: %p2i -i %s --args -O2 | FileCheck %s
+
+; char global_char;
+; char *p_global_char;
+; void dummy();
+; void store_immediate_to_global() { p_global_char = &global_char; }
+; void add_in_place() { p_global_char += (int)&global_char; }
+; void cmp_global_immediate() { if (p_global_char == &global_char) dummy(); }
+
+@global_char = internal global [1 x i8] zeroinitializer, align 1
+@p_global_char = internal global [4 x i8] zeroinitializer, align 4
+declare void @dummy(i32)
+
+define internal void @store_immediate_to_global() {
+entry:
+ %p_global_char.bc = bitcast [4 x i8]* @p_global_char to i32*
+ %expanded1 = ptrtoint [1 x i8]* @global_char to i32
+ store i32 %expanded1, i32* %p_global_char.bc, align 1
+ ret void
+}
+; CHECK-LABEL: store_immediate_to_global
+; CHECK: .long p_global_char
+; CHECK: .long global_char
+
+define internal void @add_in_place() {
+entry:
+ %p_global_char.bc = bitcast [4 x i8]* @p_global_char to i32*
+ %0 = load i32* %p_global_char.bc, align 1
+ %expanded1 = ptrtoint [1 x i8]* @global_char to i32
+ %gep = add i32 %0, %expanded1
+ %p_global_char.bc3 = bitcast [4 x i8]* @p_global_char to i32*
+ store i32 %gep, i32* %p_global_char.bc3, align 1
+ ret void
+}
+; CHECK-LABEL: add_in_place
+; CHECK: .long global_char
+; CHECK: .long p_global_char
+
+define internal void @cmp_global_immediate() {
+entry:
+ %p_global_char.bc = bitcast [4 x i8]* @p_global_char to i32*
+ %0 = load i32* %p_global_char.bc, align 1
+ %expanded1 = ptrtoint [1 x i8]* @global_char to i32
+ %cmp = icmp eq i32 %0, %expanded1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %dummy.bc = bitcast void (i32)* @dummy to void ()*
+ tail call void %dummy.bc()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+; CHECK-LABEL: cmp_global_immediate
+; CHECK: .long p_global_char
+; CHECK: .long global_char
+; CHECK: call dummy