Subzero. Native 64-bit int arithmetic on x86-64.
This CL modifies the x86 instruction selection template to support native
64-bit GPRs. It also enables the x86-64 crosstests.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1273153002.
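
For illustration (a typical-lowering sketch, not part of the original CL
description): an i64 add such as "%res = add i64 %a, %b" can now be selected
as a single 64-bit addq, instead of the addl/adcl pair that the x86-32
lowering needs for the two 32-bit halves.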
diff --git a/Makefile.standalone b/Makefile.standalone
index 0248a11..d53332f 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -325,6 +325,7 @@
RT_SRC := runtime/szrt.c runtime/szrt_ll.ll runtime/szrt_profiler.c
RT_OBJ := build/runtime/szrt_native_x8632.o build/runtime/szrt_sb_x8632.o \
+ build/runtime/szrt_native_x8664.o build/runtime/szrt_sb_x8664.o \
build/runtime/szrt_native_arm32.o build/runtime/szrt_sb_arm32.o
runtime: $(RT_OBJ)
@@ -348,10 +349,13 @@
check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
# Do all native/sse2 tests, but only test_vector_ops for native/sse4.1.
# For (slow) sandboxed tests, limit to Om1/sse4.1.
+ # TODO(jpp): implement x8664 sandbox, then enable xtests.
./pydir/crosstest_generator.py -v --lit \
--toolchain-root $(TOOLCHAIN_ROOT) \
-i x8632,native,sse2 -i x8632,native,sse4.1,test_vector_ops \
-i x8632,sandbox,sse4.1,Om1 \
+ -i x8664,native,sse2 -i x8664,native,sse4.1,test_vector_ops \
+ -e x8664,native,sse2,test_global \
-i arm32,native,neon,simple_loop \
-i arm32,native,neon,mem_intrin \
-i arm32,native,neon,test_bitmanip \
diff --git a/crosstest/mem_intrin.cpp b/crosstest/mem_intrin.cpp
index 612edce..0fe0387 100644
--- a/crosstest/mem_intrin.cpp
+++ b/crosstest/mem_intrin.cpp
@@ -8,6 +8,7 @@
#include <cstring>
#include "mem_intrin.h"
+#include "xdefs.h"
typedef int elem_t;
@@ -15,9 +16,9 @@
 * Reset buf to the sequence of bytes: init, init+1, init+2, ... (mod 256).
*/
static void __attribute__((noinline))
-reset_buf(uint8_t *buf, uint8_t init, size_t length) {
- size_t i;
- size_t v = init;
+reset_buf(uint8_t *buf, uint8_t init, SizeT length) {
+ SizeT i;
+ SizeT v = init;
for (i = 0; i < length; ++i)
buf[i] = v++;
}
@@ -27,8 +28,8 @@
* smaller buffers, whose total won't approach 2**16).
*/
static int __attribute__((noinline))
-fletcher_checksum(uint8_t *buf, size_t length) {
- size_t i;
+fletcher_checksum(uint8_t *buf, SizeT length) {
+ SizeT i;
int sum = 0;
int sum_of_sums = 0;
const int kModulus = 255;
@@ -63,20 +64,20 @@
return fletcher_checksum((uint8_t *)buf, BYTE_LENGTH);
}
-int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) {
+int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
reset_buf(buf, init, length);
memcpy((void *)buf2, (void *)buf, length);
return fletcher_checksum(buf2, length);
}
-int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) {
+int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
int sum1;
int sum2;
const int overlap_bytes = 4 * sizeof(elem_t);
if (length <= overlap_bytes)
return 0;
uint8_t *overlap_buf = buf + overlap_bytes;
- size_t reduced_length = length - overlap_bytes;
+ SizeT reduced_length = length - overlap_bytes;
reset_buf(buf, init, length);
/* Test w/ overlap. */
@@ -88,7 +89,7 @@
return sum1 + sum2;
}
-int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length) {
+int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
memset((void *)buf, init, length);
memset((void *)buf2, init + 4, length);
return fletcher_checksum(buf, length) + fletcher_checksum(buf2, length);
diff --git a/crosstest/mem_intrin.h b/crosstest/mem_intrin.h
index 70f02ae..f04e1b2 100644
--- a/crosstest/mem_intrin.h
+++ b/crosstest/mem_intrin.h
@@ -4,10 +4,11 @@
* There is no include guard since this will be included multiple times,
* under different namespaces.
*/
+#include "xdefs.h"
-int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length);
-int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length);
-int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, size_t length);
+int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
+int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
+int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memcpy_test_fixed_len(uint8_t init);
int memmove_test_fixed_len(uint8_t init);
diff --git a/crosstest/mem_intrin_main.cpp b/crosstest/mem_intrin_main.cpp
index 70e3a67..e1102ec 100644
--- a/crosstest/mem_intrin_main.cpp
+++ b/crosstest/mem_intrin_main.cpp
@@ -5,6 +5,8 @@
#include <cstdio>
#include "mem_intrin.h"
+#include "xdefs.h"
+
namespace Subzero_ {
#include "mem_intrin.h"
}
@@ -12,7 +14,7 @@
#define XSTR(s) STR(s)
#define STR(s) #s
-void testFixedLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+void testFixedLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
#define do_test_fixed(test_func) \
for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
++TotalTests; \
@@ -33,11 +35,11 @@
#undef do_test_fixed
}
-void testVariableLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+void testVariableLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
uint8_t buf[256];
uint8_t buf2[256];
#define do_test_variable(test_func) \
- for (size_t len = 4; len < 128; ++len) { \
+ for (SizeT len = 4; len < 128; ++len) { \
for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
++TotalTests; \
int llc_result = test_func(buf, buf2, init_val, len); \
@@ -58,7 +60,11 @@
#undef do_test_variable
}
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
unsigned TotalTests = 0;
unsigned Passes = 0;
unsigned Failures = 0;
diff --git a/crosstest/simple_loop_main.c b/crosstest/simple_loop_main.c
index 5ff36b8..6c738b9 100644
--- a/crosstest/simple_loop_main.c
+++ b/crosstest/simple_loop_main.c
@@ -6,7 +6,11 @@
int simple_loop(int *a, int n);
int Subzero_simple_loop(int *a, int n);
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
unsigned TotalTests = 0;
unsigned Passes = 0;
unsigned Failures = 0;
diff --git a/crosstest/stack_hack.x8664.c b/crosstest/stack_hack.x8664.c
new file mode 100644
index 0000000..45b0bb7
--- /dev/null
+++ b/crosstest/stack_hack.x8664.c
@@ -0,0 +1,76 @@
+//===- subzero/crosstest/stack_hack.x8664.c - X8664 stack hack ------------===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements main() for crosstests in x86-64.
+//
+//===----------------------------------------------------------------------===//
+#include <assert.h>
+#include <stdint.h>
+
+#include <sys/mman.h>
+
+// X8664_STACK_HACK needs to be defined before xdefs.h is included.
+#define X8664_STACK_HACK
+#include "xdefs.h"
+
+/// xSetStack is used to set %rsp to NewRsp. OldRsp is a pointer that will be
+/// used to save the old %rsp value.
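+/// The first xchgq swaps NewRsp into %rsp, leaving the previous %rsp value in
+/// the scratch register; the second xchgq then moves that value into *OldRsp.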
+#define xSetStack(NewRsp, OldRsp) \
+ do { \
+ __asm__ volatile("xchgq %1, %%rsp\n\t" \
+ "xchgq %1, %0" \
+ : "=r"(*(OldRsp)) \
+ : "r"(NewRsp)); \
+ } while (0)
+
+extern int wrapped_main(int argc, char *argv[]);
+
+unsigned char *xStackStart(uint32 StackEnd, uint32 Size) {
+ const uint32 PageBoundary = 4 << 20; // 4 MB.
+ const uint64 StackStart = StackEnd - Size;
+  assert((StackStart & (PageBoundary - 1)) == 0 &&
+         "StackStart not aligned to page boundary.");
+ (void)PageBoundary;
+ assert((StackStart & 0xFFFFFFFF00000000ull) == 0 && "StackStart wraps.");
+ return (unsigned char *)StackStart;
+}
+
+unsigned char *xAllocStack(uint64 StackEnd, uint32 Size) {
+ assert((StackEnd & 0xFFFFFFFF00000000ull) == 0 && "Invalid StackEnd.");
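+  // MAP_FIXED places the mapping exactly at the requested address; keeping
+  // the whole stack below 4GB means its addresses fit in 32-bit pointers.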
+ void *Stack =
+ mmap(xStackStart(StackEnd, Size), Size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_GROWSDOWN | MAP_ANONYMOUS, -1, 0);
+ assert(Stack != MAP_FAILED && "mmap failed. no stack.");
+ return Stack;
+}
+
+void xDeallocStack(uint64 StackEnd, uint32 Size) {
+ assert((StackEnd & 0xFFFFFFFF00000000ull) == 0 && "Invalid StackEnd.");
+ munmap(xStackStart(StackEnd, Size), Size);
+}
+
+int main(int argc, char *argv[]) {
+  // These "locals" must **not** live on the stack:
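+  // once xSetStack switches %rsp, %rsp-relative accesses to this frame would
+  // be wrong, so everything is kept in static storage instead.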
+ static int Argc;
+ static char **Argv;
+ static const uint32_t StackEnd = 0x80000000;
+ static const uint32_t StackSize = 40 * 1024 * 1024;
+ static unsigned char *new_rsp;
+ static unsigned char *old_rsp;
+ static unsigned char *dummy_rsp;
+ static int Failures;
+ Argc = argc;
+ Argv = argv;
+ new_rsp = xAllocStack(StackEnd, StackSize) + StackSize;
+ xSetStack(new_rsp, &old_rsp);
+ Failures = wrapped_main(Argc, Argv);
+ xSetStack(old_rsp, &new_rsp);
+ xDeallocStack(StackEnd, StackSize);
+ return Failures;
+}
diff --git a/crosstest/test_arith.cpp b/crosstest/test_arith.cpp
index 446ea04..47fd47b 100644
--- a/crosstest/test_arith.cpp
+++ b/crosstest/test_arith.cpp
@@ -17,13 +17,14 @@
#include <stdint.h>
#include "test_arith.h"
+#include "xdefs.h"
#define X(inst, op, isdiv, isshift) \
bool test##inst(bool a, bool b) { return a op b; } \
uint8_t test##inst(uint8_t a, uint8_t b) { return a op b; } \
uint16_t test##inst(uint16_t a, uint16_t b) { return a op b; } \
uint32_t test##inst(uint32_t a, uint32_t b) { return a op b; } \
- uint64_t test##inst(uint64_t a, uint64_t b) { return a op b; } \
+ uint64 test##inst(uint64 a, uint64 b) { return a op b; } \
v4ui32 test##inst(v4ui32 a, v4ui32 b) { return a op b; } \
v8ui16 test##inst(v8ui16 a, v8ui16 b) { return a op b; } \
v16ui8 test##inst(v16ui8 a, v16ui8 b) { return a op b; }
@@ -35,7 +36,7 @@
myint8_t test##inst(myint8_t a, myint8_t b) { return a op b; } \
int16_t test##inst(int16_t a, int16_t b) { return a op b; } \
int32_t test##inst(int32_t a, int32_t b) { return a op b; } \
- int64_t test##inst(int64_t a, int64_t b) { return a op b; } \
+ int64 test##inst(int64 a, int64 b) { return a op b; } \
v4si32 test##inst(v4si32 a, v4si32 b) { return a op b; } \
v8si16 test##inst(v8si16 a, v8si16 b) { return a op b; } \
v16si8 test##inst(v16si8 a, v16si8 b) { return a op b; }
diff --git a/crosstest/test_arith.h b/crosstest/test_arith.h
index be7f0b1..e348418 100644
--- a/crosstest/test_arith.h
+++ b/crosstest/test_arith.h
@@ -14,6 +14,7 @@
#include <stdint.h>
#include "test_arith.def"
+#include "xdefs.h"
#include "vectors.h"
@@ -22,7 +23,7 @@
uint8_t test##inst(uint8_t a, uint8_t b); \
uint16_t test##inst(uint16_t a, uint16_t b); \
uint32_t test##inst(uint32_t a, uint32_t b); \
- uint64_t test##inst(uint64_t a, uint64_t b); \
+ uint64 test##inst(uint64 a, uint64 b); \
v4ui32 test##inst(v4ui32 a, v4ui32 b); \
v8ui16 test##inst(v8ui16 a, v8ui16 b); \
v16ui8 test##inst(v16ui8 a, v16ui8 b);
@@ -34,7 +35,7 @@
myint8_t test##inst(myint8_t a, myint8_t b); \
int16_t test##inst(int16_t a, int16_t b); \
int32_t test##inst(int32_t a, int32_t b); \
- int64_t test##inst(int64_t a, int64_t b); \
+ int64 test##inst(int64 a, int64 b); \
v4si32 test##inst(v4si32 a, v4si32 b); \
v8si16 test##inst(v8si16 a, v8si16 b); \
v16si8 test##inst(v16si8 a, v16si8 b);
diff --git a/crosstest/test_arith_main.cpp b/crosstest/test_arith_main.cpp
index b4c70ee..8f420f1 100644
--- a/crosstest/test_arith_main.cpp
+++ b/crosstest/test_arith_main.cpp
@@ -28,6 +28,8 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_arith.h"
+#include "xdefs.h"
+
namespace Subzero_ {
#include "test_arith.h"
}
@@ -363,7 +365,11 @@
}
}
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
@@ -372,7 +378,7 @@
testsInt<uint8_t, myint8_t>(TotalTests, Passes, Failures);
testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
- testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures);
+ testsInt<uint64, int64>(TotalTests, Passes, Failures);
testsVecInt<v4ui32, v4si32>(TotalTests, Passes, Failures);
testsVecInt<v8ui16, v8si16>(TotalTests, Passes, Failures);
testsVecInt<v16ui8, v16si8>(TotalTests, Passes, Failures);
diff --git a/crosstest/test_bitmanip.def b/crosstest/test_bitmanip.def
index 0dac033..443ad5a 100644
--- a/crosstest/test_bitmanip.def
+++ b/crosstest/test_bitmanip.def
@@ -14,6 +14,8 @@
#ifndef TEST_BIT_MANIP_DEF
#define TEST_BIT_MANIP_DEF
+#include "xdefs.h"
+
#define STR(s) #s
#define BMI_OPS \
@@ -25,13 +27,13 @@
#define BMI_TYPES \
/* type */ \
- X(uint32_t) \
- X(uint64_t)
+ X(uint32) \
+ X(uint64)
// #define X(type)
#define FOR_ALL_BMI_TYPES_INST(F, inst) \
- F(inst, uint32_t) \
- F(inst, uint64_t)
+ F(inst, uint32) \
+ F(inst, uint64)
#define FOR_ALL_BMI_OP_TYPES(X) \
FOR_ALL_BMI_TYPES_INST(X, ctlz) \
@@ -42,7 +44,7 @@
#define BSWAP_TABLE \
/* type, builtin_name */ \
X(uint16_t, __builtin_bswap16) \
- X(uint32_t, __builtin_bswap32) \
- X(uint64_t, __builtin_bswap64)
+ X(uint32, __builtin_bswap32) \
+ X(uint64, __builtin_bswap64)
#endif // TEST_BIT_MANIP_DEF
diff --git a/crosstest/test_bitmanip_main.cpp b/crosstest/test_bitmanip_main.cpp
index dfb5d1a..f872f20 100644
--- a/crosstest/test_bitmanip_main.cpp
+++ b/crosstest/test_bitmanip_main.cpp
@@ -23,11 +23,13 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_bitmanip.h"
+#include "xdefs.h"
+
namespace Subzero_ {
#include "test_bitmanip.h"
}
-volatile uint64_t Values[] = {
+volatile uint64 Values[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0xc0de, 0xabcd, 0xdcba, 0x007fffff /*Max subnormal + */,
0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */, 0x7f800000 /*+Inf*/,
@@ -71,9 +73,9 @@
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
- << "(" << static_cast<uint64_t>(Value)
- << "): sz=" << static_cast<uint64_t>(ResultSz)
- << " llc=" << static_cast<uint64_t>(ResultLlc) << "\n";
+ << "(" << static_cast<uint64>(Value)
+ << "): sz=" << static_cast<uint64>(ResultSz)
+ << " llc=" << static_cast<uint64>(ResultLlc) << "\n";
}
}
}
@@ -101,24 +103,28 @@
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
- << "(" << static_cast<uint64_t>(Value)
- << "): sz=" << static_cast<uint64_t>(ResultSz)
- << " llc=" << static_cast<uint64_t>(ResultLlc) << "\n";
+ << "(" << static_cast<uint64>(Value)
+ << "): sz=" << static_cast<uint64>(ResultSz)
+ << " llc=" << static_cast<uint64>(ResultLlc) << "\n";
}
}
}
}
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
testBitManip<uint32_t>(TotalTests, Passes, Failures);
- testBitManip<uint64_t>(TotalTests, Passes, Failures);
+ testBitManip<uint64>(TotalTests, Passes, Failures);
testByteSwap<uint16_t>(TotalTests, Passes, Failures);
testByteSwap<uint32_t>(TotalTests, Passes, Failures);
- testByteSwap<uint64_t>(TotalTests, Passes, Failures);
+ testByteSwap<uint64>(TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n";
diff --git a/crosstest/test_calling_conv.cpp b/crosstest/test_calling_conv.cpp
index e7fa616..364b0df 100644
--- a/crosstest/test_calling_conv.cpp
+++ b/crosstest/test_calling_conv.cpp
@@ -17,6 +17,7 @@
#include <cstring>
#include "test_calling_conv.h"
+#include "xdefs.h"
#define CALL_AS_TYPE(Ty, Func) (reinterpret_cast<Ty *>(Func))
@@ -37,9 +38,9 @@
void caller_vlvlivfvdviv(void) {
v4f32 arg1 = {0, 1, 2, 3};
- int64_t arg2 = 4;
+ int64 arg2 = 4;
v4f32 arg3 = {6, 7, 8, 9};
- int64_t arg4 = 10;
+ int64 arg4 = 10;
int arg5 = 11;
v4f32 arg6 = {12, 13, 14, 15};
float arg7 = 16;
@@ -75,8 +76,8 @@
}
void __attribute__((noinline))
-callee_vlvlivfvdviv(v4f32 arg1, int64_t arg2, v4f32 arg3, int64_t arg4,
- int arg5, v4f32 arg6, float arg7, v4f32 arg8, double arg9,
+callee_vlvlivfvdviv(v4f32 arg1, int64 arg2, v4f32 arg3, int64 arg4, int arg5,
+ v4f32 arg6, float arg7, v4f32 arg8, double arg9,
v4f32 arg10, int arg11, v4f32 arg12) {
switch (ArgNum) {
HANDLE_ARG(1);
diff --git a/crosstest/test_calling_conv.h b/crosstest/test_calling_conv.h
index 6cff49b..bf36465 100644
--- a/crosstest/test_calling_conv.h
+++ b/crosstest/test_calling_conv.h
@@ -14,6 +14,7 @@
#include "test_calling_conv.def"
#include "vectors.h"
+#include "xdefs.h"
typedef void (*CalleePtrTy)();
extern CalleePtrTy Callee;
@@ -31,6 +32,6 @@
callee_vvvvv_Ty callee_vvvvv;
void caller_vlvlivfvdviv();
-typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64_t, v4f32, int64_t, int, v4f32,
+typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64, v4f32, int64, int, v4f32,
float, v4f32, double, v4f32, int, v4f32);
callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv;
diff --git a/crosstest/test_calling_conv_main.cpp b/crosstest/test_calling_conv_main.cpp
index ce5ecda..f0feef0 100644
--- a/crosstest/test_calling_conv_main.cpp
+++ b/crosstest/test_calling_conv_main.cpp
@@ -162,7 +162,11 @@
}
}
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
diff --git a/crosstest/test_cast.cpp b/crosstest/test_cast.cpp
index 6298320..a2668f8 100644
--- a/crosstest/test_cast.cpp
+++ b/crosstest/test_cast.cpp
@@ -16,6 +16,7 @@
#include <stdint.h>
#include "test_cast.h"
+#include "xdefs.h"
template <typename FromType, typename ToType>
ToType __attribute__((noinline)) cast(FromType a) {
@@ -38,8 +39,8 @@
static ToType f(uint16_t a) { return cast<uint16_t, ToType>(a); }
static ToType f(int32_t a) { return cast<int32_t, ToType>(a); }
static ToType f(uint32_t a) { return cast<uint32_t, ToType>(a); }
- static ToType f(int64_t a) { return cast<int64_t, ToType>(a); }
- static ToType f(uint64_t a) { return cast<uint64_t, ToType>(a); }
+ static ToType f(int64 a) { return cast<int64, ToType>(a); }
+ static ToType f(uint64 a) { return cast<uint64, ToType>(a); }
static ToType f(float a) { return cast<float, ToType>(a); }
static ToType f(double a) { return cast<double, ToType>(a); }
};
@@ -56,8 +57,8 @@
template class Caster<uint16_t>;
template class Caster<int32_t>;
template class Caster<uint32_t>;
-template class Caster<int64_t>;
-template class Caster<uint64_t>;
+template class Caster<int64>;
+template class Caster<uint64>;
template class Caster<float>;
template class Caster<double>;
@@ -67,8 +68,8 @@
double makeBitCasters() {
double Result = 0;
Result += castBits<uint32_t, float>(0);
- Result += castBits<uint64_t, double>(0);
+ Result += castBits<uint64, double>(0);
Result += castBits<float, uint32_t>(0);
- Result += castBits<double, uint64_t>(0);
+ Result += castBits<double, uint64>(0);
return Result;
}
diff --git a/crosstest/test_cast_main.cpp b/crosstest/test_cast_main.cpp
index c395597..4596bfa 100644
--- a/crosstest/test_cast_main.cpp
+++ b/crosstest/test_cast_main.cpp
@@ -22,6 +22,7 @@
#include "test_arith.def"
#include "vectors.h"
+#include "xdefs.h"
// Include test_cast.h twice - once normally, and once within the
// Subzero_ namespace, corresponding to the llc and Subzero translated
@@ -82,8 +83,8 @@
COMPARE(cast, FromType, int16_t, Val, FromTypeString);
COMPARE(cast, FromType, uint32_t, Val, FromTypeString);
COMPARE(cast, FromType, int32_t, Val, FromTypeString);
- COMPARE(cast, FromType, uint64_t, Val, FromTypeString);
- COMPARE(cast, FromType, int64_t, Val, FromTypeString);
+ COMPARE(cast, FromType, uint64, Val, FromTypeString);
+ COMPARE(cast, FromType, int64, Val, FromTypeString);
COMPARE(cast, FromType, float, Val, FromTypeString);
COMPARE(cast, FromType, double, Val, FromTypeString);
}
@@ -110,7 +111,11 @@
}
}
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
@@ -147,7 +152,7 @@
0x80000000, 0x80000001, 0xfffffffe, 0xffffffff};
static const size_t NumValsSi32 = sizeof(ValsSi32) / sizeof(*ValsSi32);
- volatile uint64_t ValsUi64[] = {
+ volatile uint64 ValsUi64[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x100000000ull, 0x100000001ull,
@@ -155,7 +160,7 @@
0x8000000000000001ull, 0xfffffffffffffffeull, 0xffffffffffffffffull};
static const size_t NumValsUi64 = sizeof(ValsUi64) / sizeof(*ValsUi64);
- volatile int64_t ValsSi64[] = {
+ volatile int64 ValsSi64[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x100000000ll, 0x100000001ll,
@@ -203,13 +208,13 @@
testValue<int32_t>(Val, TotalTests, Passes, Failures, "int32_t");
}
for (size_t i = 0; i < NumValsUi64; ++i) {
- uint64_t Val = ValsUi64[i];
- testValue<uint64_t>(Val, TotalTests, Passes, Failures, "uint64_t");
- COMPARE(castBits, uint64_t, double, Val, "uint64_t");
+ uint64 Val = ValsUi64[i];
+ testValue<uint64>(Val, TotalTests, Passes, Failures, "uint64");
+ COMPARE(castBits, uint64, double, Val, "uint64");
}
for (size_t i = 0; i < NumValsSi64; ++i) {
- int64_t Val = ValsSi64[i];
- testValue<int64_t>(Val, TotalTests, Passes, Failures, "int64_t");
+ int64 Val = ValsSi64[i];
+ testValue<int64>(Val, TotalTests, Passes, Failures, "int64");
}
for (size_t i = 0; i < NumValsF32; ++i) {
for (unsigned j = 0; j < 2; ++j) {
@@ -226,7 +231,7 @@
if (j > 0)
Val = -Val;
testValue<double>(Val, TotalTests, Passes, Failures, "double");
- COMPARE(castBits, double, uint64_t, Val, "double");
+ COMPARE(castBits, double, uint64, Val, "double");
}
}
testVector<v4ui32, v4f32>(TotalTests, Passes, Failures, "v4ui32", "v4f32");
diff --git a/crosstest/test_fcmp_main.cpp b/crosstest/test_fcmp_main.cpp
index 9e9c32c..b38de31 100644
--- a/crosstest/test_fcmp_main.cpp
+++ b/crosstest/test_fcmp_main.cpp
@@ -159,7 +159,11 @@
}
}
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
diff --git a/crosstest/test_icmp.cpp b/crosstest/test_icmp.cpp
index 5ca2c46..6057545 100644
--- a/crosstest/test_icmp.cpp
+++ b/crosstest/test_icmp.cpp
@@ -15,12 +15,13 @@
#include <stdint.h>
#include "test_icmp.h"
+#include "xdefs.h"
#define X(cmp, op) \
bool icmp##cmp(uint8_t a, uint8_t b) { return a op b; } \
bool icmp##cmp(uint16_t a, uint16_t b) { return a op b; } \
bool icmp##cmp(uint32_t a, uint32_t b) { return a op b; } \
- bool icmp##cmp(uint64_t a, uint64_t b) { return a op b; } \
+ bool icmp##cmp(uint64 a, uint64 b) { return a op b; } \
v4ui32 icmp##cmp(v4ui32 a, v4ui32 b) { return a op b; } \
v8ui16 icmp##cmp(v8ui16 a, v8ui16 b) { return a op b; } \
v16ui8 icmp##cmp(v16ui8 a, v16ui8 b) { return a op b; }
@@ -31,7 +32,7 @@
bool icmp##cmp(myint8_t a, myint8_t b) { return a op b; } \
bool icmp##cmp(int16_t a, int16_t b) { return a op b; } \
bool icmp##cmp(int32_t a, int32_t b) { return a op b; } \
- bool icmp##cmp(int64_t a, int64_t b) { return a op b; } \
+ bool icmp##cmp(int64 a, int64 b) { return a op b; } \
v4si32 icmp##cmp(v4si32 a, v4si32 b) { return a op b; } \
v8si16 icmp##cmp(v8si16 a, v8si16 b) { return a op b; } \
v16si8 icmp##cmp(v16si8 a, v16si8 b) { return a op b; }
diff --git a/crosstest/test_icmp.h b/crosstest/test_icmp.h
index 8a264d0..e1ea495 100644
--- a/crosstest/test_icmp.h
+++ b/crosstest/test_icmp.h
@@ -15,12 +15,13 @@
#include "test_icmp.def"
#include "vectors.h"
+#include "xdefs.h"
#define X(cmp, op) \
bool icmp##cmp(uint8_t a, uint8_t b); \
bool icmp##cmp(uint16_t a, uint16_t b); \
bool icmp##cmp(uint32_t a, uint32_t b); \
- bool icmp##cmp(uint64_t a, uint64_t b); \
+ bool icmp##cmp(uint64 a, uint64 b); \
v4ui32 icmp##cmp(v4ui32 a, v4ui32 b); \
v8ui16 icmp##cmp(v8ui16 a, v8ui16 b); \
v16ui8 icmp##cmp(v16ui8 a, v16ui8 b);
@@ -31,7 +32,7 @@
bool icmp##cmp(myint8_t a, myint8_t b); \
bool icmp##cmp(int16_t a, int16_t b); \
bool icmp##cmp(int32_t a, int32_t b); \
- bool icmp##cmp(int64_t a, int64_t b); \
+ bool icmp##cmp(int64 a, int64 b); \
v4si32 icmp##cmp(v4si32 a, v4si32 b); \
v8si16 icmp##cmp(v8si16 a, v8si16 b); \
v16si8 icmp##cmp(v16si8 a, v16si8 b);
diff --git a/crosstest/test_icmp_main.cpp b/crosstest/test_icmp_main.cpp
index f27d53b..82e5b66 100644
--- a/crosstest/test_icmp_main.cpp
+++ b/crosstest/test_icmp_main.cpp
@@ -23,10 +23,13 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_icmp.h"
+
namespace Subzero_ {
#include "test_icmp.h"
}
+#include "xdefs.h"
+
volatile unsigned Values[] = {
0x0, 0x1, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001,
0xfffffffe, 0xffffffff, 0x7e, 0x7f, 0x80, 0x81,
@@ -265,7 +268,11 @@
}
}
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
@@ -273,7 +280,7 @@
testsInt<uint8_t, myint8_t>(TotalTests, Passes, Failures);
testsInt<uint16_t, int16_t>(TotalTests, Passes, Failures);
testsInt<uint32_t, int32_t>(TotalTests, Passes, Failures);
- testsInt<uint64_t, int64_t>(TotalTests, Passes, Failures);
+ testsInt<uint64, int64>(TotalTests, Passes, Failures);
testsVecInt<v4ui32, v4si32>(TotalTests, Passes, Failures);
testsVecInt<v8ui16, v8si16>(TotalTests, Passes, Failures);
testsVecInt<v16ui8, v16si8>(TotalTests, Passes, Failures);
diff --git a/crosstest/test_select_main.cpp b/crosstest/test_select_main.cpp
index 5ccdcfb..1973416 100644
--- a/crosstest/test_select_main.cpp
+++ b/crosstest/test_select_main.cpp
@@ -130,7 +130,11 @@
}
}
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
diff --git a/crosstest/test_stacksave_main.c b/crosstest/test_stacksave_main.c
index 0691025..f03304e 100644
--- a/crosstest/test_stacksave_main.c
+++ b/crosstest/test_stacksave_main.c
@@ -22,7 +22,11 @@
DECLARE_TESTS()
DECLARE_TESTS(Subzero_)
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
diff --git a/crosstest/test_strengthreduce_main.cpp b/crosstest/test_strengthreduce_main.cpp
index 2c2aa98..acde64e 100644
--- a/crosstest/test_strengthreduce_main.cpp
+++ b/crosstest/test_strengthreduce_main.cpp
@@ -25,7 +25,11 @@
#include "test_strengthreduce.h"
}
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
diff --git a/crosstest/test_sync_atomic.def b/crosstest/test_sync_atomic.def
index f84afde..7c05571 100644
--- a/crosstest/test_sync_atomic.def
+++ b/crosstest/test_sync_atomic.def
@@ -14,6 +14,8 @@
#ifndef TEST_SYNC_ATOMIC_DEF
#define TEST_SYNC_ATOMIC_DEF
+#include "xdefs.h"
+
#define STR(s) #s
#define RMWOP_TABLE \
@@ -30,14 +32,14 @@
X(uint8_t) \
X(uint16_t) \
X(uint32_t) \
- X(uint64_t)
+ X(uint64)
//#define X(type)
#define FOR_ALL_RMWTYPES_INST(F, inst) \
F(inst, uint8_t) \
F(inst, uint16_t) \
F(inst, uint32_t) \
- F(inst, uint64_t)
+ F(inst, uint64)
#define FOR_ALL_RMWOP_TYPES(X) \
FOR_ALL_RMWTYPES_INST(X, add) \
diff --git a/crosstest/test_sync_atomic_main.cpp b/crosstest/test_sync_atomic_main.cpp
index 63dfc80..f935e0a 100644
--- a/crosstest/test_sync_atomic_main.cpp
+++ b/crosstest/test_sync_atomic_main.cpp
@@ -28,11 +28,12 @@
// Subzero_ namespace, corresponding to the llc and Subzero translated
// object files, respectively.
#include "test_sync_atomic.h"
+#include "xdefs.h"
namespace Subzero_ {
#include "test_sync_atomic.h"
}
-volatile uint64_t Values[] = {
+volatile uint64 Values[] = {
0, 1, 0x7e, 0x7f, 0x80, 0x81, 0xfe, 0xff, 0x7ffe, 0x7fff, 0x8000, 0x8001,
0xfffe, 0xffff, 0x007fffff /*Max subnormal + */, 0x00800000 /*Min+ */,
0x7f7fffff /*Max+ */, 0x7f800000 /*+Inf*/, 0xff800000 /*-Inf*/,
@@ -51,7 +52,7 @@
volatile uint8_t l8;
volatile uint16_t l16;
volatile uint32_t l32;
- volatile uint64_t l64;
+ volatile uint64 l64;
} AtomicLocs;
template <typename Type>
@@ -91,12 +92,12 @@
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
- << "(" << static_cast<uint64_t>(Value1) << ", "
- << static_cast<uint64_t>(Value2)
- << "): sz1=" << static_cast<uint64_t>(ResultSz1)
- << " llc1=" << static_cast<uint64_t>(ResultLlc1)
- << " sz2=" << static_cast<uint64_t>(ResultSz2)
- << " llc2=" << static_cast<uint64_t>(ResultLlc2) << "\n";
+ << "(" << static_cast<uint64>(Value1) << ", "
+ << static_cast<uint64>(Value2)
+ << "): sz1=" << static_cast<uint64>(ResultSz1)
+ << " llc1=" << static_cast<uint64>(ResultLlc1)
+ << " sz2=" << static_cast<uint64>(ResultSz2)
+ << " llc2=" << static_cast<uint64>(ResultLlc2) << "\n";
}
}
}
@@ -137,12 +138,12 @@
} else {
++Failures;
std::cout << "test_" << Funcs[f].Name << (CHAR_BIT * sizeof(Type))
- << "(" << static_cast<uint64_t>(Value1) << ", "
- << static_cast<uint64_t>(Value2)
- << "): sz1=" << static_cast<uint64_t>(ResultSz1)
- << " llc1=" << static_cast<uint64_t>(ResultLlc1)
- << " sz2=" << static_cast<uint64_t>(ResultSz2)
- << " llc2=" << static_cast<uint64_t>(ResultLlc2) << "\n";
+ << "(" << static_cast<uint64>(Value1) << ", "
+ << static_cast<uint64>(Value2)
+ << "): sz1=" << static_cast<uint64>(ResultSz1)
+ << " llc1=" << static_cast<uint64>(ResultLlc1)
+ << " sz2=" << static_cast<uint64>(ResultSz2)
+ << " llc2=" << static_cast<uint64>(ResultLlc2) << "\n";
}
}
}
@@ -166,6 +167,22 @@
return NULL;
}
+#ifndef X8664_STACK_HACK
+void AllocStackForThread(uint32, pthread_attr_t *) {}
+#else // defined(X8664_STACK_HACK)
+void AllocStackForThread(uint32 m, pthread_attr_t *attr) {
+ static const uint32_t ThreadStackBase = 0x60000000;
+ static const uint32_t ThreadStackSize = 4 << 20; // 4MB.
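+  // Assumption: the factor of 2 in the address computation below leaves an
+  // unmapped guard gap between consecutive thread stacks.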
+ if (pthread_attr_setstack(
+ attr, xAllocStack(ThreadStackBase - 2 * m * ThreadStackSize,
+ ThreadStackSize),
+ ThreadStackSize) != 0) {
+ std::cout << "pthread_attr_setstack: " << strerror(errno) << "\n";
+ abort();
+ }
+}
+#endif // X8664_STACK_HACK
+
template <typename Type>
void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
size_t &Passes, size_t &Failures) {
@@ -184,7 +201,7 @@
const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
// Just test a few values, otherwise it takes a *really* long time.
- volatile uint64_t ValuesSubset[] = {1, 0x7e, 0x000fffffffffffffffll};
+ volatile uint64 ValuesSubset[] = {1, 0x7e, 0x000fffffffffffffffll};
const size_t NumValuesSubset = sizeof(ValuesSubset) / sizeof(*ValuesSubset);
for (size_t f = 0; f < NumFuncs; ++f) {
@@ -200,12 +217,18 @@
++TotalTests;
const size_t NumThreads = 4;
pthread_t t[NumThreads];
+ pthread_attr_t attr[NumThreads];
// Try N threads w/ just Llc.
*AtomicLoc = Value1;
for (size_t m = 0; m < NumThreads; ++m) {
- pthread_create(&t[m], NULL, &threadWrapper<Type>,
- reinterpret_cast<void *>(&TDataLlc));
+ pthread_attr_init(&attr[m]);
+ AllocStackForThread(m, &attr[m]);
+ if (pthread_create(&t[m], &attr[m], &threadWrapper<Type>,
+ reinterpret_cast<void *>(&TDataLlc)) != 0) {
+ std::cout << "pthread_create failed w/ " << strerror(errno) << "\n";
+ abort();
+ }
}
for (size_t m = 0; m < NumThreads; ++m) {
pthread_join(t[m], NULL);
@@ -215,7 +238,9 @@
// Try N threads w/ both Sz and Llc.
*AtomicLoc = Value1;
for (size_t m = 0; m < NumThreads; ++m) {
- if (pthread_create(&t[m], NULL, &threadWrapper<Type>,
+ pthread_attr_init(&attr[m]);
+ AllocStackForThread(m, &attr[m]);
+ if (pthread_create(&t[m], &attr[m], &threadWrapper<Type>,
m % 2 == 0
? reinterpret_cast<void *>(&TDataLlc)
: reinterpret_cast<void *>(&TDataSz)) != 0) {
@@ -238,18 +263,21 @@
} else {
++Failures;
std::cout << "test_with_threads_" << Funcs[f].Name
- << (8 * sizeof(Type)) << "("
- << static_cast<uint64_t>(Value1) << ", "
- << static_cast<uint64_t>(Value2)
- << "): llc=" << static_cast<uint64_t>(ResultLlc)
- << " mixed=" << static_cast<uint64_t>(ResultMixed) << "\n";
+ << (8 * sizeof(Type)) << "(" << static_cast<uint64>(Value1)
+ << ", " << static_cast<uint64>(Value2)
+ << "): llc=" << static_cast<uint64>(ResultLlc)
+ << " mixed=" << static_cast<uint64>(ResultMixed) << "\n";
}
}
}
}
}
-int main(int argc, char **argv) {
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
+int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
@@ -257,18 +285,17 @@
testAtomicRMW<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testAtomicRMW<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures);
testAtomicRMW<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures);
- testAtomicRMW<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, Failures);
+ testAtomicRMW<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
testValCompareAndSwap<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testValCompareAndSwap<uint16_t>(&AtomicLocs.l16, TotalTests, Passes,
Failures);
testValCompareAndSwap<uint32_t>(&AtomicLocs.l32, TotalTests, Passes,
Failures);
- testValCompareAndSwap<uint64_t>(&AtomicLocs.l64, TotalTests, Passes,
- Failures);
+ testValCompareAndSwap<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures);
testAtomicRMWThreads<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures);
- testAtomicRMWThreads<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, Failures);
+ testAtomicRMWThreads<uint64>(&AtomicLocs.l64, TotalTests, Passes, Failures);
std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
<< " Failures=" << Failures << "\n";
diff --git a/crosstest/test_vector_ops_main.cpp b/crosstest/test_vector_ops_main.cpp
index 1232799..4b9591a 100644
--- a/crosstest/test_vector_ops_main.cpp
+++ b/crosstest/test_vector_ops_main.cpp
@@ -130,7 +130,11 @@
free(TestVectors);
}
+#ifdef X8664_STACK_HACK
+extern "C" int wrapped_main(int argc, char *argv[]) {
+#else // !defined(X8664_STACK_HACK)
int main(int argc, char *argv[]) {
+#endif // X8664_STACK_HACK
size_t TotalTests = 0;
size_t Passes = 0;
size_t Failures = 0;
diff --git a/crosstest/xdefs.h b/crosstest/xdefs.h
new file mode 100644
index 0000000..00a4512
--- /dev/null
+++ b/crosstest/xdefs.h
@@ -0,0 +1,53 @@
+//===- subzero/crosstest/xdefs.h - Definitions for the crosstests. --------===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the int64 and uint64 types to avoid link-time errors when compiling
+// the crosstests in LP64.
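+// (Presumably: under LP64, int64_t is 'long', while the ILP32 half of each
+// crosstest expects 'long long'; spelling these types as long long keeps the
+// C++ mangled names of the test functions consistent across both halves.)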
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_CROSSTEST_XDEFS_H_
+#define SUBZERO_CROSSTEST_XDEFS_H_
+
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+typedef unsigned int SizeT;
+
+#ifdef X8664_STACK_HACK
+
+// The X8664_STACK_HACK is an intrusive way of getting the crosstests (which
+// assume an ILP32 model) to run on x86-64 LP64. The hack allocates a new
+// stack for running the tests in the low 4GB of the address space.
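+//
+// Each test driver renames its main() to wrapped_main() under this macro; the
+// real main() lives in stack_hack.x8664.c, which mmaps a low-memory stack,
+// switches %rsp to it, calls wrapped_main(), and restores %rsp afterwards.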
+
+#ifdef __cplusplus
+#define XTEST_EXTERN extern "C"
+#else // !defined(__cplusplus)
+#define XTEST_EXTERN extern
+#endif // __cplusplus
+
+/// xAllocStack allocates the memory chunk [StackEnd - Size, StackEnd). It
+/// requires StackEnd to fit in 32 bits. xDeallocStack frees that memory
+/// chunk.
+/// @{
+XTEST_EXTERN unsigned char *xAllocStack(uint64 StackEnd, uint32 Size);
+XTEST_EXTERN void xDeallocStack(uint64 StackEnd, uint32 Size);
+/// @}
+
+// wrapped_main is invoked by the x86-64 stack hack's main(). We declare its
+// prototype here so the compiler (and not the linker) can yell if a test's
+// wrapped_main does not match what we want.
+XTEST_EXTERN int wrapped_main(int argc, char *argv[]);
+
+#undef XTEST_EXTERN
+
+#endif // X8664_STACK_HACK
+
+#endif // SUBZERO_CROSSTEST_XDEFS_H_
diff --git a/pydir/build-runtime.py b/pydir/build-runtime.py
index 4010b51..ad38a2e 100755
--- a/pydir/build-runtime.py
+++ b/pydir/build-runtime.py
@@ -124,6 +124,8 @@
MakeRuntimesForTarget(targets.X8632Target, ll_files,
srcdir, tempdir, rtdir, args.verbose)
+ MakeRuntimesForTarget(targets.X8664Target, ll_files,
+ srcdir, tempdir, rtdir, args.verbose)
MakeRuntimesForTarget(targets.ARM32Target, ll_files,
srcdir, tempdir, rtdir, args.verbose)
diff --git a/pydir/crosstest.py b/pydir/crosstest.py
index c8ab306..d5f240a 100755
--- a/pydir/crosstest.py
+++ b/pydir/crosstest.py
@@ -177,6 +177,18 @@
'szrt_{sb}_' + args.target + '.o'
).format(root=nacl_root, sb='sb' if args.sandbox else 'native'))
pure_c = os.path.splitext(args.driver)[1] == '.c'
+
+  # TargetX8664 is ilp32, but clang does not currently support such a
+  # configuration. In order to run the crosstests we play nasty, dangerous
+  # tricks with the stack pointer.
+ needs_stack_hack = (args.target == 'x8664')
+ stack_hack_params = []
+ if needs_stack_hack:
+ shellcmd('{bin}/clang -g -o stack_hack.x8664.{key}.o -c '
+ 'stack_hack.x8664.c'.format(bin=bindir, key=key))
+ stack_hack_params.append('-DX8664_STACK_HACK')
+ stack_hack_params.append('stack_hack.x8664.{key}.o'.format(key=key))
+
# Set compiler to clang, clang++, pnacl-clang, or pnacl-clang++.
compiler = '{bin}/{prefix}{cc}'.format(
bin=bindir, prefix='pnacl-' if args.sandbox else '',
@@ -189,7 +201,7 @@
'-lm', '-lpthread',
'-Wl,--defsym=__Sz_AbsoluteZero=0'] +
target_info.cross_headers)
- shellcmd([compiler, args.driver] + objs +
+ shellcmd([compiler] + stack_hack_params + [args.driver] + objs +
['-o', os.path.join(args.dir, args.output)] + sb_native_args)
if __name__ == '__main__':
diff --git a/pydir/crosstest_generator.py b/pydir/crosstest_generator.py
index b5d64ab..dd72b80 100755
--- a/pydir/crosstest_generator.py
+++ b/pydir/crosstest_generator.py
@@ -55,15 +55,17 @@
root = FindBaseNaCl()
# The rest of the attribute sets.
- targets = [ 'x8632', 'arm32' ]
+ targets = [ 'x8632', 'x8664', 'arm32' ]
sandboxing = [ 'native', 'sandbox' ]
opt_levels = [ 'Om1', 'O2' ]
arch_attrs = { 'x8632': [ 'sse2', 'sse4.1' ],
+ 'x8664': [ 'sse2', 'sse4.1' ],
'arm32': [ 'neon', 'hwdiv-arm' ] }
flat_attrs = []
for v in arch_attrs.values():
flat_attrs += v
arch_flags = { 'x8632': [],
+ 'x8664': [],
# ARM doesn't have an integrated assembler yet.
'arm32': ['--filetype=asm'] }
# all_keys is only used in the help text.
diff --git a/pydir/targets.py b/pydir/targets.py
index c2188e5..3635e13 100644
--- a/pydir/targets.py
+++ b/pydir/targets.py
@@ -40,6 +40,5 @@
ld_emu='armelf_nacl',
cross_headers=['-isystem', FindARMCrossInclude()])
-
def ConvertTripleToNaCl(nonsfi_triple):
return nonsfi_triple.replace('linux', 'nacl')
diff --git a/runtime/szrt_profiler.c b/runtime/szrt_profiler.c
index e31692e..34a647ac2 100644
--- a/runtime/szrt_profiler.c
+++ b/runtime/szrt_profiler.c
@@ -1,3 +1,4 @@
+#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
@@ -53,7 +54,7 @@
printf("%s", SubzeroLogo);
for (const struct BlockProfileInfo **curr = &__Sz_block_profile_info;
*curr != NULL; ++curr) {
- printf("%lld\t%s\n", (*curr)->Counter, (*curr)->BlockName);
+ printf("%" PRIu64 "\t%s\n", (*curr)->Counter, (*curr)->BlockName);
}
fflush(stdout);
}
diff --git a/src/IceAssemblerX86Base.h b/src/IceAssemblerX86Base.h
index 9d872d2..c34b776 100644
--- a/src/IceAssemblerX86Base.h
+++ b/src/IceAssemblerX86Base.h
@@ -243,9 +243,9 @@
// Cross Xmm/GPR cast instructions.
template <typename DReg_t, typename SReg_t> struct CastEmitterRegOp {
- typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, SReg_t);
+ typedef void (AssemblerX86Base::*TypedEmitRegs)(Type, DReg_t, Type, SReg_t);
typedef void (AssemblerX86Base::*TypedEmitAddr)(
- Type, DReg_t, const typename Traits::Address &);
+ Type, DReg_t, Type, const typename Traits::Address &);
TypedEmitRegs RegReg;
TypedEmitAddr RegAddr;
@@ -299,7 +299,14 @@
typename Traits::GPRRegister src);
void mov(Type Ty, const typename Traits::Address &dst, const Immediate &imm);
- void movFromAh(const typename Traits::GPRRegister dst);
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, void>::type
+ movabs(const typename Traits::GPRRegister Dst, uint64_t Imm64);
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, void>::type
+ movabs(const typename Traits::GPRRegister, uint64_t) {
+ llvm::report_fatal_error("movabs is only supported in 64-bit x86 targets.");
+ }
void movzx(Type Ty, typename Traits::GPRRegister dst,
typename Traits::GPRRegister src);
@@ -328,11 +335,13 @@
void movss(Type Ty, typename Traits::XmmRegister dst,
typename Traits::XmmRegister src);
- void movd(typename Traits::XmmRegister dst, typename Traits::GPRRegister src);
- void movd(typename Traits::XmmRegister dst,
+ void movd(Type SrcTy, typename Traits::XmmRegister dst,
+ typename Traits::GPRRegister src);
+ void movd(Type SrcTy, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
- void movd(typename Traits::GPRRegister dst, typename Traits::XmmRegister src);
- void movd(const typename Traits::Address &dst,
+ void movd(Type DestTy, typename Traits::GPRRegister dst,
+ typename Traits::XmmRegister src);
+ void movd(Type DestTy, const typename Traits::Address &dst,
typename Traits::XmmRegister src);
void movq(typename Traits::XmmRegister dst, typename Traits::XmmRegister src);
@@ -504,9 +513,9 @@
void cvttps2dq(Type, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
- void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst,
+ void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
typename Traits::GPRRegister src);
- void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst,
+ void cvtsi2ss(Type DestTy, typename Traits::XmmRegister dst, Type SrcTy,
const typename Traits::Address &src);
void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
@@ -514,9 +523,9 @@
void cvtfloat2float(Type SrcTy, typename Traits::XmmRegister dst,
const typename Traits::Address &src);
- void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst,
+ void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
typename Traits::XmmRegister src);
- void cvttss2si(Type SrcTy, typename Traits::GPRRegister dst,
+ void cvttss2si(Type DestTy, typename Traits::GPRRegister dst, Type SrcTy,
const typename Traits::Address &src);
void ucomiss(Type Ty, typename Traits::XmmRegister a,
@@ -719,6 +728,12 @@
void cbw();
void cwd();
void cdq();
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, void>::type cqo();
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, void>::type cqo() {
+ llvm::report_fatal_error("CQO is only available in 64-bit x86 backends.");
+ }
void div(Type Ty, typename Traits::GPRRegister reg);
void div(Type Ty, const typename Traits::Address &address);
@@ -936,7 +951,7 @@
typename Traits::GPRRegister>::value;
return IsGPR && (Reg & 0x04) != 0 && (Reg & 0x08) == 0 &&
- isByteSizedArithType(Ty);
+ isByteSizedType(Ty);
};
// assembleAndEmitRex is used for determining which (if any) rex prefix should
diff --git a/src/IceAssemblerX86BaseImpl.h b/src/IceAssemblerX86BaseImpl.h
index f785756..2cb039a 100644
--- a/src/IceAssemblerX86BaseImpl.h
+++ b/src/IceAssemblerX86BaseImpl.h
@@ -207,6 +207,8 @@
emitUint8(0xB0 + gprEncoding(dst));
emitUint8(imm.value() & 0xFF);
} else {
+    // TODO(jpp): When removing the assertion above, ensure that on x86-64 we
+    // emit a 64-bit immediate.
emitUint8(0xB8 + gprEncoding(dst));
emitImmediate(Ty, imm);
}
@@ -279,9 +281,34 @@
}
template <class Machine>
+template <typename T>
+typename std::enable_if<T::Is64Bit, void>::type
+AssemblerX86Base<Machine>::movabs(const typename Traits::GPRRegister Dst,
+ uint64_t Imm64) {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
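+  // REX.W (and a second 32-bit immediate word) is only needed when Imm64 does
+  // not fit in 32 bits; a plain 32-bit mov already zero-extends the immediate
+  // into the full 64-bit register.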
+ const bool NeedsRexW = (Imm64 & ~0xFFFFFFFFull) != 0;
+ const Type RexType = NeedsRexW ? RexTypeForceRexW : RexTypeIrrelevant;
+ emitRexB(RexType, Dst);
+ emitUint8(0xB8 | gprEncoding(Dst));
+  // When emitting Imm64 we don't have to mask out the upper 32 bits, because
+  // emitInt32 will/should only emit a 32-bit constant. We are paranoid, so we
+  // go ahead and mask the upper bits out anyway.
+ emitInt32(Imm64 & 0xFFFFFFFF);
+ if (NeedsRexW)
+ emitInt32((Imm64 >> 32) & 0xFFFFFFFF);
+}
+
+template <class Machine>
void AssemblerX86Base<Machine>::movzx(Type SrcTy,
typename Traits::GPRRegister dst,
typename Traits::GPRRegister src) {
+ if (Traits::Is64Bit && SrcTy == IceType_i32) {
+ // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
+ // operand to 64-bit.
+ mov(IceType_i32, dst, src);
+ return;
+ }
+
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
bool ByteSized = isByteSizedType(SrcTy);
assert(ByteSized || SrcTy == IceType_i16);
@@ -295,6 +322,13 @@
void AssemblerX86Base<Machine>::movzx(Type SrcTy,
typename Traits::GPRRegister dst,
const typename Traits::Address &src) {
+ if (Traits::Is64Bit && SrcTy == IceType_i32) {
+ // 32-bit mov clears the upper 32 bits, hence zero-extending the 32-bit
+ // operand to 64-bit.
+ mov(IceType_i32, dst, src);
+ return;
+ }
+
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
bool ByteSized = isByteSizedType(SrcTy);
assert(ByteSized || SrcTy == IceType_i16);
@@ -359,7 +393,7 @@
if (Ty == IceType_i16)
emitOperandSizeOverride();
else
- assert(Ty == IceType_i32);
+ assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
emitRexRB(Ty, dst, src);
emitUint8(0x0F);
emitUint8(0x40 + cond);
@@ -375,7 +409,7 @@
if (Ty == IceType_i16)
emitOperandSizeOverride();
else
- assert(Ty == IceType_i32);
+ assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
emitRex(Ty, src, dst);
emitUint8(0x0F);
emitUint8(0x40 + cond);
@@ -423,44 +457,48 @@
}
template <class Machine>
-void AssemblerX86Base<Machine>::movd(typename Traits::XmmRegister dst,
+void AssemblerX86Base<Machine>::movd(Type SrcTy,
+ typename Traits::XmmRegister dst,
typename Traits::GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
- emitRexRB(RexTypeIrrelevant, dst, src);
+ emitRexRB(SrcTy, dst, src);
emitUint8(0x0F);
emitUint8(0x6E);
emitRegisterOperand(gprEncoding(dst), gprEncoding(src));
}
template <class Machine>
-void AssemblerX86Base<Machine>::movd(typename Traits::XmmRegister dst,
+void AssemblerX86Base<Machine>::movd(Type SrcTy,
+ typename Traits::XmmRegister dst,
const typename Traits::Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
- emitRex(RexTypeIrrelevant, src, dst);
+ emitRex(SrcTy, src, dst);
emitUint8(0x0F);
emitUint8(0x6E);
emitOperand(gprEncoding(dst), src);
}
template <class Machine>
-void AssemblerX86Base<Machine>::movd(typename Traits::GPRRegister dst,
+void AssemblerX86Base<Machine>::movd(Type DestTy,
+ typename Traits::GPRRegister dst,
typename Traits::XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
- emitRexRB(RexTypeIrrelevant, src, dst);
+ emitRexRB(DestTy, src, dst);
emitUint8(0x0F);
emitUint8(0x7E);
emitRegisterOperand(gprEncoding(src), gprEncoding(dst));
}
template <class Machine>
-void AssemblerX86Base<Machine>::movd(const typename Traits::Address &dst,
+void AssemblerX86Base<Machine>::movd(Type DestTy,
+ const typename Traits::Address &dst,
typename Traits::XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
- emitRex(RexTypeIrrelevant, dst, src);
+ emitRex(DestTy, dst, src);
emitUint8(0x0F);
emitUint8(0x7E);
emitOperand(gprEncoding(src), dst);
@@ -1343,7 +1381,7 @@
// Load 32-bit immediate value into tmp1.
mov(IceType_i32, tmp1, imm);
// Move value from tmp1 into dst.
- movd(dst, tmp1);
+ movd(IceType_i32, dst, tmp1);
// Broadcast low lane into other three lanes.
shufps(RexTypeIrrelevant, dst, dst, Immediate(0x0));
}
@@ -1487,10 +1525,11 @@
template <class Machine>
void AssemblerX86Base<Machine>::cvtsi2ss(Type DestTy,
typename Traits::XmmRegister dst,
+ Type SrcTy,
typename Traits::GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
- emitRexRB(RexTypeIrrelevant, dst, src);
+ emitRexRB(SrcTy, dst, src);
emitUint8(0x0F);
emitUint8(0x2A);
emitXmmRegisterOperand(dst, src);
@@ -1499,10 +1538,11 @@
template <class Machine>
void AssemblerX86Base<Machine>::cvtsi2ss(Type DestTy,
typename Traits::XmmRegister dst,
+ Type SrcTy,
const typename Traits::Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(isFloat32Asserting32Or64(DestTy) ? 0xF3 : 0xF2);
- emitRex(RexTypeIrrelevant, src, dst);
+ emitRex(SrcTy, src, dst);
emitUint8(0x0F);
emitUint8(0x2A);
emitOperand(gprEncoding(dst), src);
@@ -1534,24 +1574,26 @@
}
template <class Machine>
-void AssemblerX86Base<Machine>::cvttss2si(Type SrcTy,
+void AssemblerX86Base<Machine>::cvttss2si(Type DestTy,
typename Traits::GPRRegister dst,
+ Type SrcTy,
typename Traits::XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
- emitRexRB(RexTypeIrrelevant, dst, src);
+ emitRexRB(DestTy, dst, src);
emitUint8(0x0F);
emitUint8(0x2C);
emitXmmRegisterOperand(dst, src);
}
template <class Machine>
-void AssemblerX86Base<Machine>::cvttss2si(Type SrcTy,
+void AssemblerX86Base<Machine>::cvttss2si(Type DestTy,
typename Traits::GPRRegister dst,
+ Type SrcTy,
const typename Traits::Address &src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(isFloat32Asserting32Or64(SrcTy) ? 0xF3 : 0xF2);
- emitRex(RexTypeIrrelevant, src, dst);
+ emitRex(DestTy, src, dst);
emitUint8(0x0F);
emitUint8(0x2C);
emitOperand(gprEncoding(dst), src);
@@ -2401,6 +2443,15 @@
}
template <class Machine>
+template <typename T>
+typename std::enable_if<T::Is64Bit, void>::type
+AssemblerX86Base<Machine>::cqo() {
+ AssemblerBuffer::EnsureCapacity ensured(&Buffer);
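+  // CQO is encoded as REX.W 0x99 and sign-extends %rax into %rdx:%rax, the
+  // 64-bit counterpart of CDQ.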
+ emitRexB(RexTypeForceRexW, RexRegIrrelevant);
+ emitUint8(0x99);
+}
+
+template <class Machine>
void AssemblerX86Base<Machine>::div(Type Ty, typename Traits::GPRRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
if (Ty == IceType_i16)
@@ -2459,7 +2510,8 @@
void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister dst,
typename Traits::GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- assert(Ty == IceType_i16 || Ty == IceType_i32);
+ assert(Ty == IceType_i16 || Ty == IceType_i32 ||
+ (Traits::Is64Bit && Ty == IceType_i64));
if (Ty == IceType_i16)
emitOperandSizeOverride();
emitRexRB(Ty, dst, src);
@@ -2472,7 +2524,8 @@
void AssemblerX86Base<Machine>::imul(Type Ty, typename Traits::GPRRegister reg,
const typename Traits::Address &address) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- assert(Ty == IceType_i16 || Ty == IceType_i32);
+ assert(Ty == IceType_i16 || Ty == IceType_i32 ||
+ (Traits::Is64Bit && Ty == IceType_i64));
if (Ty == IceType_i16)
emitOperandSizeOverride();
emitRex(Ty, address, reg);
@@ -2790,8 +2843,7 @@
void AssemblerX86Base<Machine>::bswap(Type Ty,
typename Traits::GPRRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
- assert(Ty == IceType_i32);
- (void)Ty;
+ assert(Ty == IceType_i32 || (Traits::Is64Bit && Ty == IceType_i64));
emitRexB(Ty, reg);
emitUint8(0x0F);
emitUint8(0xC8 | gprEncoding(reg));
diff --git a/src/IceELFSection.h b/src/IceELFSection.h
index 5cf89a5..961d8d2 100644
--- a/src/IceELFSection.h
+++ b/src/IceELFSection.h
@@ -362,8 +362,7 @@
llvm::report_fatal_error("Missing symbol mentioned in reloc");
if (IsELF64) {
- llvm_unreachable(
- "Not tested -- check that Fixup.offset() is correct even for pc-rel");
+ // TODO(jpp): check that Fixup.offset() is correct even for pc-rel.
Elf64_Rela Rela;
Rela.r_offset = Fixup.position();
Rela.setSymbolAndType(Symbol->getNumber(), Fixup.kind());
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index c6d6abf..3a56e1b 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -206,7 +206,7 @@
} else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
Disp = CR->getOffset();
- Fixup = Asm->createFixup(llvm::ELF::R_386_32, CR);
+ Fixup = Asm->createFixup(RelFixup, CR);
} else {
llvm_unreachable("Unexpected offset type");
}
diff --git a/src/IceInstX8664.cpp b/src/IceInstX8664.cpp
index 3709180..49dc9d8 100644
--- a/src/IceInstX8664.cpp
+++ b/src/IceInstX8664.cpp
@@ -179,8 +179,8 @@
Disp = static_cast<int32_t>(CI->getValue());
} else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
- Disp = CR->getOffset();
- Fixup = Asm->createFixup(llvm::ELF::R_386_32, CR);
+ Disp = CR->getOffset() - 4;
+ Fixup = Asm->createFixup(PcRelFixup, CR);
} else {
llvm_unreachable("Unexpected offset type");
}
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h
index 6d39005..b0eb1ad 100644
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -1100,6 +1100,8 @@
: public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movsx> {
public:
static InstX86Movsx *create(Cfg *Func, Variable *Dest, Operand *Src) {
+ assert(typeWidthInBytes(Dest->getType()) >
+ typeWidthInBytes(Src->getType()));
return new (Func->allocate<InstX86Movsx>()) InstX86Movsx(Func, Dest, Src);
}
@@ -1116,6 +1118,8 @@
: public InstX86BaseUnaryopGPR<Machine, InstX86Base<Machine>::Movzx> {
public:
static InstX86Movzx *create(Cfg *Func, Variable *Dest, Operand *Src) {
+ assert(typeWidthInBytes(Dest->getType()) >
+ typeWidthInBytes(Src->getType()));
return new (Func->allocate<InstX86Movzx>()) InstX86Movzx(Func, Dest, Src);
}
diff --git a/src/IceInstX86BaseImpl.h b/src/IceInstX86BaseImpl.h
index 34417cf..4d26210 100644
--- a/src/IceInstX86BaseImpl.h
+++ b/src/IceInstX86BaseImpl.h
@@ -729,7 +729,8 @@
} else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
(Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Imm->getValue()));
} else if (const auto Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
- AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Reloc);
+ AssemblerFixup *Fixup =
+ Asm->createFixup(InstX86Base<Machine>::Traits::RelFixup, Reloc);
(Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Reloc->getOffset(), Fixup));
} else if (const auto Split = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::VariableSplit>(Src)) {
@@ -758,7 +759,8 @@
} else if (const auto Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
(Asm->*(Emitter.AddrImm))(Ty, Addr, Immediate(Imm->getValue()));
} else if (const auto Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
- AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Reloc);
+ AssemblerFixup *Fixup =
+ Asm->createFixup(InstX86Base<Machine>::Traits::RelFixup, Reloc);
(Asm->*(Emitter.AddrImm))(Ty, Addr, Immediate(Reloc->getOffset(), Fixup));
} else {
llvm_unreachable("Unexpected operand type");
@@ -929,8 +931,8 @@
template <class Machine, typename DReg_t, typename SReg_t,
DReg_t (*destEnc)(int32_t), SReg_t (*srcEnc)(int32_t)>
-void emitIASCastRegOp(const Cfg *Func, Type DispatchTy, const Variable *Dest,
- const Operand *Src,
+void emitIASCastRegOp(const Cfg *Func, Type DestTy, const Variable *Dest,
+ Type SrcTy, const Operand *Src,
const typename InstX86Base<Machine>::Traits::Assembler::
template CastEmitterRegOp<DReg_t, SReg_t> &Emitter) {
typename InstX86Base<Machine>::Traits::Assembler *Asm =
@@ -940,18 +942,18 @@
if (const auto SrcVar = llvm::dyn_cast<Variable>(Src)) {
if (SrcVar->hasReg()) {
SReg_t SrcReg = srcEnc(SrcVar->getRegNum());
- (Asm->*(Emitter.RegReg))(DispatchTy, DestReg, SrcReg);
+ (Asm->*(Emitter.RegReg))(DestTy, DestReg, SrcTy, SrcReg);
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
->stackVarToAsmOperand(SrcVar);
- (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, SrcStackAddr);
+ (Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy, SrcStackAddr);
}
} else if (const auto Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
Mem->emitSegmentOverride(Asm);
- (Asm->*(Emitter.RegAddr))(DispatchTy, DestReg, Mem->toAsmAddress(Asm));
+ (Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy, Mem->toAsmAddress(Asm));
} else {
llvm_unreachable("Unexpected operand type");
}
@@ -1387,17 +1389,26 @@
case IceType_i8:
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
- Str << "\tcbtw";
+ Str << "\t"
+ << "cbtw";
break;
case IceType_i16:
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
- Str << "\tcwtd";
+ Str << "\t"
+ << "cwtd";
break;
case IceType_i32:
assert(this->getDest()->getRegNum() ==
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
- Str << "\tcltd";
+ Str << "\t"
+ << "cltd";
+ break;
+ case IceType_i64:
+ assert(this->getDest()->getRegNum() ==
+ InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+ Str << "\t"
+ << "cdto";
break;
}
}
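
For reference, these are the AT&T spellings of the accumulator sign-extension family; the i64 case added above is the x86-64-only member:

    // AT&T    Intel   effect
    // cbtw == cbw     al  -> ax
    // cwtd == cwd     ax  -> dx:ax
    // cltd == cdq     eax -> edx:eax
    // cqto == cqo     rax -> rdx:rax (x86-64 only)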
@@ -1430,6 +1441,11 @@
InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Asm->cdq();
break;
+ case IceType_i64:
+ assert(this->getDest()->getRegNum() ==
+ InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+ Asm->cqo();
+ break;
}
}
@@ -1592,7 +1608,8 @@
assert(this->getSrcSize() == 2);
Operand *Src = this->getSrc(1);
Type SrcTy = Src->getType();
- assert(SrcTy == IceType_i16 || SrcTy == IceType_i32);
+ assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
+ (InstX86Base<Machine>::Traits::Is64Bit));
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
@@ -1814,7 +1831,11 @@
switch (Variant) {
case Si2ss: {
assert(isScalarIntegerType(SrcTy));
- assert(typeWidthInBytes(SrcTy) <= 4);
+ if (!InstX86Base<Machine>::Traits::Is64Bit) {
+ assert(typeWidthInBytes(SrcTy) <= 4);
+ } else {
+ assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
+ }
assert(isScalarFloatingType(DestTy));
static const typename InstX86Base<Machine>::Traits::Assembler::
template CastEmitterRegOp<
@@ -1828,13 +1849,17 @@
typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedXmm,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR>(
- Func, DestTy, Dest, Src, Emitter);
+ Func, DestTy, Dest, SrcTy, Src, Emitter);
return;
}
case Tss2si: {
assert(isScalarFloatingType(SrcTy));
assert(isScalarIntegerType(DestTy));
- assert(typeWidthInBytes(DestTy) <= 4);
+ if (!InstX86Base<Machine>::Traits::Is64Bit) {
+ assert(typeWidthInBytes(DestTy) <= 4);
+ } else {
+ assert(DestTy == IceType_i32 || DestTy == IceType_i64);
+ }
static const typename InstX86Base<Machine>::Traits::Assembler::
template CastEmitterRegOp<
typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister,
@@ -1847,7 +1872,7 @@
typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedXmm>(
- Func, SrcTy, Dest, Src, Emitter);
+ Func, DestTy, Dest, SrcTy, Src, Emitter);
return;
}
case Float2float: {
@@ -2244,6 +2269,10 @@
this->getDest()->emit(Func);
}
+inline bool isIntegerConstant(const Operand *Op) {
+ return llvm::isa<ConstantInteger32>(Op) || llvm::isa<ConstantInteger64>(Op);
+}
+
template <class Machine> void InstX86Mov<Machine>::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
@@ -2252,11 +2281,16 @@
Operand *Src = this->getSrc(0);
Type SrcTy = Src->getType();
Type DestTy = this->getDest()->getType();
- Str << "\tmov"
- << (!isScalarFloatingType(DestTy)
- ? this->getWidthString(SrcTy)
- : InstX86Base<Machine>::Traits::TypeAttributes[DestTy].SdSsString)
- << "\t";
+ if (InstX86Base<Machine>::Traits::Is64Bit && DestTy == IceType_i64 &&
+ isIntegerConstant(Src)) {
+ Str << "\tmovabs\t";
+ } else {
+ Str << "\tmov"
+ << (!isScalarFloatingType(DestTy)
+ ? this->getWidthString(SrcTy)
+ : InstX86Base<Machine>::Traits::TypeAttributes[DestTy]
+ .SdSsString) << "\t";
+ }
// For an integer truncation operation, src is wider than dest.
// Ideally, we use a mov instruction whose data width matches the
// narrower dest. This is a problem if e.g. src is a register like
@@ -2320,6 +2354,20 @@
assert(isScalarIntegerType(DestTy));
// Widen DestTy for truncation (see above note). We should only do this
// when both Src and Dest are integer types.
+ if (InstX86Base<Machine>::Traits::Is64Bit && DestTy == IceType_i64 &&
+ isIntegerConstant(Src)) {
+ uint64_t Value = -1;
+ if (const auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src)) {
+ Value = C64->getValue();
+ } else {
+ Value = llvm::cast<ConstantInteger32>(Src)->getValue();
+ }
+ Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>()
+ ->movabs(InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
+ Dest->getRegNum()),
+ Value);
+ return;
+ }
if (isScalarIntegerType(SrcTy)) {
DestTy = SrcTy;
}
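
movabs exists because, outside of the mov r64, imm64 form, x86-64 instructions carry at most a sign-extended 32-bit immediate. A GNU-as illustration of the distinction (not from this CL):

    movl    $0x12345678, %eax            # plain imm32
    movq    $-1, %rax                    # imm32, sign-extended to 64 bits
    movabsq $0x123456789abcdef0, %rax    # REX.W + B8+rd io: full imm64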
@@ -2363,14 +2411,19 @@
const auto SrcVar = llvm::cast<Variable>(this->getSrc(0));
// For insert/extract element (one of Src/Dest is an Xmm vector and
// the other is an int type).
- if (SrcVar->getType() == IceType_i32) {
- assert(isVectorType(Dest->getType()));
+ if (SrcVar->getType() == IceType_i32 ||
+ (InstX86Base<Machine>::Traits::Is64Bit &&
+ SrcVar->getType() == IceType_i64)) {
+ assert(isVectorType(Dest->getType()) ||
+ (isScalarFloatingType(Dest->getType()) &&
+ typeWidthInBytes(SrcVar->getType()) ==
+ typeWidthInBytes(Dest->getType())));
assert(Dest->hasReg());
typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister DestReg =
InstX86Base<Machine>::Traits::RegisterSet::getEncodedXmm(
Dest->getRegNum());
if (SrcVar->hasReg()) {
- Asm->movd(DestReg,
+ Asm->movd(SrcVar->getType(), DestReg,
InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
SrcVar->getRegNum()));
} else {
@@ -2378,17 +2431,23 @@
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
->stackVarToAsmOperand(SrcVar));
- Asm->movd(DestReg, StackAddr);
+ Asm->movd(SrcVar->getType(), DestReg, StackAddr);
}
} else {
- assert(isVectorType(SrcVar->getType()));
+ assert(isVectorType(SrcVar->getType()) ||
+ (isScalarFloatingType(SrcVar->getType()) &&
+ typeWidthInBytes(SrcVar->getType()) ==
+ typeWidthInBytes(Dest->getType())));
assert(SrcVar->hasReg());
- assert(Dest->getType() == IceType_i32);
+ assert(Dest->getType() == IceType_i32 ||
+ (InstX86Base<Machine>::Traits::Is64Bit &&
+ Dest->getType() == IceType_i64));
typename InstX86Base<Machine>::Traits::RegisterSet::XmmRegister SrcReg =
InstX86Base<Machine>::Traits::RegisterSet::getEncodedXmm(
SrcVar->getRegNum());
if (Dest->hasReg()) {
- Asm->movd(InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
+ Asm->movd(Dest->getType(),
+ InstX86Base<Machine>::Traits::RegisterSet::getEncodedGPR(
Dest->getRegNum()),
SrcReg);
} else {
@@ -2396,7 +2455,7 @@
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
Func->getTarget())
->stackVarToAsmOperand(Dest));
- Asm->movd(StackAddr, SrcReg);
+ Asm->movd(Dest->getType(), StackAddr, SrcReg);
}
}
}
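
Threading the operand type through movd lets a single interface emit both GPR<->XMM widths; the 64-bit form is movd plus a REX.W prefix, which disassemblers print as movq. Illustrative encodings:

    movd %eax, %xmm0    # 66 0f 6e c0      (IceType_i32)
    movq %rax, %xmm0    # 66 48 0f 6e c0   (IceType_i64: movd + REX.W)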
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 6724a61..466564d 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -792,7 +792,7 @@
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JT : Ctx->getJumpTables())
- Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
+ Writer->writeJumpTable(JT, TargetX8632::Traits::RelFixup);
} break;
case FT_Asm:
// Already emitted from Cfg
@@ -821,7 +821,8 @@
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
- Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
+ Writer->writeDataSection(Vars, TargetX8632::Traits::RelFixup,
+ SectionSuffix);
} break;
case FT_Asm:
case FT_Iasm: {
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h
index 29066aa..e0acbd6 100644
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -68,6 +68,7 @@
static const GPRRegister Encoded_Reg_Accumulator = RegX8632::Encoded_Reg_eax;
static const GPRRegister Encoded_Reg_Counter = RegX8632::Encoded_Reg_ecx;
static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32;
+ static const FixupKind RelFixup = llvm::ELF::R_386_32;
class Operand {
public:
@@ -272,6 +273,7 @@
};
static const char *TargetName;
+ static constexpr Type WordType = IceType_i32;
static IceString getRegName(SizeT RegNum, Type Ty) {
assert(RegNum < RegisterSet::Reg_NUM);
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
index 9056648..41d24cc 100644
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -123,7 +123,7 @@
}
// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
-// OperandList in lowerCall. std::max() was supposed to work, but it doesn't.
+// OperandList in lowerCall. std::max() should work, but it is not constexpr until C++14.
constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
} // end of anonymous namespace
@@ -239,7 +239,6 @@
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
- Variable *ReturnRegHi = nullptr;
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
@@ -250,12 +249,8 @@
case IceType_i8:
case IceType_i16:
case IceType_i32:
- ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
- break;
case IceType_i64:
- // TODO(jpp): return i64 in a GPR.
- ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+ ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_f32:
case IceType_f64:
@@ -271,27 +266,16 @@
}
}
- Operand *CallTarget = legalize(Instr->getCallTarget());
+ Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm);
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
if (NeedSandboxing) {
- if (llvm::isa<Constant>(CallTarget)) {
- _bundle_lock(InstBundleLock::Opt_AlignToEnd);
- } else {
- Variable *CallTargetVar = nullptr;
- _mov(CallTargetVar, CallTarget);
- _bundle_lock(InstBundleLock::Opt_AlignToEnd);
- const SizeT BundleSize =
- 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
- _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
- CallTarget = CallTargetVar;
- }
+ llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
}
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall);
- if (NeedSandboxing)
- _bundle_unlock();
- if (ReturnRegHi)
- Context.insert(InstFakeDef::create(Func, ReturnRegHi));
+ if (NeedSandboxing) {
+ llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
+ }
// Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission.
@@ -315,25 +299,11 @@
assert(ReturnReg && "x86-64 always returns value on registers.");
- // Assign the result of the call to Dest.
- if (ReturnRegHi) {
- assert(Dest->getType() == IceType_i64);
- split64(Dest);
- Variable *DestLo = Dest->getLo();
- Variable *DestHi = Dest->getHi();
- _mov(DestLo, ReturnReg);
- _mov(DestHi, ReturnRegHi);
- return;
- }
-
- assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
- Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
- Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
- isVectorType(Dest->getType()));
-
- if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
+ if (isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg);
} else {
+ assert(isScalarFloatingType(Dest->getType()) ||
+ isScalarIntegerType(Dest->getType()));
_mov(Dest, ReturnReg);
}
}
@@ -356,36 +326,36 @@
++i) {
Variable *Arg = Args[i];
Type Ty = Arg->getType();
- if ((isVectorType(Ty) || isScalarFloatingType(Ty)) &&
- NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
- // Replace Arg in the argument list with the home register. Then
- // generate an instruction in the prolog to copy the home register
- // to the assigned location of Arg.
- int32_t RegNum = getRegisterForXmmArgNum(NumXmmArgs);
+ Variable *RegisterArg = nullptr;
+ int32_t RegNum = Variable::NoRegister;
+ if ((isVectorType(Ty) || isScalarFloatingType(Ty))) {
+ if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) {
+ continue;
+ }
+ RegNum = getRegisterForXmmArgNum(NumXmmArgs);
++NumXmmArgs;
- Variable *RegisterArg = Func->makeVariable(Ty);
- if (BuildDefs::dump())
- RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
- RegisterArg->setRegNum(RegNum);
- RegisterArg->setIsArg();
- Arg->setIsArg(false);
-
- Args[i] = RegisterArg;
- Context.insert(InstAssign::create(Func, Arg, RegisterArg));
- } else if (isScalarIntegerType(Ty) &&
- NumGprArgs < Traits::X86_MAX_GPR_ARGS) {
- int32_t RegNum = getRegisterForGprArgNum(NumGprArgs);
+ RegisterArg = Func->makeVariable(Ty);
+ } else if (isScalarIntegerType(Ty)) {
+ if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) {
+ continue;
+ }
+ RegNum = getRegisterForGprArgNum(NumGprArgs);
++NumGprArgs;
- Variable *RegisterArg = Func->makeVariable(Ty);
- if (BuildDefs::dump())
- RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
- RegisterArg->setRegNum(RegNum);
- RegisterArg->setIsArg();
- Arg->setIsArg(false);
-
- Args[i] = RegisterArg;
- Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+ RegisterArg = Func->makeVariable(Ty);
}
+ assert(RegNum != Variable::NoRegister);
+ assert(RegisterArg != nullptr);
+ // Replace Arg in the argument list with the home register. Then
+ // generate an instruction in the prolog to copy the home register
+ // to the assigned location of Arg.
+ if (BuildDefs::dump())
+ RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
+ RegisterArg->setRegNum(RegNum);
+ RegisterArg->setIsArg();
+ Arg->setIsArg(false);
+
+ Args[i] = RegisterArg;
+ Context.insert(InstAssign::create(Func, Arg, RegisterArg));
}
}
@@ -393,19 +363,11 @@
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = legalize(Inst->getRetValue());
- // TODO(jpp): this is not needed.
- if (Src0->getType() == IceType_i64) {
- Variable *eax =
- legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
- Variable *edx =
- legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
- Reg = eax;
- Context.insert(InstFakeUse::create(Func, edx));
- } else if (isScalarFloatingType(Src0->getType())) {
- _fld(Src0);
- } else if (isVectorType(Src0->getType())) {
+ if (isVectorType(Src0->getType()) ||
+ isScalarFloatingType(Src0->getType())) {
Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
} else {
+ assert(isScalarIntegerType(Src0->getType()));
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
}
}
@@ -577,19 +539,17 @@
unsigned NumGPRArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
- if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
- ++NumXmmArgs;
- continue;
- }
- if (isScalarFloatingType(Arg->getType()) &&
- NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
- ++NumXmmArgs;
- continue;
- }
- if (isScalarIntegerType(Arg->getType()) &&
- NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
- ++NumGPRArgs;
- continue;
+ if (isVectorType(Arg->getType()) || isScalarFloatingType(Arg->getType())) {
+ if (NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
+ ++NumXmmArgs;
+ continue;
+ }
+ } else {
+ assert(isScalarIntegerType(Arg->getType()));
+ if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
+ ++NumGPRArgs;
+ continue;
+ }
}
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
}
@@ -679,23 +639,9 @@
}
}
- if (!Ctx->getFlags().getUseSandboxing())
- return;
- // Change the original ret instruction into a sandboxed return sequence.
- // t:ecx = pop
- // bundle_lock
- // and t, ~31
- // jmp *t
- // bundle_unlock
- // FakeUse <original_ret_operand>
- Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
- _pop(T_ecx);
- lowerIndirectJump(T_ecx);
- if (RI->getSrcSize()) {
- Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
- Context.insert(InstFakeUse::create(Func, RetValue));
+ if (Ctx->getFlags().getUseSandboxing()) {
+ llvm_unreachable("X86-64 Sandboxing codegen not implemented.");
}
- RI->setDeleted();
}
void TargetX8664::emitJumpTable(const Cfg *Func,
@@ -858,8 +804,7 @@
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JumpTable : Ctx->getJumpTables())
- // TODO(jpp): not 386.
- Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
+ Writer->writeJumpTable(JumpTable, TargetX8664::Traits::RelFixup);
} break;
case FT_Asm:
// Already emitted from Cfg
@@ -888,8 +833,8 @@
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
- // TODO(jpp): not 386.
- Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
+ Writer->writeDataSection(Vars, TargetX8664::Traits::RelFixup,
+ SectionSuffix);
} break;
case FT_Asm:
case FT_Iasm: {
diff --git a/src/IceTargetLoweringX8664Traits.h b/src/IceTargetLoweringX8664Traits.h
index 89fc203..4a12004 100644
--- a/src/IceTargetLoweringX8664Traits.h
+++ b/src/IceTargetLoweringX8664Traits.h
@@ -66,7 +66,8 @@
using RegisterSet = ::Ice::RegX8664;
static const GPRRegister Encoded_Reg_Accumulator = RegX8664::Encoded_Reg_eax;
static const GPRRegister Encoded_Reg_Counter = RegX8664::Encoded_Reg_ecx;
- static const FixupKind PcRelFixup = llvm::ELF::R_386_PC32; // TODO(jpp): ???
+ static const FixupKind PcRelFixup = llvm::ELF::R_X86_64_PC32;
+ static const FixupKind RelFixup = llvm::ELF::R_X86_64_32S;
class Operand {
public:
@@ -270,8 +271,8 @@
static Address ofConstPool(Assembler *Asm, const Constant *Imm) {
// TODO(jpp): ???
- AssemblerFixup *Fixup = Asm->createFixup(llvm::ELF::R_386_32, Imm);
- const RelocOffsetT Offset = 0;
+ AssemblerFixup *Fixup = Asm->createFixup(RelFixup, Imm);
+ const RelocOffsetT Offset = 4;
return Address(ABSOLUTE, Offset, Fixup);
}
};
@@ -293,6 +294,7 @@
};
static const char *TargetName;
+ static constexpr Type WordType = IceType_i64;
static IceString getRegName(SizeT RegNum, Type Ty) {
assert(RegNum < RegisterSet::Reg_NUM);
@@ -331,7 +333,7 @@
#define X(val, encode, name64, name32, name16, name8, scratch, preserved, \
stackptr, frameptr, isInt, isFP) \
(*IntegerRegisters)[RegisterSet::val] = isInt; \
- (*IntegerRegistersI8)[RegisterSet::val] = 1; \
+ (*IntegerRegistersI8)[RegisterSet::val] = isInt; \
(*FloatRegisters)[RegisterSet::val] = isFP; \
(*VectorRegisters)[RegisterSet::val] = isFP; \
(*ScratchRegs)[RegisterSet::val] = scratch;
@@ -450,7 +452,7 @@
/// address.
static const uint32_t X86_STACK_ALIGNMENT_BYTES;
/// Size of the return address on the stack
- static const uint32_t X86_RET_IP_SIZE_BYTES = 4;
+ static const uint32_t X86_RET_IP_SIZE_BYTES = 8;
/// The number of different NOP instructions
static const uint32_t X86_NUM_NOP_VARIANTS = 5;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 342c97b..da863f4 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -21,6 +21,7 @@
#include "IceInst.h"
#include "IceSwitchLowering.h"
#include "IceTargetLowering.h"
+#include "IceUtils.h"
#include <type_traits>
#include <utility>
@@ -80,10 +81,9 @@
: Traits::RegisterSet::Reg_esp;
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
- // Round up to the next multiple of 4 bytes. In particular, i1,
- // i8, and i16 are rounded up to 4 bytes.
- // TODO(jpp): this needs to round to multiples of 8 bytes in x86-64.
- return (typeWidthInBytes(Ty) + 3) & ~3;
+ // Round up to the next multiple of the target's word size, in bytes.
+ const uint32_t WordSizeInBytes = typeWidthInBytes(Traits::WordType);
+ return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
}
SizeT getMinJumpTableSize() const override { return 4; }
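
Utils::applyAlignment is presumably the usual power-of-two round-up; a hypothetical standalone version, for concreteness:

    #include <cstdint>
    // Round Value up to the next multiple of Align (a power of two).
    constexpr uint32_t applyAlignment(uint32_t Value, uint32_t Align) {
      return (Value + Align - 1) & ~(Align - 1);
    }
    // applyAlignment(5, 4) == 8   (x86-32: 4-byte words)
    // applyAlignment(12, 8) == 16 (x86-64: 8-byte words)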
@@ -98,14 +98,40 @@
void emit(const ConstantDouble *C) const final;
void initNodeForLowering(CfgNode *Node) override;
- /// Ensure that a 64-bit Variable has been split into 2 32-bit
+ /// x86-32: Ensure that a 64-bit Variable has been split into 2 32-bit
/// Variables, creating them if necessary. This is needed for all
/// I64 operations, and it is needed for pushing F64 arguments for
/// function calls using the 32-bit push instruction (though the
/// latter could be done by directly writing to the stack).
- void split64(Variable *Var);
- Operand *loOperand(Operand *Operand);
- Operand *hiOperand(Operand *Operand);
+ ///
+ /// x86-64: Complains loudly if invoked because the cpu can handle
+ /// 64-bit types natively.
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, void>::type split64(Variable *Var);
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, void>::type split64(Variable *) {
+ llvm::report_fatal_error(
+ "Hey, yo! This is x86-64. Watcha doin'? (split64)");
+ }
+
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, Operand>::type *
+ loOperand(Operand *Operand);
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, Operand>::type *loOperand(Operand *) {
+ llvm::report_fatal_error(
+ "Hey, yo! This is x86-64. Watcha doin'? (loOperand)");
+ }
+
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, Operand>::type *
+ hiOperand(Operand *Operand);
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, Operand>::type *hiOperand(Operand *) {
+ llvm::report_fatal_error(
+ "Hey, yo! This is x86-64. Watcha doin'? (hiOperand)");
+ }
+
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t &InArgsSizeBytes);
typename Traits::Address stackVarToAsmOperand(const Variable *Var) const;
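
The split64/loOperand/hiOperand trio above uses the defaulted-template-parameter enable_if idiom, so the 32-bit-only bodies are never instantiated for x86-64 and a stray call fails loudly at runtime instead of miscompiling. A minimal self-contained sketch of the idiom (hypothetical Traits types, not the Subzero ones):

    #include <cstdio>
    #include <type_traits>

    template <typename Traits> struct Target {
      template <typename T = Traits>
      typename std::enable_if<!T::Is64Bit, void>::type split64() {
        std::puts("x86-32: split into two 32-bit halves");
      }
      template <typename T = Traits>
      typename std::enable_if<T::Is64Bit, void>::type split64() {
        std::puts("x86-64: native 64-bit GPRs, nothing to split");
      }
    };

    struct Traits32 { static constexpr bool Is64Bit = false; };
    struct Traits64 { static constexpr bool Is64Bit = true; };

    int main() {
      Target<Traits32>().split64(); // picks the 32-bit overload
      Target<Traits64>().split64(); // picks the 64-bit overload
    }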
@@ -128,6 +154,19 @@
void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmp(const InstFcmp *Inst) override;
void lowerIcmp(const InstIcmp *Inst) override;
+ /// Complains loudly if invoked because the cpu can handle 64-bit types
+ /// natively.
+ template <typename T = Traits>
+ typename std::enable_if<T::Is64Bit, void>::type
+ lowerIcmp64(const InstIcmp *) {
+ llvm::report_fatal_error(
+ "Hey, yo! This is x86-64. Watcha doin'? (lowerIcmp64)");
+ }
+ /// x86lowerIcmp64 handles 64-bit icmp lowering.
+ template <typename T = Traits>
+ typename std::enable_if<!T::Is64Bit, void>::type
+ lowerIcmp64(const InstIcmp *Inst);
+
void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override;
void lowerInsertElement(const InstInsertElement *Inst) override;
void lowerLoad(const InstLoad *Inst) override;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 8dad58e..e190b5d 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -77,6 +77,7 @@
public:
enum BoolFoldingProducerKind {
PK_None,
+ // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
PK_Icmp32,
PK_Icmp64,
PK_Fcmp,
@@ -120,7 +121,7 @@
typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
if (llvm::isa<InstIcmp>(Instr)) {
- if (Instr->getSrc(0)->getType() != IceType_i64)
+ if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
return PK_Icmp32;
return PK_None; // TODO(stichnot): actually PK_Icmp64;
}
@@ -643,10 +644,10 @@
} else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
// An AtomicLoad intrinsic qualifies as long as it has a valid
// memory ordering, and can be implemented in a single
- // instruction (i.e., not i64).
+ // instruction (i.e., not i64 on x86-32).
Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
if (ID == Intrinsics::AtomicLoad &&
- Intrin->getDest()->getType() != IceType_i64 &&
+ (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
Intrinsics::isMemoryOrderValid(
ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
LoadDest = Intrin->getDest();
@@ -724,6 +725,10 @@
template <class Machine>
Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
+ // Special case: never allow partial reads/writes to/from %rBP and %rSP.
+ if (RegNum == Traits::RegisterSet::Reg_esp ||
+ RegNum == Traits::RegisterSet::Reg_ebp)
+ Ty = Traits::WordType;
if (Ty == IceType_void)
Ty = IceType_i32;
if (PhysicalRegisters[Ty].empty())
@@ -770,7 +775,7 @@
}
if (Offset)
Str << Offset;
- const Type FrameSPTy = IceType_i32;
+ const Type FrameSPTy = Traits::WordType;
Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
}
@@ -810,8 +815,7 @@
Variable *Lo = Arg->getLo();
Variable *Hi = Arg->getHi();
Type Ty = Arg->getType();
- if (Lo && Hi && Ty == IceType_i64) {
- // TODO(jpp): This special case is not needed for x86-64.
+ if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
assert(Lo->getType() != IceType_i64); // don't want infinite recursion
assert(Hi->getType() != IceType_i64); // don't want infinite recursion
finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
@@ -824,7 +828,7 @@
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
if (Arg->hasReg()) {
- assert(Ty != IceType_i64);
+ assert(Ty != IceType_i64 || Traits::Is64Bit);
typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
if (isVectorType(Arg->getType())) {
@@ -840,11 +844,13 @@
}
template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
- // TODO(jpp): this is wrong for x86-64.
- return IceType_i32;
+ return Traits::WordType;
}
-template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
+template <class Machine>
+template <typename T>
+typename std::enable_if<!T::Is64Bit, void>::type
+TargetX86Base<Machine>::split64(Variable *Var) {
switch (Var->getType()) {
default:
return;
@@ -876,7 +882,9 @@
}
template <class Machine>
-Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
+template <typename T>
+typename std::enable_if<!T::Is64Bit, Operand>::type *
+TargetX86Base<Machine>::loOperand(Operand *Operand) {
assert(Operand->getType() == IceType_i64 ||
Operand->getType() == IceType_f64);
if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -905,7 +913,9 @@
}
template <class Machine>
-Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
+template <typename T>
+typename std::enable_if<!T::Is64Bit, Operand>::type *
+TargetX86Base<Machine>::hiOperand(Operand *Operand) {
assert(Operand->getType() == IceType_i64 ||
Operand->getType() == IceType_f64);
if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -1107,8 +1117,8 @@
if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
std::swap(Src0, Src1);
}
- if (Dest->getType() == IceType_i64) {
- // These helper-call-involved instructions are lowered in this
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ // These x86-32 helper-call-involved instructions are lowered in this
// separate switch. This is because loOperand() and hiOperand()
// may insert redundant instructions for constant blinding and
// pooling. Such redundant instructions will fail liveness analysis
@@ -1656,7 +1666,8 @@
Context.insert(InstFakeUse::create(Func, T_eax));
} else {
Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
+ T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
+ _mov(T_edx, Zero);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
_div(T_edx, Src1, T);
_mov(Dest, T_edx);
@@ -1721,7 +1732,7 @@
_mov(Dest, T);
Context.insert(InstFakeUse::create(Func, T_eax));
} else {
- T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+ T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
_cbwdq(T_edx, T);
_idiv(T_edx, Src1, T);
@@ -1765,7 +1776,7 @@
Variable *Dest = Inst->getDest();
Operand *Src0 = Inst->getSrc(0);
assert(Dest->getType() == Src0->getType());
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
Src0 = legalize(Src0);
Operand *Src0Lo = loOperand(Src0);
Operand *Src0Hi = hiOperand(Src0);
@@ -1870,7 +1881,7 @@
_psra(T, ShiftConstant);
_movp(Dest, T);
}
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Constant *Shift = Ctx->getConstantInt32(31);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
@@ -1930,7 +1941,7 @@
_movp(T, Src0RM);
_pand(T, OneMask);
_movp(Dest, T);
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// t1=movzx src; dst.lo=t1; dst.hi=0
Constant *Zero = Ctx->getConstantZero(IceType_i32);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
@@ -1951,13 +1962,16 @@
// t = Src0RM; t &= 1; Dest = t
Constant *One = Ctx->getConstantInt32(1);
Type DestTy = Dest->getType();
- Variable *T;
+ Variable *T = nullptr;
if (DestTy == IceType_i8) {
- T = makeReg(DestTy);
_mov(T, Src0RM);
} else {
+ assert(DestTy != IceType_i1);
+ assert(Traits::Is64Bit || DestTy != IceType_i64);
// Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
- T = makeReg(IceType_i32);
+ // On x86-64 we need to widen T to 64 bits to ensure that T, if
+ // written to the stack (i.e., in -Om1), will be fully zero-extended.
+ T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
_movzx(T, Src0RM);
}
_and(T, One);
@@ -1982,7 +1996,7 @@
_movp(Dest, T);
} else {
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
- if (Src0->getType() == IceType_i64)
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
Src0 = loOperand(Src0);
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// t1 = trunc Src0RM; Dest = t1
@@ -2013,7 +2027,7 @@
Variable *T = makeReg(Dest->getType());
_cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
_movp(Dest, T);
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// Use a helper for converting floating-point values to 64-bit
// integers. SSE2 appears to have no way to convert from xmm
// registers to something like the edx:eax register pair, and
@@ -2032,7 +2046,15 @@
} else {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Dest->getType() != IceType_i64);
+ T_1 = makeReg(IceType_i32);
+ }
+ // cvt() requires its integer argument to be a GPR.
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
@@ -2050,14 +2072,18 @@
Call->addArg(Inst->getSrc(0));
lowerCall(Call);
} else if (Dest->getType() == IceType_i64 ||
- Dest->getType() == IceType_i32) {
+ (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
// Use a helper for both x86-32 and x86-64.
- split64(Dest);
+ if (!Traits::Is64Bit)
+ split64(Dest);
const SizeT MaxSrcs = 1;
Type DestType = Dest->getType();
Type SrcType = Inst->getSrc(0)->getType();
IceString TargetString;
- if (isInt32Asserting32Or64(DestType)) {
+ if (Traits::Is64Bit) {
+ TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
+ : H_fptoui_f64_i64;
+ } else if (isInt32Asserting32Or64(DestType)) {
TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
: H_fptoui_f64_i32;
} else {
@@ -2071,7 +2097,15 @@
} else {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- Variable *T_1 = makeReg(IceType_i32);
+ assert(Dest->getType() != IceType_i64);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Dest->getType() != IceType_i32);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
@@ -2090,7 +2124,7 @@
Variable *T = makeReg(Dest->getType());
_cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
_movp(Dest, T);
- } else if (Inst->getSrc(0)->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
// Use a helper for x86-32.
const SizeT MaxSrcs = 1;
Type DestType = Dest->getType();
@@ -2106,9 +2140,16 @@
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// Sign-extend the operand.
// t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Src0RM->getType() != IceType_i64);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
- if (Src0RM->getType() == IceType_i32)
+ if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
_movsx(T_1, Src0RM);
@@ -2126,7 +2167,7 @@
Call->addArg(Src0);
lowerCall(Call);
} else if (Src0->getType() == IceType_i64 ||
- Src0->getType() == IceType_i32) {
+ (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
// Use a helper for x86-32 and x86-64. Also use a helper for
// i32 on x86-32.
const SizeT MaxSrcs = 1;
@@ -2147,9 +2188,17 @@
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// Zero-extend the operand.
// t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Src0RM->getType() != IceType_i64);
+ assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
- if (Src0RM->getType() == IceType_i32)
+ if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
_movzx(T_1, Src0RM);
@@ -2205,77 +2254,96 @@
_mov(Dest, Spill);
} break;
case IceType_i64: {
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- assert(Src0RM->getType() == IceType_f64);
- // a.i64 = bitcast b.f64 ==>
- // s.f64 = spill b.f64
- // t_lo.i32 = lo(s.f64)
- // a_lo.i32 = t_lo.i32
- // t_hi.i32 = hi(s.f64)
- // a_hi.i32 = t_hi.i32
- Operand *SpillLo, *SpillHi;
- if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
- typename Traits::SpillVariable *SpillVar =
- Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(Src0Var);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
- _movq(Spill, Src0RM);
- SpillLo = Traits::VariableSplit::create(Func, Spill,
- Traits::VariableSplit::Low);
- SpillHi = Traits::VariableSplit::create(Func, Spill,
- Traits::VariableSplit::High);
+ assert(Src0->getType() == IceType_f64);
+ if (Traits::Is64Bit) {
+ // Movd requires its fp argument (in this case, the bitcast source) to
+ // be an xmm register.
+ Variable *Src0R = legalizeToReg(Src0);
+ Variable *T = makeReg(IceType_i64);
+ _movd(T, Src0R);
+ _mov(Dest, T);
} else {
- SpillLo = loOperand(Src0RM);
- SpillHi = hiOperand(Src0RM);
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ // a.i64 = bitcast b.f64 ==>
+ // s.f64 = spill b.f64
+ // t_lo.i32 = lo(s.f64)
+ // a_lo.i32 = t_lo.i32
+ // t_hi.i32 = hi(s.f64)
+ // a_hi.i32 = t_hi.i32
+ Operand *SpillLo, *SpillHi;
+ if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
+ typename Traits::SpillVariable *SpillVar =
+ Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
+ SpillVar->setLinkedTo(Src0Var);
+ Variable *Spill = SpillVar;
+ Spill->setWeight(RegWeight::Zero);
+ _movq(Spill, Src0RM);
+ SpillLo = Traits::VariableSplit::create(Func, Spill,
+ Traits::VariableSplit::Low);
+ SpillHi = Traits::VariableSplit::create(Func, Spill,
+ Traits::VariableSplit::High);
+ } else {
+ SpillLo = loOperand(Src0RM);
+ SpillHi = hiOperand(Src0RM);
+ }
+
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Variable *T_Lo = makeReg(IceType_i32);
+ Variable *T_Hi = makeReg(IceType_i32);
+
+ _mov(T_Lo, SpillLo);
+ _mov(DestLo, T_Lo);
+ _mov(T_Hi, SpillHi);
+ _mov(DestHi, T_Hi);
}
-
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = makeReg(IceType_i32);
- Variable *T_Hi = makeReg(IceType_i32);
-
- _mov(T_Lo, SpillLo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, SpillHi);
- _mov(DestHi, T_Hi);
} break;
case IceType_f64: {
- Src0 = legalize(Src0);
assert(Src0->getType() == IceType_i64);
- if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
- Variable *T = Func->makeVariable(Dest->getType());
- _movq(T, Src0);
- _movq(Dest, T);
- break;
- }
- // a.f64 = bitcast b.i64 ==>
- // t_lo.i32 = b_lo.i32
- // FakeDef(s.f64)
- // lo(s.f64) = t_lo.i32
- // t_hi.i32 = b_hi.i32
- // hi(s.f64) = t_hi.i32
- // a.f64 = s.f64
- typename Traits::SpillVariable *SpillVar =
- Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(Dest);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
+ if (Traits::Is64Bit) {
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ Variable *T = makeReg(IceType_f64);
+ // Movd requires its fp argument (in this case, the bitcast destination)
+ // to be an xmm register.
+ T->setWeightInfinite();
+ _movd(T, Src0RM);
+ _mov(Dest, T);
+ } else {
+ Src0 = legalize(Src0);
+ if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
+ Variable *T = Func->makeVariable(Dest->getType());
+ _movq(T, Src0);
+ _movq(Dest, T);
+ break;
+ }
+ // a.f64 = bitcast b.i64 ==>
+ // t_lo.i32 = b_lo.i32
+ // FakeDef(s.f64)
+ // lo(s.f64) = t_lo.i32
+ // t_hi.i32 = b_hi.i32
+ // hi(s.f64) = t_hi.i32
+ // a.f64 = s.f64
+ typename Traits::SpillVariable *SpillVar =
+ Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
+ SpillVar->setLinkedTo(Dest);
+ Variable *Spill = SpillVar;
+ Spill->setWeight(RegWeight::Zero);
- Variable *T_Lo = nullptr, *T_Hi = nullptr;
- typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
- Func, Spill, Traits::VariableSplit::Low);
- typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
- Func, Spill, Traits::VariableSplit::High);
- _mov(T_Lo, loOperand(Src0));
- // Technically, the Spill is defined after the _store happens, but
- // SpillLo is considered a "use" of Spill so define Spill before it
- // is used.
- Context.insert(InstFakeDef::create(Func, Spill));
- _store(T_Lo, SpillLo);
- _mov(T_Hi, hiOperand(Src0));
- _store(T_Hi, SpillHi);
- _movq(Dest, Spill);
+ Variable *T_Lo = nullptr, *T_Hi = nullptr;
+ typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
+ Func, Spill, Traits::VariableSplit::Low);
+ typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
+ Func, Spill, Traits::VariableSplit::High);
+ _mov(T_Lo, loOperand(Src0));
+ // Technically, the Spill is defined after the _store happens, but
+ // SpillLo is considered a "use" of Spill so define Spill before it
+ // is used.
+ Context.insert(InstFakeDef::create(Func, Spill));
+ _store(T_Lo, SpillLo);
+ _mov(T_Hi, hiOperand(Src0));
+ _store(T_Hi, SpillHi);
+ _movq(Dest, Spill);
+ }
} break;
case IceType_v8i1: {
assert(Src0->getType() == IceType_i8);
@@ -2615,32 +2683,8 @@
return;
}
- // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
- if (Src0->getType() == IceType_i64) {
- InstIcmp::ICond Condition = Inst->getCondition();
- size_t Index = static_cast<size_t>(Condition);
- assert(Index < Traits::TableIcmp64Size);
- Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
- Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
- Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
- Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- Constant *One = Ctx->getConstantInt32(1);
- typename Traits::Insts::Label *LabelFalse =
- Traits::Insts::Label::create(Func, this);
- typename Traits::Insts::Label *LabelTrue =
- Traits::Insts::Label::create(Func, this);
- _mov(Dest, One);
- _cmp(Src0HiRM, Src1HiRI);
- if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
- _br(Traits::TableIcmp64[Index].C1, LabelTrue);
- if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
- _br(Traits::TableIcmp64[Index].C2, LabelFalse);
- _cmp(Src0LoRM, Src1LoRI);
- _br(Traits::TableIcmp64[Index].C3, LabelTrue);
- Context.insert(LabelFalse);
- _mov_nonkillable(Dest, Zero);
- Context.insert(LabelTrue);
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
+ lowerIcmp64(Inst);
return;
}
@@ -2650,6 +2694,40 @@
_setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
}
+template <typename Machine>
+template <typename T>
+typename std::enable_if<!T::Is64Bit, void>::type
+TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) {
+ // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
+ Operand *Src0 = legalize(Inst->getSrc(0));
+ Operand *Src1 = legalize(Inst->getSrc(1));
+ Variable *Dest = Inst->getDest();
+ InstIcmp::ICond Condition = Inst->getCondition();
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < Traits::TableIcmp64Size);
+ Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
+ Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
+ Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
+ Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ Constant *One = Ctx->getConstantInt32(1);
+ typename Traits::Insts::Label *LabelFalse =
+ Traits::Insts::Label::create(Func, this);
+ typename Traits::Insts::Label *LabelTrue =
+ Traits::Insts::Label::create(Func, this);
+ _mov(Dest, One);
+ _cmp(Src0HiRM, Src1HiRI);
+ if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
+ _br(Traits::TableIcmp64[Index].C1, LabelTrue);
+ if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
+ _br(Traits::TableIcmp64[Index].C2, LabelFalse);
+ _cmp(Src0LoRM, Src1LoRI);
+ _br(Traits::TableIcmp64[Index].C3, LabelTrue);
+ Context.insert(LabelFalse);
+ _mov_nonkillable(Dest, Zero);
+ Context.insert(LabelTrue);
+}
+
template <class Machine>
void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
Operand *SourceVectNotLegalized = Inst->getSrc(0);
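
lowerIcmp64 is a verbatim relocation of the old inline i64 path, now compiled only for x86-32. For a concrete picture of its output, an unsigned less-than lowers roughly to (illustrative pseudo-assembly; labels and operand order schematic):

    //   mov   dst, 1
    //   cmp   a.hi, b.hi        ; high words first
    //   jb    .Ltrue            ; C1
    //   ja    .Lfalse           ; C2
    //   cmp   a.lo, b.lo        ; tie broken by the low words
    //   jb    .Ltrue            ; C3
    // .Lfalse:
    //   mov   dst, 0            ; the _mov_nonkillable above
    // .Ltrue: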
@@ -2848,7 +2926,7 @@
return;
}
Variable *Dest = Instr->getDest();
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// Follow what GCC does and use a movq instead of what lowerLoad()
// normally does (split the load into two).
// Thus, this skips load/arithmetic op folding. Load/arithmetic folding
@@ -2898,7 +2976,7 @@
// Add a fence after the store to make it visible.
Operand *Value = Instr->getArg(0);
Operand *Ptr = Instr->getArg(1);
- if (Value->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
// Use a movq instead of what lowerStore() normally does
// (split the store into two), following what GCC does.
// Cast the bits from int -> to an xmm register first.
@@ -2922,7 +3000,7 @@
Operand *Val = Instr->getArg(0);
// In 32-bit mode, bswap only works on 32-bit arguments, and the
// argument must be a register. Use rotate left for 16-bit bswap.
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
Val = legalizeUndef(Val);
Variable *T_Lo = legalizeToReg(loOperand(Val));
Variable *T_Hi = legalizeToReg(hiOperand(Val));
@@ -2932,7 +3010,8 @@
_bswap(T_Hi);
_mov(DestLo, T_Hi);
_mov(DestHi, T_Lo);
- } else if (Val->getType() == IceType_i32) {
+ } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
+ Val->getType() == IceType_i32) {
Variable *T = legalizeToReg(Val);
_bswap(T);
_mov(Dest, T);
@@ -2949,11 +3028,28 @@
}
case Intrinsics::Ctpop: {
Variable *Dest = Instr->getDest();
+ Variable *T = nullptr;
Operand *Val = Instr->getArg(0);
- InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
- ? H_call_ctpop_i32
- : H_call_ctpop_i64,
- Dest, 1);
+ Type ValTy = Val->getType();
+ assert(ValTy == IceType_i32 || ValTy == IceType_i64);
+
+ if (!Traits::Is64Bit) {
+ T = Dest;
+ } else {
+ T = makeReg(IceType_i64);
+ if (ValTy == IceType_i32) {
+ // In x86-64, __popcountsi2 is not defined, so we cheat a bit by
+ // converting Val to a 64-bit value and using ctpop_i64. _movzx ensures
+ // that none of Val's upper 32 bits are set.
+ Variable *V = makeReg(IceType_i64);
+ _movzx(V, Val);
+ Val = V;
+ }
+ ValTy = IceType_i64;
+ }
+
+ InstCall *Call = makeHelperCall(
+ ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
Call->addArg(Val);
lowerCall(Call);
// The popcount helpers always return 32-bit values, while the intrinsic's
@@ -2961,10 +3057,33 @@
// (in 64-bit mode). Thus, clear the upper bits of the dest just in case
// the user doesn't do that in the IR. If the user does that in the IR,
// then this zero'ing instruction is dead and gets optimized out.
- if (Val->getType() == IceType_i64) {
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(DestHi, Zero);
+ if (!Traits::Is64Bit) {
+ assert(T == Dest);
+ if (Val->getType() == IceType_i64) {
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ _mov(DestHi, Zero);
+ }
+ } else {
+ assert(Val->getType() == IceType_i64);
+ // T is 64 bits wide. It needs to be copied to Dest. We need to:
+ //
+ // T_1.32 = trunc T.64 to i32
+ // T_2.64 = zext T_1.32 to i64
+ // Dest.<<right_size>> = T_2.<<right_size>>
+ //
+ // which ensures the upper 32 bits will always be cleared. Just doing a
+ //
+ // mov Dest.32 = trunc T.32 to i32
+ //
+ // is dangerous because there's a chance the compiler will optimize this
+ // copy out. To use _movzx we need two new registers (one 32- and
+ // another 64-bit wide).
+ Variable *T_1 = makeReg(IceType_i32);
+ _mov(T_1, T);
+ Variable *T_2 = makeReg(IceType_i64);
+ _movzx(T_2, T_1);
+ _mov(Dest, T_2);
}
return;
}
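
The x86-64 ctpop path relies on zero-extension preserving the population count, both when widening the i32 argument and when clearing the result's upper bits. The underlying identity, as a standalone C++ sketch (not lowering code):

    #include <cstdint>

    int popcount64(uint64_t X) { // stands in for the ctpop_i64 helper
      int N = 0;
      for (; X != 0; X &= X - 1) // clears the lowest set bit
        ++N;
      return N;
    }

    int popcount32(uint32_t X) {
      // zext adds no set bits, so the 64-bit helper gives the same count.
      return popcount64(static_cast<uint64_t>(X));
    }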
@@ -2974,7 +3093,7 @@
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = nullptr;
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
FirstVal = loOperand(Val);
SecondVal = hiOperand(Val);
} else {
@@ -2991,7 +3110,7 @@
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = nullptr;
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
FirstVal = hiOperand(Val);
SecondVal = loOperand(Val);
} else {
@@ -3099,7 +3218,7 @@
void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
Operand *Ptr, Operand *Expected,
Operand *Desired) {
- if (Expected->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
// Reserve the pre-colored registers first, before adding any more
// infinite-weight variables from formMemoryOperand's legalization.
Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
@@ -3217,7 +3336,7 @@
Func->setError("Unknown AtomicRMW operation");
return;
case Intrinsics::AtomicAdd: {
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// All the fall-through paths must set this to true, but use this
// for asserting.
NeedsCmpxchg = true;
@@ -3235,7 +3354,7 @@
return;
}
case Intrinsics::AtomicSub: {
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
NeedsCmpxchg = true;
Op_Lo = &TargetX86Base<Machine>::_sub;
Op_Hi = &TargetX86Base<Machine>::_sbb;
@@ -3272,7 +3391,7 @@
Op_Hi = &TargetX86Base<Machine>::_xor;
break;
case Intrinsics::AtomicExchange:
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
NeedsCmpxchg = true;
// NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
// just need to be moved to the ecx and ebx registers.
@@ -3326,7 +3445,7 @@
// If Op_{Lo,Hi} are nullptr, then just copy the value.
Val = legalize(Val);
Type Ty = Val->getType();
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
@@ -3458,7 +3577,7 @@
if (!Cttz) {
_xor(T_Dest, ThirtyOne);
}
- if (Ty == IceType_i32) {
+ if (Traits::Is64Bit || Ty == IceType_i32) {
_mov(Dest, T_Dest);
return;
}
@@ -4138,7 +4257,7 @@
std::swap(SrcT, SrcF);
Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
}
- if (DestTy == IceType_i64) {
+ if (!Traits::Is64Bit && DestTy == IceType_i64) {
SrcT = legalizeUndef(SrcT);
SrcF = legalizeUndef(SrcF);
// Set the low portion.
@@ -4160,7 +4279,8 @@
return;
}
- assert(DestTy == IceType_i16 || DestTy == IceType_i32);
+ assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
+ (Traits::Is64Bit && DestTy == IceType_i64));
Variable *T = nullptr;
SrcF = legalize(SrcF);
_mov(T, SrcF);
@@ -4177,7 +4297,7 @@
formMemoryOperand(Addr, Value->getType());
Type Ty = NewAddr->getType();
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Value = legalizeUndef(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
@@ -4225,7 +4345,7 @@
uint64_t Min, uint64_t Max) {
// TODO(ascull): 64-bit should not reach here but only because it is not
// implemented yet. This should be able to handle the 64-bit case.
- assert(Comparison->getType() != IceType_i64);
+ assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
// Subtracting 0 is a nop so don't do it
if (Min != 0) {
// Avoid clobbering the comparison by copying it
@@ -4324,7 +4444,7 @@
assert(CaseClusters.size() != 0); // Should always be at least one
- if (Src0->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
Src0 = legalize(Src0); // get Base/Index into physical registers
Operand *Src0Lo = loOperand(Src0);
Operand *Src0Hi = hiOperand(Src0);
@@ -4529,7 +4649,7 @@
Operand *Src = RMW->getData();
Type Ty = Src->getType();
typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Src = legalizeUndef(Src);
Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
@@ -4563,7 +4683,8 @@
return;
}
} else {
- // i8, i16, i32
+ // x86-32: i8, i16, i32
+ // x86-64: i8, i16, i32, i64
switch (RMW->getOp()) {
default:
// TODO(stichnot): Implement other arithmetic operators.
@@ -4608,8 +4729,14 @@
/// turned into zeroes, since loOperand() and hiOperand() don't expect
/// Undef input.
template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
- // Pause constant blinding or pooling, blinding or pooling will be done later
- // during phi lowering assignments
+ if (Traits::Is64Bit) {
+ // On x86-64 we don't need to prelower phis -- the architecture can handle
+ // 64-bit integers natively.
+ return;
+ }
+
+ // Pause constant blinding or pooling; blinding or pooling will be done
+ // later during phi lowering assignments.
BoolFlagSaver B(RandomizationPoolingPaused, true);
PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
this, Context.getNode(), Func);
@@ -4770,6 +4897,16 @@
// There should be no constants of vector type (other than undef).
assert(!isVectorType(Ty));
+ // If the operand is a 64-bit constant integer, we need to legalize it to
+ // a register in x86-64.
+ if (Traits::Is64Bit) {
+ if (llvm::isa<ConstantInteger64>(Const)) {
+ Variable *V = copyToReg(Const, RegNum);
+ V->setWeightInfinite();
+ return V;
+ }
+ }
+
// If the operand is an 32 bit constant integer, we should check
// whether we need to randomize it or pool it.
if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
@@ -4907,7 +5044,7 @@
template <class Machine>
Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for x86-32.
- assert(Type != IceType_i64);
+ assert(Traits::Is64Bit || Type != IceType_i64);
Variable *Reg = Func->makeVariable(Type);
if (RegNum == Variable::NoRegister)
Reg->setWeightInfinite();
@@ -4939,8 +5076,15 @@
}
template <class Machine>
-void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {
- llvm::report_fatal_error("Not expecting to emit 64-bit integers");
+void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
+ if (!Traits::Is64Bit) {
+ llvm::report_fatal_error("Not expecting to emit 64-bit integers");
+ } else {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Ctx->getStrEmit();
+ Str << getConstantPrefix() << C->getValue();
+ }
}
template <class Machine>
@@ -5085,8 +5229,8 @@
MemOperand->getBase(), Mask1);
// If we have already assigned a physical register, we must come from
// advancedPhiLowering()=>lowerAssign(). In this case we should reuse
- // the assigned register as this assignment is that start of its use-def
- // chain. So we add RegNum argument here.
+ // the assigned register as this assignment is the start of its
+ // use-def chain. So we add the RegNum argument here.
Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
_lea(RegTemp, TempMemOperand);
// As source operand doesn't use the dstreg, we don't need to add
diff --git a/unittest/AssemblerX8632/DataMov.cpp b/unittest/AssemblerX8632/DataMov.cpp
index cb2012e..d41acd1 100644
--- a/unittest/AssemblerX8632/DataMov.cpp
+++ b/unittest/AssemblerX8632/DataMov.cpp
@@ -538,7 +538,8 @@
\
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##Src, Immediate(Value)); \
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
- __ movd(XmmRegister::Encoded_Reg_##Dst, GPRRegister::Encoded_Reg_##Src); \
+ __ movd(IceType_i32, XmmRegister::Encoded_Reg_##Dst, \
+ GPRRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
\
@@ -560,7 +561,7 @@
const uint64_t V1 = 0xFFFFFFFF00000000ull; \
\
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
- __ movd(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
+ __ movd(IceType_i32, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
\
@@ -609,7 +610,8 @@
const uint32_t V0 = Value; \
\
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
- __ movd(GPRRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src); \
+ __ movd(IceType_i32, GPRRegister::Encoded_Reg_##Dst, \
+ XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
\
@@ -631,7 +633,7 @@
const uint32_t V1 = ~(Value); \
\
__ movss(IceType_f64, XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
- __ movd(dwordAddress(T1), XmmRegister::Encoded_Reg_##Src); \
+ __ movd(IceType_i32, dwordAddress(T1), XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
\
diff --git a/unittest/AssemblerX8632/XmmArith.cpp b/unittest/AssemblerX8632/XmmArith.cpp
index 45ff3a9..a85c8f9 100644
--- a/unittest/AssemblerX8632/XmmArith.cpp
+++ b/unittest/AssemblerX8632/XmmArith.cpp
@@ -1072,7 +1072,7 @@
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##SrcValue)); \
- __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
+ __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
GPRRegister::Encoded_Reg_##GPR); \
\
AssembledTest test = assemble(); \
@@ -1092,7 +1092,7 @@
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##DstValue)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
- __ cvt##Inst(IceType_f##Size, GPRRegister::Encoded_Reg_##GPR, \
+ __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
XmmRegister::Encoded_Reg_##Src); \
\
AssembledTest test = assemble(); \
@@ -1132,7 +1132,7 @@
const uint32_t T1 = allocateDword(); \
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
- __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
+ __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
@@ -1152,7 +1152,7 @@
\
__ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
Immediate(Inst##Size##DstValue)); \
- __ cvt##Inst(IceType_f##Size, GPRRegister::Encoded_Reg_##GPR, \
+ __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
diff --git a/unittest/AssemblerX8664/DataMov.cpp b/unittest/AssemblerX8664/DataMov.cpp
index 0610b45..6e83fce 100644
--- a/unittest/AssemblerX8664/DataMov.cpp
+++ b/unittest/AssemblerX8664/DataMov.cpp
@@ -263,6 +263,32 @@
#undef TestRegAddr
}
+TEST_F(AssemblerX8664Test, Movabs) {
+#define TestImplValue(Dst, Value) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Value ")"; \
+ uint64_t V = (Value); \
+ __ movabs(Encoded_GPR_##Dst##q(), V); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.run(); \
+ \
+ ASSERT_EQ(V, test.Dst()) << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImpl(Dst) \
+ do { \
+ for (uint64_t V : {0ull, 1ull, 0xFFFFFFull, 0x80000000ull, \
+ 0xFFFFFFFFFFFFFFFFull}) { \
+ TestImplValue(Dst, V); \
+ } \
+ } while (0)
+
+#undef TestImpl
+#undef TestImplValue
+}
+
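As a sanity reference for the new movabs() tests: movabs is the only x86-64 form that takes a full 64-bit immediate, encoded as REX.W, opcode B8+rd, then eight little-endian immediate bytes. The bytes below are hand-assembled for illustration, not emitter output:

#include <cstdint>
#include <cstdio>

int main() {
  // movabs rax, 0x1122334455667788
  // 48       REX.W (64-bit operand size)
  // B8       MOV r64, imm64; +rd register selector (rax => +0)
  // 88..11   the imm64, little-endian
  const uint8_t Encoding[] = {0x48, 0xB8, 0x88, 0x77, 0x66, 0x55,
                              0x44, 0x33, 0x22, 0x11};
  for (uint8_t Byte : Encoding)
    std::printf("%02x ", Byte);
  std::printf("\n");
  return 0;
}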
TEST_F(AssemblerX8664Test, Movzx) {
static constexpr uint32_t Mask8 = 0x000000FF;
static constexpr uint32_t Mask16 = 0x0000FFFF;
@@ -677,7 +703,7 @@
}
TEST_F(AssemblerX8664Test, MovdToXmm) {
-#define TestMovdXmmReg(Src, Dst, Value) \
+#define TestMovdXmmReg32(Src, Dst, Value) \
do { \
assert(((Value)&0xFFFFFFFF) == (Value)); \
static constexpr char TestString[] = "(" #Src ", " #Dst ")"; \
@@ -686,7 +712,7 @@
\
__ mov(IceType_i32, Encoded_GPR_##Src(), Immediate(Value)); \
__ movss(IceType_f64, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
- __ movd(Encoded_Xmm_##Dst(), Encoded_GPR_##Src()); \
+ __ movd(IceType_i32, Encoded_Xmm_##Dst(), Encoded_GPR_##Src()); \
\
AssembledTest test = assemble(); \
\
@@ -698,7 +724,35 @@
reset(); \
} while (0)
-#define TestMovdXmmAddr(Dst, Value) \
+#define TestMovdXmmReg64(Src, Dst, Value) \
+ do { \
+ assert(((Value)&0xFFFFFFFF) == (Value)); \
+ static constexpr char TestString[] = "(" #Src ", " #Dst ")"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = 0xFFFFFFFF00000000ull; \
+ const uint64_t Expected = (static_cast<uint64_t>(Value) << 32) | (Value); \
+ \
+ __ movabs(Encoded_GPR_##Src(), Expected); \
+ __ movss(IceType_f64, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movd(IceType_i64, Encoded_Xmm_##Dst(), Encoded_GPR_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setQwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.Dst<uint64_t>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+#define TestMovdXmmReg(Src, Dst, Value) \
+ do { \
+ TestMovdXmmReg32(Src, Dst, Value); \
+ TestMovdXmmReg64(Src, Dst, Value); \
+ } while (0)
+
+#define TestMovdXmmAddr32(Dst, Value) \
do { \
assert(((Value)&0xFFFFFFFF) == (Value)); \
static constexpr char TestString[] = "(" #Dst ", Addr)"; \
@@ -708,7 +762,7 @@
const uint64_t V1 = 0xFFFFFFFF00000000ull; \
\
__ movss(IceType_f64, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
- __ movd(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movd(IceType_i32, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
\
@@ -721,6 +775,35 @@
reset(); \
} while (0)
+#define TestMovdXmmAddr64(Dst, Value) \
+ do { \
+ assert(((Value)&0xFFFFFFFF) == (Value)); \
+ static constexpr char TestString[] = "(" #Dst ", Addr)"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = (static_cast<uint64_t>(Value) << 32) | (Value); \
+ const uint32_t T1 = allocateQword(); \
+ const uint64_t V1 = 0xFFFFFFFF00000000ull; \
+ \
+ __ movss(IceType_f64, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ movd(IceType_i64, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setQwordTo(T0, V0); \
+ test.setQwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(V0, test.Dst<uint64_t>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+#define TestMovdXmmAddr(Dst, Value) \
+ do { \
+ TestMovdXmmAddr32(Dst, Value); \
+ TestMovdXmmAddr64(Dst, Value); \
+ } while (0)
+
#define TestMovd(Dst) \
do { \
for (uint32_t Value : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) { \
@@ -759,13 +842,17 @@
TestMovd(xmm14);
TestMovd(xmm15);
-#undef TestMovdXmmAddr
-#undef TestMovdXmmReg
#undef TestMovd
+#undef TestMovdXmmAddr
+#undef TestMovdXmmAddr64
+#undef TestMovdXmmAddr32
+#undef TestMovdXmmReg
+#undef TestMovdXmmReg64
+#undef TestMovdXmmReg32
}
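The IceType argument now threaded through these movd tests picks between the dword form and the REX.W qword form (commonly disassembled as movq). A hand-assembled comparison of the load direction (illustrative only, not emitter output):

#include <cstddef>
#include <cstdint>
#include <cstdio>

static void dump(const char *Name, const uint8_t *Bytes, size_t N) {
  std::printf("%-16s", Name);
  for (size_t i = 0; i < N; ++i)
    std::printf(" %02x", Bytes[i]);
  std::printf("\n");
}

int main() {
  // movd xmm0, eax : 66 0F 6E /r, ModRM C0 = (reg=xmm0, rm=eax)
  const uint8_t Movd32[] = {0x66, 0x0F, 0x6E, 0xC0};
  // movd/movq xmm0, rax : same opcode plus a REX.W prefix
  const uint8_t Movd64[] = {0x66, 0x48, 0x0F, 0x6E, 0xC0};
  dump("movd xmm0, eax", Movd32, sizeof(Movd32));
  dump("movq xmm0, rax", Movd64, sizeof(Movd64));
  return 0;
}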
TEST_F(AssemblerX8664Test, MovdFromXmm) {
-#define TestMovdRegXmm(Src, Dst, Value) \
+#define TestMovdRegXmm32(Src, Dst, Value) \
do { \
assert(((Value)&0xFFFFFFFF) == (Value)); \
static constexpr char TestString[] = "(" #Src ", " #Dst ")"; \
@@ -773,7 +860,7 @@
const uint32_t V0 = Value; \
\
__ movss(IceType_f64, Encoded_Xmm_##Src(), dwordAddress(T0)); \
- __ movd(Encoded_GPR_##Dst(), Encoded_Xmm_##Src()); \
+ __ movd(IceType_i32, Encoded_GPR_##Dst(), Encoded_Xmm_##Src()); \
\
AssembledTest test = assemble(); \
\
@@ -785,7 +872,33 @@
reset(); \
} while (0)
-#define TestMovdAddrXmm(Src, Value) \
+#define TestMovdRegXmm64(Src, Dst, Value) \
+ do { \
+ assert(((Value)&0xFFFFFFFF) == (Value)); \
+ static constexpr char TestString[] = "(" #Src ", " #Dst ")"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = (static_cast<uint64_t>(Value) << 32) | (Value); \
+ \
+ __ movss(IceType_f64, Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ movd(IceType_i64, Encoded_GPR_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setQwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(V0, test.Dst()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+#define TestMovdRegXmm(Src, Dst, Value) \
+ do { \
+ TestMovdRegXmm32(Src, Dst, Value); \
+ TestMovdRegXmm64(Src, Dst, Value); \
+ } while (0)
+
+#define TestMovdAddrXmm32(Src, Value) \
do { \
assert(((Value)&0xFFFFFFFF) == (Value)); \
static constexpr char TestString[] = "(" #Src ", Addr)"; \
@@ -795,7 +908,7 @@
const uint32_t V1 = ~(Value); \
\
__ movss(IceType_f64, Encoded_Xmm_##Src(), dwordAddress(T0)); \
- __ movd(dwordAddress(T1), Encoded_Xmm_##Src()); \
+ __ movd(IceType_i32, dwordAddress(T1), Encoded_Xmm_##Src()); \
\
AssembledTest test = assemble(); \
\
@@ -808,6 +921,35 @@
reset(); \
} while (0)
+#define TestMovdAddrXmm64(Src, Value) \
+ do { \
+ assert(((Value)&0xFFFFFFFF) == (Value)); \
+ static constexpr char TestString[] = "(" #Src ", Addr)"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = (static_cast<uint64_t>(Value) << 32) | Value; \
+ const uint32_t T1 = allocateQword(); \
+ const uint64_t V1 = ~V0; \
+ \
+ __ movss(IceType_f64, Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ movd(IceType_i64, dwordAddress(T1), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setQwordTo(T0, V0); \
+ test.setQwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(V0, test.contentsOfQword(T1)) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+#define TestMovdAddrXmm(Src, Value) \
+ do { \
+ TestMovdAddrXmm32(Src, Value); \
+ TestMovdAddrXmm64(Src, Value); \
+ } while (0)
+
#define TestMovd(Src) \
do { \
for (uint32_t Value : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) { \
@@ -846,9 +988,13 @@
TestMovd(xmm14);
TestMovd(xmm15);
-#undef TestMovdAddrXmm
-#undef TestMovdRegXmm
#undef TestMovd
+#undef TestMovdAddrXmm
+#undef TestMovdAddrXmm64
+#undef TestMovdAddrXmm32
+#undef TestMovdRegXmm
+#undef TestMovdRegXmm64
+#undef TestMovdRegXmm32
}
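For the store direction exercised by MovdFromXmm, the opcode flips from 66 0F 6E to 66 0F 7E, again with REX.W selecting the qword form. Hand-assembled for illustration: movd eax, xmm0 encodes as 66 0F 7E C0, and movq rax, xmm0 as 66 48 0F 7E C0.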
TEST_F(AssemblerX8664Test, MovqXmmAddr) {
diff --git a/unittest/AssemblerX8664/XmmArith.cpp b/unittest/AssemblerX8664/XmmArith.cpp
index ac51c02..e43413a 100644
--- a/unittest/AssemblerX8664/XmmArith.cpp
+++ b/unittest/AssemblerX8664/XmmArith.cpp
@@ -1104,15 +1104,16 @@
reset(); \
} while (0)
-#define TestImplSXmmReg(Dst, GPR, Inst, Size) \
+#define TestImplSXmmReg(Dst, GPR, Inst, Size, IntType) \
do { \
static constexpr char TestString[] = \
- "(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")"; \
+ "(" #Dst ", " #GPR ", cvt" #Inst ", " #IntType ", f" #Size ")"; \
const uint32_t T0 = allocateDqword(); \
\
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \
- __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR()); \
+ __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \
+ Encoded_GPR_##GPR()); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##DstValue); \
@@ -1122,21 +1123,23 @@
reset(); \
} while (0)
-#define TestImplSRegXmm(GPR, Src, Inst, Size) \
+#define TestImplSRegXmm(GPR, Src, Inst, IntSize, Size) \
do { \
static constexpr char TestString[] = \
- "(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")"; \
+ "(" #GPR ", " #Src ", cvt" #Inst ", " #IntSize ", f" #Size ")"; \
const uint32_t T0 = allocateDqword(); \
\
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
__ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
- __ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src()); \
+ __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \
+ Encoded_Xmm_##Src()); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##SrcValue); \
test.run(); \
\
- ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \
+ ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \
+ test.GPR()) \
<< TestString; \
reset(); \
} while (0)
@@ -1160,15 +1163,16 @@
reset(); \
} while (0)
-#define TestImplSXmmAddr(Dst, Inst, Size) \
+#define TestImplSXmmAddr(Dst, Inst, Size, IntType) \
do { \
static constexpr char TestString[] = \
- "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \
+ "(" #Dst ", Addr, cvt" #Inst ", f" #Size ", " #IntType ")"; \
const uint32_t T0 = allocateDqword(); \
const uint32_t T1 = allocateDword(); \
\
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
- __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \
+ dwordAddress(T1)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##DstValue); \
@@ -1179,20 +1183,22 @@
reset(); \
} while (0)
-#define TestImplSRegAddr(GPR, Inst, Size) \
+#define TestImplSRegAddr(GPR, Inst, IntSize, Size) \
do { \
static constexpr char TestString[] = \
- "(" #GPR ", Addr, cvt" #Inst ", f" #Size ")"; \
+ "(" #GPR ", Addr, cvt" #Inst ", f" #Size ", " #IntSize ")"; \
const uint32_t T0 = allocateDqword(); \
\
__ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
- __ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), dwordAddress(T0)); \
+ __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \
+ dwordAddress(T0)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, Inst##Size##SrcValue); \
test.run(); \
\
- ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \
+ ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \
+ test.GPR()) \
<< TestString; \
reset(); \
} while (0)
@@ -1203,10 +1209,14 @@
TestImplPXmmAddr(Src, dq2ps, Size); \
TestImplPXmmXmm(Dst, Src, tps2dq, Size); \
TestImplPXmmAddr(Src, tps2dq, Size); \
- TestImplSXmmReg(Dst, GPR, si2ss, Size); \
- TestImplSXmmAddr(Dst, si2ss, Size); \
- TestImplSRegXmm(GPR, Src, tss2si, Size); \
- TestImplSRegAddr(GPR, tss2si, Size); \
+ TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i32); \
+ TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i64); \
+ TestImplSXmmAddr(Dst, si2ss, Size, IceType_i32); \
+ TestImplSXmmAddr(Dst, si2ss, Size, IceType_i64); \
+ TestImplSRegXmm(GPR, Src, tss2si, 32, Size); \
+ TestImplSRegXmm(GPR, Src, tss2si, 64, Size); \
+ TestImplSRegAddr(GPR, tss2si, 32, Size); \
+ TestImplSRegAddr(GPR, tss2si, 64, Size); \
TestImplPXmmXmm(Dst, Src, float2float, Size); \
TestImplPXmmAddr(Src, float2float, Size); \
} while (0)
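The reason these cvt tests now carry an explicit integer width is semantic, not just a matter of encoding: the truncating conversions represent different ranges. A sketch of the difference, computed in plain C++ rather than with the instructions themselves (the value is chosen to be exactly representable as float):

#include <cstdint>
#include <cstdio>

int main() {
  const float F = 3.0e9f; // exactly representable; exceeds INT32_MAX
  // cvttss2si r64, xmm truncates toward zero into 64 bits:
  const int64_t Truncated64 = static_cast<int64_t>(F);
  // The 32-bit form cannot represent 3e9; the hardware writes the
  // "integer indefinite" value 0x80000000 instead.
  std::printf("64-bit: %lld, 32-bit: 0x%08x\n",
              static_cast<long long>(Truncated64), 0x80000000u);
  return 0;
}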