Add a few Subzero intrinsics (not the atomic ones yet).
Handle:
* mem{cpy,move,set} (without optimizations for known lengths)
* nacl.read.tp
* setjmp, longjmp
* trap
Mostly see if the dispatching/organization is okay.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/321993002
diff --git a/Makefile.standalone b/Makefile.standalone
index beaf9b6..9aa2fdb 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -39,6 +39,7 @@
IceGlobalContext.cpp \
IceInst.cpp \
IceInstX8632.cpp \
+ IceIntrinsics.cpp \
IceLiveness.cpp \
IceOperand.cpp \
IceRegAlloc.cpp \
diff --git a/crosstest/mem_intrin.cpp b/crosstest/mem_intrin.cpp
new file mode 100644
index 0000000..baeb06d
--- /dev/null
+++ b/crosstest/mem_intrin.cpp
@@ -0,0 +1,97 @@
+/*
+ * Simple sanity test of memcpy, memmove, and memset intrinsics.
+ * (fixed length buffers, variable length buffers, etc.)
+ */
+
+#include <stdint.h> /* cstdint requires -std=c++0x or higher */
+#include <cstdlib>
+#include <cstring>
+
+#include "mem_intrin.h"
+
+typedef int elem_t;
+
+/*
+ * Reset buf to the sequence of bytes: init, init+1, ... init+length-1.
+ */
+static void __attribute__((noinline)) reset_buf(uint8_t *buf,
+ uint8_t init,
+ size_t length) {
+ size_t i;
+ size_t v = init;
+ for (i = 0; i < length; ++i)
+ buf[i] = v++;
+}
+
+/* Do a fletcher-16 checksum so that the order of the values matters.
+ * (Not doing a fletcher-32 checksum, since we are working with
+ * smaller buffers, whose total won't approach 2**16).
+ */
+static int __attribute__((noinline)) fletcher_checksum(uint8_t *buf,
+ size_t length) {
+ size_t i;
+ int sum = 0;
+ int sum_of_sums = 0;
+ const int kModulus = 255;
+ for (i = 0; i < length; ++i) {
+ sum = (sum + buf[i]) % kModulus;
+ sum_of_sums = (sum_of_sums + sum) % kModulus;
+ }
+ return (sum_of_sums << 8) | sum;
+}
+
+#define NWORDS 32
+#define BYTE_LENGTH (NWORDS * sizeof(elem_t))
+
+int memcpy_test_fixed_len(uint8_t init) {
+ elem_t buf[NWORDS];
+ elem_t buf2[NWORDS];
+ reset_buf((uint8_t *)buf, init, BYTE_LENGTH);
+ memcpy((void *)buf2, (void *)buf, BYTE_LENGTH);
+ return fletcher_checksum((uint8_t*)buf2, BYTE_LENGTH);
+}
+
+int memmove_test_fixed_len(uint8_t init) {
+ elem_t buf[NWORDS];
+ reset_buf((uint8_t *)buf, init, BYTE_LENGTH);
+ memmove((void *)(buf + 4), (void *)buf, BYTE_LENGTH - (4 * sizeof(elem_t)));
+  return fletcher_checksum((uint8_t *)(buf + 4),
+                           BYTE_LENGTH - (4 * sizeof(elem_t)));
+}
+
+int memset_test_fixed_len(uint8_t init) {
+ elem_t buf[NWORDS];
+ memset((void *)buf, init, BYTE_LENGTH);
+ return fletcher_checksum((uint8_t*)buf, BYTE_LENGTH);
+}
+
+int memcpy_test(uint8_t *buf, void *buf2, uint8_t init, size_t length) {
+ reset_buf(buf, init, length);
+ memcpy(buf2, (void *)buf, length);
+ return fletcher_checksum((uint8_t *)buf2, length);
+}
+
+int memmove_test(uint8_t *buf, void *buf2, uint8_t init, size_t length) {
+ int sum1;
+ int sum2;
+ const int overlap_bytes = 4 * sizeof(elem_t);
+ if (length <= overlap_bytes)
+ return 0;
+ uint8_t *overlap_buf = buf + overlap_bytes;
+ size_t reduced_length = length - overlap_bytes;
+ reset_buf(buf, init, length);
+
+ /* Test w/ overlap. */
+ memmove((void *)overlap_buf, (void *)buf, reduced_length);
+ sum1 = fletcher_checksum(overlap_buf, reduced_length);
+ /* Test w/out overlap. */
+ memmove(buf2, (void *)buf, length);
+ sum2 = fletcher_checksum((uint8_t *)buf2, length);
+ return sum1 + sum2;
+}
+
+int memset_test(uint8_t *buf, void *buf2, uint8_t init, size_t length) {
+ memset((void *)buf, init, length);
+ memset(buf2, init + 4, length);
+ return fletcher_checksum(buf, length) +
+ fletcher_checksum((uint8_t *)buf2, length);
+}
diff --git a/crosstest/mem_intrin.h b/crosstest/mem_intrin.h
new file mode 100644
index 0000000..97e6dcc
--- /dev/null
+++ b/crosstest/mem_intrin.h
@@ -0,0 +1,19 @@
+/*
+ * Simple sanity test of memcpy, memmove, and memset intrinsics.
+ * (fixed length buffers, variable length buffers, etc.).
+ * There is no include guard since this will be included multiple times,
+ * under different namespaces.
+ */
+
+/* Declare first buf as uint8_t * and second as void *, to avoid C++
+ * name mangling's use of substitutions. Otherwise Subzero's name
+ * mangling injection will need to bump each substitution sequence ID
+ * up by one (e.g., from S_ to S0_ and S1_ to S2_).
+ */
+int memcpy_test(uint8_t *buf, void *buf2, uint8_t init, size_t length);
+int memmove_test(uint8_t *buf, void *buf2, uint8_t init, size_t length);
+int memset_test(uint8_t *buf, void *buf2, uint8_t init, size_t length);
+
+int memcpy_test_fixed_len(uint8_t init);
+int memmove_test_fixed_len(uint8_t init);
+int memset_test_fixed_len(uint8_t init);
diff --git a/crosstest/mem_intrin_main.cpp b/crosstest/mem_intrin_main.cpp
new file mode 100644
index 0000000..76df66a
--- /dev/null
+++ b/crosstest/mem_intrin_main.cpp
@@ -0,0 +1,69 @@
+/* crosstest.py --test=mem_intrin.cpp --driver=mem_intrin_main.cpp \
+ --prefix=Subzero_ --output=mem_intrin */
+
+#include <stdint.h> /* cstdint requires -std=c++0x or higher */
+#include <cstdio>
+
+#include "mem_intrin.h"
+namespace Subzero_ {
+#include "mem_intrin.h"
+}
+
+#define XSTR(s) STR(s)
+#define STR(s) #s
+
+void testFixedLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+#define do_test_fixed(test_func) \
+ for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
+ ++TotalTests; \
+ int llc_result = test_func(init_val); \
+ int sz_result = Subzero_::test_func(init_val); \
+ if (llc_result == sz_result) { \
+ ++Passes; \
+ } else { \
+ ++Failures; \
+ printf("Failure (%s): init_val=%d, llc=%d, sz=%d\n", \
+ STR(test_func), init_val, llc_result, sz_result); \
+ } \
+ }
+
+ do_test_fixed(memcpy_test_fixed_len)
+ do_test_fixed(memmove_test_fixed_len)
+ do_test_fixed(memset_test_fixed_len)
+#undef do_test_fixed
+}
+
+void testVariableLen(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+ uint8_t buf[256];
+ uint8_t buf2[256];
+#define do_test_variable(test_func) \
+ for (size_t len = 4; len < 128; ++len) { \
+ for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
+ ++TotalTests; \
+ int llc_result = test_func(buf, (void *)buf2, init_val, len); \
+ int sz_result = Subzero_::test_func(buf, (void *)buf2, init_val, len); \
+ if (llc_result == sz_result) { \
+ ++Passes; \
+ } else { \
+ ++Failures; \
+ printf("Failure (%s): init_val=%d, len=%d, llc=%d, sz=%d\n", \
+               STR(test_func), init_val, (int)len, llc_result, sz_result);    \
+ } \
+ } \
+ }
+
+ do_test_variable(memcpy_test)
+ do_test_variable(memmove_test)
+ do_test_variable(memset_test)
+#undef do_test_variable
+}
+
+int main(int argc, char **argv) {
+  size_t TotalTests = 0;
+  size_t Passes = 0;
+  size_t Failures = 0;
+  testFixedLen(TotalTests, Passes, Failures);
+  testVariableLen(TotalTests, Passes, Failures);
+  printf("TotalTests=%zu Passes=%zu Failures=%zu\n", TotalTests, Passes, Failures);
+  return (int)Failures;
+}
diff --git a/crosstest/runtests.sh b/crosstest/runtests.sh
index 4ba208f..d89e1b9 100755
--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh
@@ -23,6 +23,20 @@
./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
--dir="${OUTDIR}" \
--llvm-bin-path="${LLVM_BIN_PATH}" \
+ --test=mem_intrin.cpp \
+ --driver=mem_intrin_main.cpp \
+ --output=mem_intrin_O${optlevel}
+
+ ./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
+ --dir="${OUTDIR}" \
+ --llvm-bin-path="${LLVM_BIN_PATH}" \
+ --test=test_arith.cpp --test=test_arith_frem.ll \
+ --driver=test_arith_main.cpp \
+ --output=test_arith_O${optlevel}
+
+ ./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
+ --dir="${OUTDIR}" \
+ --llvm-bin-path="${LLVM_BIN_PATH}" \
--test=test_cast.cpp --test=test_cast_to_u1.ll \
--driver=test_cast_main.cpp \
--output=test_cast_O${optlevel}
@@ -41,19 +55,13 @@
--driver=test_icmp_main.cpp \
--output=test_icmp_O${optlevel}
- ./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
- --dir="${OUTDIR}" \
- --llvm-bin-path="${LLVM_BIN_PATH}" \
- --test=test_arith.cpp --test=test_arith_frem.ll \
- --driver=test_arith_main.cpp \
- --output=test_arith_O${optlevel}
-
done
for optlevel in ${OPTLEVELS} ; do
"${OUTDIR}"/simple_loop_O${optlevel}
+ "${OUTDIR}"/mem_intrin_O${optlevel}
+ "${OUTDIR}"/test_arith_O${optlevel}
"${OUTDIR}"/test_cast_O${optlevel}
"${OUTDIR}"/test_fcmp_O${optlevel}
"${OUTDIR}"/test_icmp_O${optlevel}
- "${OUTDIR}"/test_arith_O${optlevel}
done
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index 088421f..c46d7d45 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -20,6 +20,7 @@
#include "llvm/Support/raw_ostream.h"
#include "IceDefs.h"
+#include "IceIntrinsics.h"
#include "IceTypes.h"
namespace Ice {
@@ -88,6 +89,8 @@
// Allocate data of type T using the global allocator.
template <typename T> T *allocate() { return Allocator.Allocate<T>(); }
+ const Intrinsics &getIntrinsicsInfo() const { return IntrinsicsInfo; }
+
private:
Ostream StrDump; // Stream for dumping / diagnostics
Ostream StrEmit; // Stream for code emission
@@ -95,6 +98,7 @@
llvm::BumpPtrAllocator Allocator;
VerboseMask VMask;
llvm::OwningPtr<class ConstantPool> ConstPool;
+ Intrinsics IntrinsicsInfo;
const TargetArch Arch;
const OptLevel Opt;
const IceString TestPrefix;
diff --git a/src/IceInst.h b/src/IceInst.h
index a465eda..0397e02 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -18,6 +18,7 @@
#include "IceDefs.h"
#include "IceInst.def"
+#include "IceIntrinsics.h"
#include "IceTypes.h"
// TODO: The Cfg structure, and instructions in particular, need to be
@@ -42,6 +43,7 @@
Cast,
Fcmp,
Icmp,
+ IntrinsicCall,
Load,
Phi,
Ret,
@@ -286,8 +288,13 @@
public:
static InstCall *create(Cfg *Func, SizeT NumArgs, Variable *Dest,
Operand *CallTarget) {
+ // Set HasSideEffects to true so that the call instruction can't be
+ // dead-code eliminated. IntrinsicCalls can override this if the
+ // particular intrinsic is deletable and has no side-effects.
+ const bool HasSideEffects = true;
+ const InstKind Kind = Inst::Call;
return new (Func->allocateInst<InstCall>())
- InstCall(Func, NumArgs, Dest, CallTarget);
+ InstCall(Func, NumArgs, Dest, CallTarget, HasSideEffects, Kind);
}
void addArg(Operand *Arg) { addSource(Arg); }
Operand *getCallTarget() const { return getSrc(0); }
@@ -296,18 +303,18 @@
virtual void dump(const Cfg *Func) const;
static bool classof(const Inst *Inst) { return Inst->getKind() == Call; }
-private:
- InstCall(Cfg *Func, SizeT NumArgs, Variable *Dest, Operand *CallTarget)
- : Inst(Func, Inst::Call, NumArgs + 1, Dest) {
- // Set HasSideEffects so that the call instruction can't be
- // dead-code eliminated. Don't set this for a deletable intrinsic
- // call.
- HasSideEffects = true;
+protected:
+ InstCall(Cfg *Func, SizeT NumArgs, Variable *Dest, Operand *CallTarget,
+ bool HasSideEff, InstKind Kind)
+ : Inst(Func, Kind, NumArgs + 1, Dest) {
+ HasSideEffects = HasSideEff;
addSource(CallTarget);
}
+ virtual ~InstCall() {}
+
+private:
InstCall(const InstCall &) LLVM_DELETED_FUNCTION;
InstCall &operator=(const InstCall &) LLVM_DELETED_FUNCTION;
- virtual ~InstCall() {}
};
// Cast instruction (a.k.a. conversion operation).
@@ -395,6 +402,34 @@
const ICond Condition;
};
+// Call to an intrinsic function. The call target is captured as getSrc(0),
+// and arg I is captured as getSrc(I+1).
+class InstIntrinsicCall : public InstCall {
+public:
+ static InstIntrinsicCall *create(Cfg *Func, SizeT NumArgs, Variable *Dest,
+ Operand *CallTarget,
+ const Intrinsics::IntrinsicInfo &Info) {
+ return new (Func->allocateInst<InstIntrinsicCall>())
+ InstIntrinsicCall(Func, NumArgs, Dest, CallTarget, Info);
+ }
+ static bool classof(const Inst *Inst) {
+ return Inst->getKind() == IntrinsicCall;
+ }
+
+ Intrinsics::IntrinsicInfo getIntrinsicInfo() const { return Info; }
+
+private:
+ InstIntrinsicCall(Cfg *Func, SizeT NumArgs, Variable *Dest,
+ Operand *CallTarget, const Intrinsics::IntrinsicInfo &Info)
+ : InstCall(Func, NumArgs, Dest, CallTarget, Info.HasSideEffects,
+ Inst::IntrinsicCall),
+ Info(Info) {}
+ InstIntrinsicCall(const InstIntrinsicCall &) LLVM_DELETED_FUNCTION;
+ InstIntrinsicCall &operator=(const InstIntrinsicCall &) LLVM_DELETED_FUNCTION;
+ virtual ~InstIntrinsicCall() {}
+ const Intrinsics::IntrinsicInfo Info;
+};
+
// Load instruction. The source address is captured in getSrc(0).
class InstLoad : public Inst {
public:
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 17e5712..6477683 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -50,6 +50,15 @@
const size_t TypeX8632AttributesSize =
llvm::array_lengthof(TypeX8632Attributes);
+const char *InstX8632SegmentRegNames[] = {
+#define X(val, name) \
+ name,
+ SEG_REGX8632_TABLE
+#undef X
+};
+const size_t InstX8632SegmentRegNamesSize =
+ llvm::array_lengthof(InstX8632SegmentRegNames);
+
} // end of anonymous namespace
const char *InstX8632::getWidthString(Type Ty) {
@@ -58,9 +67,9 @@
OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base,
Constant *Offset, Variable *Index,
- uint32_t Shift)
+ uint16_t Shift, SegmentRegisters SegmentReg)
: OperandX8632(kMem, Ty), Base(Base), Offset(Offset), Index(Index),
- Shift(Shift) {
+ Shift(Shift), SegmentReg(SegmentReg) {
assert(Shift <= 3);
Vars = NULL;
NumVars = 0;
@@ -148,6 +157,9 @@
addSource(Src1);
}
+InstX8632UD2::InstX8632UD2(Cfg *Func)
+ : InstX8632(Func, InstX8632::UD2, 0, NULL) {}
+
InstX8632Test::InstX8632Test(Cfg *Func, Operand *Src1, Operand *Src2)
: InstX8632(Func, InstX8632::Test, 2, NULL) {
addSource(Src1);
@@ -525,6 +537,17 @@
dumpSources(Func);
}
+void InstX8632UD2::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 0);
+ Str << "\tud2\n";
+}
+
+void InstX8632UD2::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "ud2\n";
+}
+
void InstX8632Test::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
@@ -758,6 +781,11 @@
void OperandX8632Mem::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
Str << TypeX8632Attributes[getType()].WidthString << " ";
+ if (SegmentReg != DefaultSegment) {
+ assert(SegmentReg >= 0 &&
+ static_cast<size_t>(SegmentReg) < InstX8632SegmentRegNamesSize);
+ Str << InstX8632SegmentRegNames[SegmentReg] << ":";
+ }
// TODO: The following is an almost verbatim paste of dump().
bool Dumped = false;
Str << "[";
@@ -782,11 +810,14 @@
OffsetIsZero = (CI->getValue() == 0);
OffsetIsNegative = (static_cast<int64_t>(CI->getValue()) < 0);
}
- if (!OffsetIsZero) { // Suppress if Offset is known to be 0
- if (Dumped) {
+ if (Dumped) {
+ if (!OffsetIsZero) { // Suppress if Offset is known to be 0
if (!OffsetIsNegative) // Suppress if Offset is known to be negative
Str << "+";
+ Offset->emit(Func);
}
+ } else {
+ // There is only the offset.
Offset->emit(Func);
}
Str << "]";
@@ -794,6 +825,11 @@
void OperandX8632Mem::dump(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrDump();
+ if (SegmentReg != DefaultSegment) {
+ assert(SegmentReg >= 0 &&
+ static_cast<size_t>(SegmentReg) < InstX8632SegmentRegNamesSize);
+ Str << InstX8632SegmentRegNames[SegmentReg] << ":";
+ }
bool Dumped = false;
Str << "[";
if (Base) {
@@ -817,11 +853,14 @@
OffsetIsZero = (CI->getValue() == 0);
OffsetIsNegative = (static_cast<int64_t>(CI->getValue()) < 0);
}
- if (!OffsetIsZero) { // Suppress if Offset is known to be 0
- if (Dumped) {
+ if (Dumped) {
+ if (!OffsetIsZero) { // Suppress if Offset is known to be 0
if (!OffsetIsNegative) // Suppress if Offset is known to be negative
Str << "+";
+ Offset->dump(Func);
}
+ } else {
+ // There is only the offset.
Offset->dump(Func);
}
Str << "]";
diff --git a/src/IceInstX8632.def b/src/IceInstX8632.def
index d5e99c3..47650e1 100644
--- a/src/IceInstX8632.def
+++ b/src/IceInstX8632.def
@@ -38,6 +38,16 @@
//#define X(val, init, name, name16, name8, scratch, preserved, stackptr,
// frameptr, isI8, isInt, isFP)
+// X86 segment registers.
+#define SEG_REGX8632_TABLE \
+ /* enum value, name */ \
+ X(SegReg_CS, "cs") \
+ X(SegReg_DS, "ds") \
+ X(SegReg_ES, "es") \
+ X(SegReg_SS, "ss") \
+ X(SegReg_FS, "fs") \
+ X(SegReg_GS, "gs") \
+//#define X(val, name)
#define ICEINSTX8632BR_TABLE \
/* enum value, dump, emit */ \
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h
index 8a6f14a..7e6e199 100644
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -52,16 +52,26 @@
// value for the index register.
class OperandX8632Mem : public OperandX8632 {
public:
+ enum SegmentRegisters {
+ DefaultSegment = -1,
+#define X(val, name) \
+ val,
+ SEG_REGX8632_TABLE
+#undef X
+ SegReg_NUM
+ };
static OperandX8632Mem *create(Cfg *Func, Type Ty, Variable *Base,
Constant *Offset, Variable *Index = NULL,
- uint32_t Shift = 0) {
+ uint16_t Shift = 0,
+ SegmentRegisters SegmentReg = DefaultSegment) {
return new (Func->allocate<OperandX8632Mem>())
- OperandX8632Mem(Func, Ty, Base, Offset, Index, Shift);
+ OperandX8632Mem(Func, Ty, Base, Offset, Index, Shift, SegmentReg);
}
Variable *getBase() const { return Base; }
Constant *getOffset() const { return Offset; }
Variable *getIndex() const { return Index; }
- uint32_t getShift() const { return Shift; }
+ uint16_t getShift() const { return Shift; }
+ SegmentRegisters getSegmentRegister() const { return SegmentReg; }
virtual void emit(const Cfg *Func) const;
virtual void dump(const Cfg *Func) const;
@@ -71,14 +81,15 @@
private:
OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base, Constant *Offset,
- Variable *Index, uint32_t Shift);
+ Variable *Index, uint16_t Shift, SegmentRegisters SegmentReg);
OperandX8632Mem(const OperandX8632Mem &) LLVM_DELETED_FUNCTION;
OperandX8632Mem &operator=(const OperandX8632Mem &) LLVM_DELETED_FUNCTION;
virtual ~OperandX8632Mem() {}
Variable *Base;
Constant *Offset;
Variable *Index;
- uint32_t Shift;
+ uint16_t Shift;
+ SegmentRegisters SegmentReg : 16;
};
// VariableSplit is a way to treat an f64 memory location as a pair
@@ -160,6 +171,7 @@
Subss,
Test,
Ucomiss,
+ UD2,
Xor
};
static const char *getWidthString(Type Ty);
@@ -531,6 +543,23 @@
virtual ~InstX8632Ucomiss() {}
};
+// UD2 instruction.
+class InstX8632UD2 : public InstX8632 {
+public:
+ static InstX8632UD2 *create(Cfg *Func) {
+ return new (Func->allocate<InstX8632UD2>()) InstX8632UD2(Func);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, UD2); }
+
+private:
+ InstX8632UD2(Cfg *Func);
+ InstX8632UD2(const InstX8632UD2 &) LLVM_DELETED_FUNCTION;
+ InstX8632UD2 &operator=(const InstX8632UD2 &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632UD2() {}
+};
+
// Test instruction.
class InstX8632Test : public InstX8632 {
public:
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
new file mode 100644
index 0000000..dbf79cf
--- /dev/null
+++ b/src/IceIntrinsics.cpp
@@ -0,0 +1,202 @@
+//===- subzero/src/IceIntrinsics.cpp - Functions related to intrinsics ----===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Intrinsics utilities for matching and
+// then dispatching by name.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IceCfg.h"
+#include "IceCfgNode.h"
+#include "IceIntrinsics.h"
+#include "IceLiveness.h"
+#include "IceOperand.h"
+
+#include <utility>
+
+namespace Ice {
+
+namespace {
+
+const struct IceIntrinsicsEntry_ {
+ Intrinsics::FullIntrinsicInfo Info;
+ const char *IntrinsicName;
+} IceIntrinsicsTable[] = {
+#define AtomicCmpxchgInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::AtomicCmpxchg, true }, \
+ { Overload, IceType_i32, Overload, Overload, IceType_i32, IceType_i32 }, \
+ 6 \
+ } \
+ , "nacl.atomic.cmpxchg." NameSuffix \
+ }
+ AtomicCmpxchgInit(IceType_i8, "i8"),
+ AtomicCmpxchgInit(IceType_i16, "i16"),
+ AtomicCmpxchgInit(IceType_i32, "i32"),
+ AtomicCmpxchgInit(IceType_i64, "i64"),
+#undef AtomicCmpxchgInit
+ { { { Intrinsics::AtomicFence, true }, { IceType_void, IceType_i32 }, 2 },
+ "nacl.atomic.fence" },
+ { { { Intrinsics::AtomicFenceAll, true }, { IceType_void }, 1 },
+ "nacl.atomic.fence.all" },
+ { { { Intrinsics::AtomicIsLockFree, true },
+ { IceType_i1, IceType_i32, IceType_i32 }, 3 },
+ "nacl.atomic.is.lock.free" },
+
+#define AtomicLoadInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::AtomicLoad, true } \
+ , { Overload, IceType_i32, IceType_i32 }, 3 \
+ } \
+ , "nacl.atomic.load." NameSuffix \
+ }
+ AtomicLoadInit(IceType_i8, "i8"),
+ AtomicLoadInit(IceType_i16, "i16"),
+ AtomicLoadInit(IceType_i32, "i32"),
+ AtomicLoadInit(IceType_i64, "i64"),
+#undef AtomicLoadInit
+
+#define AtomicRMWInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::AtomicRMW, true } \
+ , { Overload, IceType_i32, IceType_i32, Overload, IceType_i32 }, 5 \
+ } \
+ , "nacl.atomic.rmw." NameSuffix \
+ }
+ AtomicRMWInit(IceType_i8, "i8"),
+ AtomicRMWInit(IceType_i16, "i16"),
+ AtomicRMWInit(IceType_i32, "i32"),
+ AtomicRMWInit(IceType_i64, "i64"),
+#undef AtomicRMWInit
+
+#define AtomicStoreInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::AtomicStore, true } \
+      , { IceType_void, Overload, IceType_i32, IceType_i32 }, 4                \
+ } \
+ , "nacl.atomic.store." NameSuffix \
+ }
+ AtomicStoreInit(IceType_i8, "i8"),
+ AtomicStoreInit(IceType_i16, "i16"),
+ AtomicStoreInit(IceType_i32, "i32"),
+ AtomicStoreInit(IceType_i64, "i64"),
+#undef AtomicStoreInit
+
+#define BswapInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::Bswap, false } \
+ , { Overload, Overload }, 2 \
+ } \
+ , "bswap." NameSuffix \
+ }
+ BswapInit(IceType_i16, "i16"),
+ BswapInit(IceType_i32, "i32"),
+ BswapInit(IceType_i64, "i64"),
+#undef BswapInit
+
+#define CtlzInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::Ctlz, false } \
+ , { Overload, Overload, IceType_i1 }, 3 \
+ } \
+ , "ctlz." NameSuffix \
+ }
+ CtlzInit(IceType_i32, "i32"),
+ CtlzInit(IceType_i64, "i64"),
+#undef CtlzInit
+
+#define CtpopInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::Ctpop, false } \
+ , { Overload, Overload }, 2 \
+ } \
+ , "ctpop." NameSuffix \
+ }
+ CtpopInit(IceType_i32, "i32"),
+ CtpopInit(IceType_i64, "i64"),
+#undef CtpopInit
+
+#define CttzInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::Cttz, false } \
+ , { Overload, Overload, IceType_i1 }, 3 \
+ } \
+ , "cttz." NameSuffix \
+ }
+ CttzInit(IceType_i32, "i32"),
+ CttzInit(IceType_i64, "i64"),
+#undef CttzInit
+ { { { Intrinsics::Longjmp, true },
+ { IceType_void, IceType_i32, IceType_i32 }, 3 },
+ "nacl.longjmp" },
+ { { { Intrinsics::Memcpy, true }, { IceType_void, IceType_i32, IceType_i32,
+ IceType_i32, IceType_i32, IceType_i1 },
+ 6 },
+ "memcpy.p0i8.p0i8.i32" },
+ { { { Intrinsics::Memmove, true },
+ { IceType_void, IceType_i32, IceType_i32,
+ IceType_i32, IceType_i32, IceType_i1 },
+ 6 },
+ "memmove.p0i8.p0i8.i32" },
+ { { { Intrinsics::Memset, true }, { IceType_void, IceType_i32, IceType_i8,
+ IceType_i32, IceType_i32, IceType_i1 },
+ 6 },
+ "memset.p0i8.i32" },
+ { { { Intrinsics::NaClReadTP, false }, { IceType_i32 }, 1 },
+ "nacl.read.tp" },
+ { { { Intrinsics::Setjmp, true }, { IceType_i32, IceType_i32 }, 2 },
+ "nacl.setjmp" },
+
+#define SqrtInit(Overload, NameSuffix) \
+ { \
+ { \
+ { Intrinsics::Sqrt, false } \
+ , { Overload, Overload }, 2 \
+ } \
+ , "sqrt." NameSuffix \
+ }
+ SqrtInit(IceType_f32, "f32"),
+ SqrtInit(IceType_f64, "f64"),
+#undef SqrtInit
+ { { { Intrinsics::Stacksave, true }, { IceType_i32 }, 1 }, "stacksave" },
+ { { { Intrinsics::Stackrestore, true }, { IceType_void, IceType_i32 }, 2 },
+ "stackrestore" },
+ { { { Intrinsics::Trap, true }, { IceType_void }, 1 }, "trap" }
+ };
+const size_t IceIntrinsicsTableSize = llvm::array_lengthof(IceIntrinsicsTable);
+
+} // end of namespace
+
+Intrinsics::Intrinsics() {
+ for (size_t I = 0; I < IceIntrinsicsTableSize; ++I) {
+ const struct IceIntrinsicsEntry_ &Entry = IceIntrinsicsTable[I];
+ assert(Entry.Info.NumTypes <= kMaxIntrinsicParameters);
+ map.insert(std::make_pair(IceString(Entry.IntrinsicName), Entry.Info));
+ }
+}
+
+Intrinsics::~Intrinsics() {}
+
+const Intrinsics::FullIntrinsicInfo *
+Intrinsics::find(const IceString &Name) const {
+ IntrinsicMap::const_iterator it = map.find(Name);
+ if (it == map.end())
+ return NULL;
+ return &it->second;
+}
+
+} // end of namespace Ice
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
new file mode 100644
index 0000000..4f9f7de
--- /dev/null
+++ b/src/IceIntrinsics.h
@@ -0,0 +1,94 @@
+//===- subzero/src/IceIntrinsics.h - List of Ice Intrinsics -----*- C++ -*-===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the kinds of intrinsics supported by PNaCl.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICEINTRINSICS_H
+#define SUBZERO_SRC_ICEINTRINSICS_H
+
+#include "IceDefs.h"
+
+namespace Ice {
+
+static const size_t kMaxIntrinsicParameters = 6;
+
+class Intrinsics {
+public:
+ Intrinsics();
+ ~Intrinsics();
+
+ // Some intrinsics allow overloading by type. This enum collapses all
+ // overloads into a single ID, but the type can still be recovered by the
+ // type of the intrinsic function call's return value and parameters.
+ enum IntrinsicID {
+ UnknownIntrinsic = 0,
+ // Arbitrary (alphabetical) order.
+ AtomicCmpxchg,
+ AtomicFence,
+ AtomicFenceAll,
+ AtomicIsLockFree,
+ AtomicLoad,
+ AtomicRMW,
+ AtomicStore,
+ Bswap,
+ Ctlz,
+ Ctpop,
+ Cttz,
+ Longjmp,
+ Memcpy,
+ Memmove,
+ Memset,
+ NaClReadTP,
+ Setjmp,
+ Sqrt,
+ Stacksave,
+ Stackrestore,
+ Trap
+ };
+
+ // Basic attributes related to each intrinsic, that are relevant to
+ // code generation. We will want to have more attributes (e.g., Setjmp
+ // returns twice and which affects stack coloring) once the lowering
+ // cares about such attributes. Perhaps the attributes representation
+ // can be shared with general function calls, though most functions
+ // will be opaque.
+ struct IntrinsicInfo {
+ IntrinsicID ID : 31;
+ bool HasSideEffects : 1;
+ };
+
+ // The complete set of information about an intrinsic.
+ struct FullIntrinsicInfo {
+ struct IntrinsicInfo Info; // Information that CodeGen would care about.
+
+ // Sanity check during parsing.
+ Type Signature[kMaxIntrinsicParameters];
+ uint8_t NumTypes;
+ };
+
+ // Find the information about a given intrinsic, based on function name.
+ // The function name is expected to have the common "llvm." prefix
+ // stripped. If found, returns a reference to a FullIntrinsicInfo entry
+ // (valid for the lifetime of the map). Otherwise returns null.
+ const FullIntrinsicInfo *find(const IceString &Name) const;
+
+private:
+ // TODO(jvoung): May want to switch to something like LLVM's StringMap.
+ typedef std::map<IceString, FullIntrinsicInfo> IntrinsicMap;
+ IntrinsicMap map;
+
+ Intrinsics(const Intrinsics &) LLVM_DELETED_FUNCTION;
+ Intrinsics &operator=(const Intrinsics &) LLVM_DELETED_FUNCTION;
+};
+
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICEINTRINSICS_H
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 72a3e8c..877f717 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -116,6 +116,9 @@
case Inst::Icmp:
lowerIcmp(llvm::dyn_cast<InstIcmp>(Inst));
break;
+ case Inst::IntrinsicCall:
+ lowerIntrinsicCall(llvm::dyn_cast<InstIntrinsicCall>(Inst));
+ break;
case Inst::Load:
lowerLoad(llvm::dyn_cast<InstLoad>(Inst));
break;
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 7f798a8..dbb9a42 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -167,6 +167,7 @@
virtual void lowerCast(const InstCast *Inst) = 0;
virtual void lowerFcmp(const InstFcmp *Inst) = 0;
virtual void lowerIcmp(const InstIcmp *Inst) = 0;
+ virtual void lowerIntrinsicCall(const InstIntrinsicCall *Inst) = 0;
virtual void lowerLoad(const InstLoad *Inst) = 0;
virtual void lowerPhi(const InstPhi *Inst) = 0;
virtual void lowerRet(const InstRet *Inst) = 0;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 0edcab5..449e413 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -431,6 +431,9 @@
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
}
+// static
+Type TargetX8632::stackSlotType() { return IceType_i32; }
+
void TargetX8632::addProlog(CfgNode *Node) {
// If SimpleCoalescing is false, each variable without a register
// gets its own unique stack slot, which leads to large stack
@@ -760,7 +763,7 @@
if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
Mem->getOffset(), Mem->getIndex(),
- Mem->getShift());
+ Mem->getShift(), Mem->getSegmentRegister());
}
llvm_unreachable("Unsupported operand type");
return NULL;
@@ -790,7 +793,8 @@
SymOffset->getName());
}
return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
- Mem->getIndex(), Mem->getShift());
+ Mem->getIndex(), Mem->getShift(),
+ Mem->getSegmentRegister());
}
llvm_unreachable("Unsupported operand type");
return NULL;
@@ -1774,6 +1778,91 @@
Context.insert(Label);
}
+void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
+ switch (Instr->getIntrinsicInfo().ID) {
+ case Intrinsics::AtomicCmpxchg:
+ case Intrinsics::AtomicFence:
+ case Intrinsics::AtomicFenceAll:
+ case Intrinsics::AtomicIsLockFree:
+ case Intrinsics::AtomicLoad:
+ case Intrinsics::AtomicRMW:
+ case Intrinsics::AtomicStore:
+ case Intrinsics::Bswap:
+ case Intrinsics::Ctlz:
+ case Intrinsics::Ctpop:
+ case Intrinsics::Cttz:
+ Func->setError("Unhandled intrinsic");
+ return;
+ case Intrinsics::Longjmp: {
+ InstCall *Call = makeHelperCall("longjmp", NULL, 2);
+ Call->addArg(Instr->getArg(0));
+ Call->addArg(Instr->getArg(1));
+ lowerCall(Call);
+ break;
+ }
+ case Intrinsics::Memcpy: {
+ // In the future, we could potentially emit an inline memcpy/memset, etc.
+ // for intrinsic calls w/ a known length.
+ InstCall *Call = makeHelperCall("memcpy", NULL, 3);
+ Call->addArg(Instr->getArg(0));
+ Call->addArg(Instr->getArg(1));
+ Call->addArg(Instr->getArg(2));
+ lowerCall(Call);
+ break;
+ }
+ case Intrinsics::Memmove: {
+ InstCall *Call = makeHelperCall("memmove", NULL, 3);
+ Call->addArg(Instr->getArg(0));
+ Call->addArg(Instr->getArg(1));
+ Call->addArg(Instr->getArg(2));
+ lowerCall(Call);
+ break;
+ }
+ case Intrinsics::Memset: {
+    // The value operand needs to be extended to a stack slot size
+    // because the "push" instruction only works with specific operand sizes.
+ Operand *ValOp = Instr->getArg(1);
+ assert(ValOp->getType() == IceType_i8);
+ Variable *ValExt = makeReg(stackSlotType());
+ _movzx(ValExt, ValOp);
+ InstCall *Call = makeHelperCall("memset", NULL, 3);
+ Call->addArg(Instr->getArg(0));
+ Call->addArg(ValExt);
+ Call->addArg(Instr->getArg(2));
+ lowerCall(Call);
+ break;
+ }
+ case Intrinsics::NaClReadTP: {
+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
+ Operand *Src = OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL,
+ 0, OperandX8632Mem::SegReg_GS);
+ Variable *Dest = Instr->getDest();
+ Variable *T = NULL;
+ _mov(T, Src);
+ _mov(Dest, T);
+ break;
+ }
+ case Intrinsics::Setjmp: {
+ InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
+ Call->addArg(Instr->getArg(0));
+ lowerCall(Call);
+ break;
+ }
+ case Intrinsics::Sqrt:
+ case Intrinsics::Stacksave:
+ case Intrinsics::Stackrestore:
+ Func->setError("Unhandled intrinsic");
+ return;
+ case Intrinsics::Trap:
+ _ud2();
+ break;
+ case Intrinsics::UnknownIntrinsic:
+ Func->setError("Should not be lowering UnknownIntrinsic");
+ return;
+ }
+ return;
+}
+
namespace {
bool isAdd(const Inst *Inst) {
@@ -1784,7 +1873,7 @@
return false;
}
-void computeAddressOpt(Variable *&Base, Variable *&Index, int32_t &Shift,
+void computeAddressOpt(Variable *&Base, Variable *&Index, uint16_t &Shift,
int32_t &Offset) {
(void)Offset; // TODO: pattern-match for non-zero offsets.
if (Base == NULL)
@@ -1965,14 +2054,20 @@
Variable *Dest = Inst->getDest();
Operand *Addr = Inst->getSrc(0);
Variable *Index = NULL;
- int32_t Shift = 0;
+ uint16_t Shift = 0;
int32_t Offset = 0; // TODO: make Constant
+ // Vanilla ICE load instructions should not use the segment registers,
+ // and computeAddressOpt only works at the level of Variables and Constants,
+ // not other OperandX8632Mem, so there should be no mention of segment
+ // registers there either.
+ const OperandX8632Mem::SegmentRegisters SegmentReg =
+ OperandX8632Mem::DefaultSegment;
Variable *Base = llvm::dyn_cast<Variable>(Addr);
computeAddressOpt(Base, Index, Shift, Offset);
if (Base && Addr != Base) {
Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
- Shift);
+ Shift, SegmentReg);
Inst->setDeleted();
Context.insert(InstLoad::create(Func, Dest, Addr));
}
@@ -2081,14 +2176,20 @@
Operand *Data = Inst->getData();
Operand *Addr = Inst->getAddr();
Variable *Index = NULL;
- int32_t Shift = 0;
+ uint16_t Shift = 0;
int32_t Offset = 0; // TODO: make Constant
Variable *Base = llvm::dyn_cast<Variable>(Addr);
+ // Vanilla ICE store instructions should not use the segment registers,
+ // and computeAddressOpt only works at the level of Variables and Constants,
+ // not other OperandX8632Mem, so there should be no mention of segment
+ // registers there either.
+ const OperandX8632Mem::SegmentRegisters SegmentReg =
+ OperandX8632Mem::DefaultSegment;
computeAddressOpt(Base, Index, Shift, Offset);
if (Base && Addr != Base) {
Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);
Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
- Shift);
+ Shift, SegmentReg);
Inst->setDeleted();
Context.insert(InstStore::create(Func, Data, Addr));
}
@@ -2147,9 +2248,9 @@
RegIndex = legalizeToVar(Index, true);
}
if (Base != RegBase || Index != RegIndex) {
- From =
- OperandX8632Mem::create(Func, Mem->getType(), RegBase,
- Mem->getOffset(), RegIndex, Mem->getShift());
+ From = OperandX8632Mem::create(
+ Func, Mem->getType(), RegBase, Mem->getOffset(), RegIndex,
+ Mem->getShift(), Mem->getSegmentRegister());
}
if (!(Allowed & Legal_Mem)) {
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 3ca9ca3..7902136 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -83,6 +83,7 @@
virtual void lowerCast(const InstCast *Inst);
virtual void lowerFcmp(const InstFcmp *Inst);
virtual void lowerIcmp(const InstIcmp *Inst);
+ virtual void lowerIntrinsicCall(const InstIntrinsicCall *Inst);
virtual void lowerLoad(const InstLoad *Inst);
virtual void lowerPhi(const InstPhi *Inst);
virtual void lowerRet(const InstRet *Inst);
@@ -123,6 +124,7 @@
InstCall *Call = InstCall::create(Func, MaxSrcs, Dest, CallTarget);
return Call;
}
+ static Type stackSlotType();
// The following are helpers that insert lowered x86 instructions
// with minimal syntactic overhead, so that the lowering code can
@@ -246,6 +248,7 @@
void _ucomiss(Operand *Src0, Operand *Src1) {
Context.insert(InstX8632Ucomiss::create(Func, Src0, Src1));
}
+ void _ud2() { Context.insert(InstX8632UD2::create(Func)); }
void _xor(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Xor::create(Func, Dest, Src0));
}
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index bce8c94..2b323f7 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -528,11 +528,37 @@
unsigned NumArgs = Inst->getNumArgOperands();
// Note: Subzero doesn't (yet) do anything special with the Tail
// flag in the bitcode, i.e. CallInst::isTailCall().
- Ice::InstCall *NewInst =
- Ice::InstCall::create(Func, NumArgs, Dest, CallTarget);
+ Ice::InstCall *NewInst = NULL;
+ const Ice::Intrinsics::FullIntrinsicInfo *Info = NULL;
+
+ if (Ice::ConstantRelocatable *Target =
+ llvm::dyn_cast<Ice::ConstantRelocatable>(CallTarget)) {
+ // Check if this direct call is to an Intrinsic (starts with "llvm.")
+ static const char LLVMPrefix[] = "llvm.";
+ const size_t LLVMPrefixLen = strlen(LLVMPrefix);
+ Ice::IceString Name = Target->getName();
+ if (Name.substr(0, LLVMPrefixLen) == LLVMPrefix) {
+ Ice::IceString NameSuffix = Name.substr(LLVMPrefixLen);
+ Info = Ctx->getIntrinsicsInfo().find(NameSuffix);
+ if (!Info) {
+ report_fatal_error(std::string("Invalid PNaCl intrinsic call: ") +
+ LLVMObjectAsString(Inst));
+ }
+ NewInst = Ice::InstIntrinsicCall::create(Func, NumArgs, Dest,
+ CallTarget, Info->Info);
+ }
+ }
+
+ // Not an intrinsic call.
+ if (NewInst == NULL) {
+ NewInst = Ice::InstCall::create(Func, NumArgs, Dest, CallTarget);
+ }
for (unsigned i = 0; i < NumArgs; ++i) {
NewInst->addArg(convertOperand(Inst, i));
}
+ if (Info) {
+ validateIntrinsicCall(NewInst, Info);
+ }
return NewInst;
}
@@ -559,6 +585,31 @@
return Node;
}
+ void validateIntrinsicCall(const Ice::InstCall *Call,
+ const Ice::Intrinsics::FullIntrinsicInfo *I) {
+ assert(I->NumTypes >= 1);
+ if (I->Signature[0] == Ice::IceType_void) {
+ if (Call->getDest() != NULL) {
+ report_fatal_error(
+ "Return value for intrinsic func w/ void return type.");
+ }
+ } else {
+ if (I->Signature[0] != Call->getDest()->getType()) {
+ report_fatal_error("Mismatched return types.");
+ }
+ }
+ if (Call->getNumArgs() + 1 != I->NumTypes) {
+ std::cerr << "Call->getNumArgs() " << (int)Call->getNumArgs()
+ << " I->NumTypes " << (int)I->NumTypes << "\n";
+ report_fatal_error("Mismatched # of args.");
+ }
+ for (size_t i = 1; i < I->NumTypes; ++i) {
+ if (Call->getArg(i - 1)->getType() != I->Signature[i]) {
+ report_fatal_error("Mismatched argument type.");
+ }
+ }
+ }
+
private:
// Data
Ice::GlobalContext *Ctx;
diff --git a/szdiff.py b/szdiff.py
index 9b8d613..046982e 100755
--- a/szdiff.py
+++ b/szdiff.py
@@ -43,14 +43,23 @@
tail_call = re.compile(' tail call ');
trailing_comment = re.compile(';.*')
ignore_pattern = re.compile('^ *$|^declare|^@')
+ prev_line = None
for line in bitcode:
+ if prev_line:
+ line = prev_line + line
+ prev_line = None
# Convert tail call into regular (non-tail) call.
line = tail_call.sub(' call ', line)
# Remove trailing comments and spaces.
line = trailing_comment.sub('', line).rstrip()
# Ignore blanks lines, forward declarations, and variable definitions.
- if not ignore_pattern.search(line):
- llc_out.append(line)
+ if ignore_pattern.search(line):
+ continue
+ # SZ doesn't break up long lines, but LLVM does. Normalize to SZ.
+ if line.endswith(','):
+ prev_line = line
+ continue
+ llc_out.append(line)
# Compare sz_out and llc_out line by line, but ignore pairs of
# lines where the llc line matches a certain pattern.
@@ -61,6 +70,8 @@
'|'.join([' -[0-9]', # negative constants
' (float|double) [-0-9]', # FP constants
' (float|double) %\w+, [-0-9]',
+ ' @llvm\..*i\d+\*', # intrinsic calls w/ pointer args
+ ' i\d+\* @llvm\.', # intrinsic calls w/ pointer ret
' inttoptr ', # inttoptr pointer types
' ptrtoint ', # ptrtoint pointer types
' bitcast .*\* .* to .*\*' # bitcast pointer types
@@ -72,8 +83,8 @@
if llc_line and ignore_pattern.search(llc_line):
lines_diff += 1
continue
- if sz_line: print 'SZ>' + sz_line
- if llc_line: print 'LL>' + llc_line
+ if sz_line: print 'SZ (%d)> %s' % (lines_total, sz_line)
+ if llc_line: print 'LL (%d)> %s' % (lines_total, llc_line)
return_code = 1
if return_code == 0:
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
new file mode 100644
index 0000000..15f9a65
--- /dev/null
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -0,0 +1,177 @@
+; This tests the NaCl intrinsics not related to atomic operations.
+
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s --check-prefix=CHECKO2REM
+; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
+; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s
+
+; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s
+; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
+; RUN: | FileCheck --check-prefix=DUMP %s
+
+declare i8* @llvm.nacl.read.tp()
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
+declare void @llvm.nacl.longjmp(i8*, i32)
+declare i32 @llvm.nacl.setjmp(i8*)
+declare void @llvm.trap()
+
+define i32 @test_nacl_read_tp() {
+entry:
+ %ptr = call i8* @llvm.nacl.read.tp()
+ %__1 = ptrtoint i8* %ptr to i32
+ ret i32 %__1
+}
+; CHECK-LABEL: test_nacl_read_tp
+; CHECK: mov e{{.*}}, dword ptr gs:[0]
+; CHECKO2REM-LABEL: test_nacl_read_tp
+; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0]
+
+define i32 @test_nacl_read_tp_more_addressing() {
+entry:
+ %ptr = call i8* @llvm.nacl.read.tp()
+ %__1 = ptrtoint i8* %ptr to i32
+ %x = add i32 %__1, %__1
+ %__3 = inttoptr i32 %x to i32*
+ %v = load i32* %__3, align 1
+ %ptr2 = call i8* @llvm.nacl.read.tp()
+ %__6 = ptrtoint i8* %ptr2 to i32
+ %y = add i32 %__6, 4
+ %__8 = inttoptr i32 %y to i32*
+ store i32 %v, i32* %__8, align 1
+ ret i32 %v
+}
+; CHECK-LABEL: test_nacl_read_tp_more_addressing
+; CHECK: mov e{{.*}}, dword ptr gs:[0]
+; CHECK: mov e{{.*}}, dword ptr gs:[0]
+; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing
+; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0]
+; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0]
+
+define i32 @test_nacl_read_tp_dead(i32 %a) {
+entry:
+ %ptr = call i8* @llvm.nacl.read.tp()
+  ; Not actually using the result of the nacl.read.tp call.
+  ; In O2 mode this should be DCE'ed.
+ ret i32 %a
+}
+; Consider nacl.read.tp side-effect free, so it can be eliminated.
+; CHECKO2REM-LABEL: test_nacl_read_tp_dead
+; CHECKO2REM-NOT: mov e{{.*}}, dword ptr gs:[0]
+
+define void @test_memcpy(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
+entry:
+ %dst = inttoptr i32 %iptr_dst to i8*
+ %src = inttoptr i32 %iptr_src to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src,
+ i32 %len, i32 1, i1 0)
+ ret void
+}
+; CHECK-LABEL: test_memcpy
+; CHECK: call memcpy
+
+; TODO(jvoung) -- if we want to be clever, we can do this and the memmove,
+; memset without a function call.
+define void @test_memcpy_const_len_align(i32 %iptr_dst, i32 %iptr_src) {
+entry:
+ %dst = inttoptr i32 %iptr_dst to i8*
+ %src = inttoptr i32 %iptr_src to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src,
+ i32 8, i32 1, i1 0)
+ ret void
+}
+; CHECK-LABEL: test_memcpy_const_len_align
+; CHECK: call memcpy
+
+define void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
+entry:
+ %dst = inttoptr i32 %iptr_dst to i8*
+ %src = inttoptr i32 %iptr_src to i8*
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src,
+ i32 %len, i32 1, i1 0)
+ ret void
+}
+; CHECK-LABEL: test_memmove
+; CHECK: call memmove
+
+define void @test_memmove_const_len_align(i32 %iptr_dst, i32 %iptr_src) {
+entry:
+ %dst = inttoptr i32 %iptr_dst to i8*
+ %src = inttoptr i32 %iptr_src to i8*
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src,
+ i32 8, i32 1, i1 0)
+ ret void
+}
+; CHECK-LABEL: test_memmove_const_len_align
+; CHECK: call memmove
+
+define void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) {
+entry:
+ %val = trunc i32 %wide_val to i8
+ %dst = inttoptr i32 %iptr_dst to i8*
+ call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val,
+ i32 %len, i32 1, i1 0)
+ ret void
+}
+; CHECK-LABEL: test_memset
+; CHECK: call memset
+
+define void @test_memset_const_len_align(i32 %iptr_dst, i32 %wide_val) {
+entry:
+ %val = trunc i32 %wide_val to i8
+ %dst = inttoptr i32 %iptr_dst to i8*
+ call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val,
+ i32 8, i32 1, i1 0)
+ ret void
+}
+; CHECK-LABEL: test_memset_const_len_align
+; CHECK: call memset
+
+define i32 @test_setjmplongjmp(i32 %iptr_env) {
+entry:
+ %env = inttoptr i32 %iptr_env to i8*
+ %i = call i32 @llvm.nacl.setjmp(i8* %env)
+ %r1 = icmp eq i32 %i, 0
+ br i1 %r1, label %Zero, label %NonZero
+Zero:
+ ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy.
+ %env2 = inttoptr i32 %iptr_env to i8*
+ call void @llvm.nacl.longjmp(i8* %env2, i32 1)
+ ret i32 0
+NonZero:
+ ret i32 1
+}
+; CHECK-LABEL: test_setjmplongjmp
+; CHECK: call setjmp
+; CHECK: call longjmp
+; CHECKO2REM-LABEL: test_setjmplongjmp
+; CHECKO2REM: call setjmp
+; CHECKO2REM: call longjmp
+
+define i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) {
+entry:
+ %env = inttoptr i32 %iptr_env to i8*
+ %i = call i32 @llvm.nacl.setjmp(i8* %env)
+ ret i32 %i_other
+}
+; Don't consider setjmp side-effect free, so it's not eliminated if
+; result unused.
+; CHECKO2REM-LABEL: test_setjmp_unused
+; CHECKO2REM: call setjmp
+
+define i32 @test_trap(i32 %br) {
+entry:
+ %r1 = icmp eq i32 %br, 0
+ br i1 %r1, label %Zero, label %NonZero
+Zero:
+ call void @llvm.trap()
+ unreachable
+NonZero:
+ ret i32 1
+}
+; CHECK-LABEL: test_trap
+; CHECK: ud2
+
+; ERRORS-NOT: ICE translation error
+; DUMP-NOT: SZ