Subzero: lower the rest of the atomic operations.
64-bit ops are expanded via a cmpxchg8b loop.
64/32-bit and/or/xor are also expanded into a cmpxchg /
cmpxchg8b loop.
Add a cross test for atomic RMW operations and
compare and swap.
Misc: Test that atomic.is.lock.free can be optimized out if result is ignored.
TODO:
* optimize compare and swap with compare+branch further down
instruction stream.
* optimize atomic RMW when the return value is ignored
(adds a locked field to binary ops though).
* We may want to do some actual target-dependent basic
block splitting + expansion (the instructions inserted by
the expansion must reference the pre-colored registers,
etc.). Otherwise, we are currently getting by with modeling
the extended liveness of the variables used in the loops
using fake uses.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=jfb@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/362463002
diff --git a/crosstest/crosstest.py b/crosstest/crosstest.py
index a37b10f..be6c54c 100755
--- a/crosstest/crosstest.py
+++ b/crosstest/crosstest.py
@@ -57,6 +57,11 @@
metavar='PATH',
help='Path to LLVM executables like llc ' +
'(defaults to $LLVM_BIN_PATH)')
+ argparser.add_argument('--crosstest-bitcode', required=False,
+ default=1, type=int,
+ help='Compile non-subzero crosstest object file ' +
+ 'from the same bitcode as the subzero object. ' +
+ 'If 0, then compile it straight from source.')
args = argparser.parse_args()
objs = []
@@ -113,7 +118,9 @@
# failures. This behavior can be inspected by switching
# use_llc between True and False.
use_llc = False
- if use_llc:
+ if not args.crosstest_bitcode:
+ objs.append(arg)
+ elif use_llc:
shellcmd([os.path.join(llvm_bin_path, 'llc'),
'-filetype=obj',
'-o=' + obj_llc,
@@ -125,4 +132,4 @@
linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++'
shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] +
objs +
- ['-lm', '-o', os.path.join(args.dir, args.output)])
+ ['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
diff --git a/crosstest/runtests.sh b/crosstest/runtests.sh
index cf821e2..bba53d7 100755
--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh
@@ -64,6 +64,17 @@
--driver=test_icmp_main.cpp \
--output=test_icmp_O${optlevel}
+ # Compile the non-subzero object files straight from source
+ # since the native LLVM backend does not understand how to
+ # lower NaCl-specific intrinsics.
+ ./crosstest.py -O${optlevel} --prefix=Subzero_ --target=x8632 \
+ --dir="${OUTDIR}" \
+ --llvm-bin-path="${LLVM_BIN_PATH}" \
+ --test=test_sync_atomic.cpp \
+ --crosstest-bitcode=0 \
+ --driver=test_sync_atomic_main.cpp \
+ --output=test_sync_atomic_O${optlevel}
+
done
for optlevel in ${OPTLEVELS} ; do
@@ -74,4 +85,5 @@
"${OUTDIR}"/test_fcmp_O${optlevel}
"${OUTDIR}"/test_global_O${optlevel}
"${OUTDIR}"/test_icmp_O${optlevel}
+ "${OUTDIR}"/test_sync_atomic_O${optlevel}
done
diff --git a/crosstest/test_sync_atomic.cpp b/crosstest/test_sync_atomic.cpp
new file mode 100644
index 0000000..05d0336
--- /dev/null
+++ b/crosstest/test_sync_atomic.cpp
@@ -0,0 +1,63 @@
+//===- subzero/crosstest/test_sync_atomic.cpp - Implementation for tests --===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This aims to test that all the atomic RMW instructions and compare and swap
+// work across the allowed atomic types. This uses the __sync_* builtins
+// to test the atomic operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#include <cstdlib>
+
+#include "test_sync_atomic.h"
+
+#define X(inst, type) \
+ type test_##inst(bool fetch_first, volatile type *ptr, type a) { \
+ if (fetch_first) { \
+ return __sync_fetch_and_##inst(ptr, a); \
+ } else { \
+ return __sync_##inst##_and_fetch(ptr, a); \
+ } \
+ } \
+ type test_alloca_##inst(bool fetch, volatile type *ptr, type a) { \
+ const size_t buf_size = 8; \
+ type buf[buf_size]; \
+ for (size_t i = 0; i < buf_size; ++i) { \
+ if (fetch) { \
+ buf[i] = __sync_fetch_and_##inst(ptr, a); \
+ } else { \
+ buf[i] = __sync_##inst##_and_fetch(ptr, a); \
+ } \
+ } \
+ type sum = 0; \
+ for (size_t i = 0; i < buf_size; ++i) { \
+ sum += buf[i]; \
+ } \
+ return sum; \
+ } \
+ type test_const_##inst(bool fetch, volatile type *ptr, type ign) { \
+ if (fetch) { \
+ return __sync_fetch_and_##inst(ptr, 42); \
+ } else { \
+ return __sync_##inst##_and_fetch(ptr, 99); \
+ } \
+ }
+
+FOR_ALL_RMWOP_TYPES(X)
+#undef X
+
+#define X(type) \
+ type test_val_cmp_swap(volatile type *ptr, type oldval, type newval) { \
+ return __sync_val_compare_and_swap(ptr, oldval, newval); \
+ }
+
+ATOMIC_TYPE_TABLE
+#undef X
diff --git a/crosstest/test_sync_atomic.def b/crosstest/test_sync_atomic.def
new file mode 100644
index 0000000..f84afde
--- /dev/null
+++ b/crosstest/test_sync_atomic.def
@@ -0,0 +1,50 @@
+//===- subzero/crosstest/test_sync_atomic.def - macros for tests -*- C++ -*-===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines macros for testing atomic intrinsics (via sync builtins).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_SYNC_ATOMIC_DEF
+#define TEST_SYNC_ATOMIC_DEF
+
+#define STR(s) #s
+
+#define RMWOP_TABLE \
+ /* inst */ \
+ X(add) \
+ X(sub) \
+ X(or) \
+ X(and) \
+ X(xor)
+//#define X(inst)
+
+#define ATOMIC_TYPE_TABLE \
+ /* type */ \
+ X(uint8_t) \
+ X(uint16_t) \
+ X(uint32_t) \
+ X(uint64_t)
+//#define X(type)
+
+#define FOR_ALL_RMWTYPES_INST(F, inst) \
+ F(inst, uint8_t) \
+ F(inst, uint16_t) \
+ F(inst, uint32_t) \
+ F(inst, uint64_t)
+
+#define FOR_ALL_RMWOP_TYPES(X) \
+ FOR_ALL_RMWTYPES_INST(X, add) \
+ FOR_ALL_RMWTYPES_INST(X, sub) \
+ FOR_ALL_RMWTYPES_INST(X, or) \
+ FOR_ALL_RMWTYPES_INST(X, and) \
+ FOR_ALL_RMWTYPES_INST(X, xor)
+//#define X(inst, type)
+
+#endif // TEST_SYNC_ATOMIC_DEF
diff --git a/crosstest/test_sync_atomic.h b/crosstest/test_sync_atomic.h
new file mode 100644
index 0000000..a88cd73
--- /dev/null
+++ b/crosstest/test_sync_atomic.h
@@ -0,0 +1,29 @@
+//===- subzero/crosstest/test_sync_atomic.h - Test prototypes ---*- C++ -*-===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function prototypes for cross testing atomic
+// intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "test_sync_atomic.def"
+
+#define X(inst, type) \
+ type test_##inst(bool fetch_first, volatile type *ptr, type a); \
+ type test_alloca_##inst(bool fetch, volatile type *ptr, type a); \
+ type test_const_##inst(bool fetch, volatile type *ptr, type ignored);
+
+FOR_ALL_RMWOP_TYPES(X)
+#undef X
+
+#define X(type) \
+ type test_val_cmp_swap(volatile type *ptr, type oldval, type newval);
+
+ATOMIC_TYPE_TABLE
+#undef X
diff --git a/crosstest/test_sync_atomic_main.cpp b/crosstest/test_sync_atomic_main.cpp
new file mode 100644
index 0000000..0cae7cd
--- /dev/null
+++ b/crosstest/test_sync_atomic_main.cpp
@@ -0,0 +1,298 @@
+//===- subzero/crosstest/test_sync_atomic_main.cpp - Driver for tests -----===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Driver for cross testing atomic intrinsics, via the sync builtins.
+//
+//===----------------------------------------------------------------------===//
+
+/* crosstest.py --test=test_sync_atomic.cpp --crosstest-bitcode=0 \
+ --driver=test_sync_atomic_main.cpp --prefix=Subzero_ \
+ --output=test_sync_atomic */
+
+#include <pthread.h>
+#include <stdint.h>
+
+#include <cerrno>
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+// Include test_sync_atomic.h twice - once normally, and once within the
+// Subzero_ namespace, corresponding to the llc and Subzero translated
+// object files, respectively.
+#include "test_sync_atomic.h"
+namespace Subzero_ {
+#include "test_sync_atomic.h"
+}
+
+volatile uint64_t Values[] = {
+ 0, 1, 0x7e,
+ 0x7f, 0x80, 0x81,
+ 0xfe, 0xff, 0x7ffe,
+ 0x7fff, 0x8000, 0x8001,
+ 0xfffe, 0xffff,
+ 0x007fffff /*Max subnormal + */,
+ 0x00800000 /*Min+ */, 0x7f7fffff /*Max+ */,
+ 0x7f800000 /*+Inf*/, 0xff800000 /*-Inf*/,
+ 0x7fa00000 /*SNaN*/, 0x7fc00000 /*QNaN*/,
+ 0x7ffffffe, 0x7fffffff, 0x80000000,
+ 0x80000001, 0xfffffffe, 0xffffffff,
+ 0x100000000ll, 0x100000001ll,
+ 0x000fffffffffffffll /*Max subnormal + */,
+ 0x0010000000000000ll /*Min+ */,
+ 0x7fefffffffffffffll /*Max+ */,
+ 0x7ff0000000000000ll /*+Inf*/,
+ 0xfff0000000000000ll /*-Inf*/,
+ 0x7ff0000000000001ll /*SNaN*/,
+ 0x7ff8000000000000ll /*QNaN*/,
+ 0x7ffffffffffffffell, 0x7fffffffffffffffll, 0x8000000000000000ll,
+ 0x8000000000000001ll, 0xfffffffffffffffell, 0xffffffffffffffffll };
+
+const static size_t NumValues = sizeof(Values) / sizeof(*Values);
+
+struct {
+ volatile uint8_t l8;
+ volatile uint16_t l16;
+ volatile uint32_t l32;
+ volatile uint64_t l64;
+} AtomicLocs;
+
+template <typename Type>
+void testAtomicRMW(volatile Type *AtomicLoc,
+ size_t &TotalTests, size_t &Passes, size_t &Failures) {
+ typedef Type (*FuncType)(bool, volatile Type*, Type);
+ static struct {
+ const char *Name;
+ FuncType FuncLlc;
+ FuncType FuncSz;
+ } Funcs[] = {
+#define X(inst) \
+ { \
+ STR(inst), test_##inst, Subzero_::test_##inst \
+ }, \
+ { \
+ STR(inst) "_alloca", test_alloca_##inst, Subzero_::test_alloca_##inst \
+ }, \
+ { \
+ STR(inst) "_const", test_const_##inst, Subzero_::test_const_##inst \
+ },
+ RMWOP_TABLE
+#undef X
+ };
+ const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
+
+ for (size_t f = 0; f < NumFuncs; ++f) {
+ for (size_t i = 0; i < NumValues; ++i) {
+ Type Value1 = static_cast<Type>(Values[i]);
+ for (size_t j = 0; j < NumValues; ++j) {
+ Type Value2 = static_cast<Type>(Values[j]);
+ for (size_t k = 0; k < 2; ++k) {
+ bool fetch_first = k;
+ ++TotalTests;
+ *AtomicLoc = Value1;
+ Type ResultSz1 = Funcs[f].FuncSz(
+ fetch_first, AtomicLoc, Value2);
+ Type ResultSz2 = *AtomicLoc;
+ *AtomicLoc = Value1;
+ Type ResultLlc1 = Funcs[f].FuncLlc(
+ fetch_first, AtomicLoc, Value2);
+ Type ResultLlc2 = *AtomicLoc;
+ if (ResultSz1 == ResultLlc1 && ResultSz2 == ResultLlc2) {
+ ++Passes;
+ } else {
+ ++Failures;
+ std::cout << "test_" << Funcs[f].Name
+ << (CHAR_BIT * sizeof(Type)) << "("
+ << static_cast<uint64_t>(Value1) << ", "
+ << static_cast<uint64_t>(Value2)
+ << "): sz1=" << static_cast<uint64_t>(ResultSz1)
+ << " llc1=" << static_cast<uint64_t>(ResultLlc1)
+ << " sz2=" << static_cast<uint64_t>(ResultSz2)
+ << " llc2=" << static_cast<uint64_t>(ResultLlc2)
+ << "\n";
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename Type>
+void testValCompareAndSwap(volatile Type *AtomicLoc, size_t &TotalTests,
+ size_t &Passes, size_t &Failures) {
+ for (size_t i = 0; i < NumValues; ++i) {
+ Type Value1 = static_cast<Type>(Values[i]);
+ for (size_t j = 0; j < NumValues; ++j) {
+ Type Value2 = static_cast<Type>(Values[j]);
+ for (size_t f = 0; f < 2; ++f) {
+ bool flip = f;
+ ++TotalTests;
+ *AtomicLoc = Value1;
+ Type ResultSz1 = Subzero_::test_val_cmp_swap(
+ AtomicLoc, flip ? Value2 : Value1, Value2);
+ Type ResultSz2 = *AtomicLoc;
+ *AtomicLoc = Value1;
+ Type ResultLlc1 = test_val_cmp_swap(
+ AtomicLoc, flip ? Value2 : Value1, Value2);
+ Type ResultLlc2 = *AtomicLoc;
+ if (ResultSz1 == ResultLlc1 && ResultSz2 == ResultLlc2) {
+ ++Passes;
+ } else {
+ ++Failures;
+ std::cout << "test_val_cmp_swap" << (CHAR_BIT * sizeof(Type)) << "("
+ << static_cast<uint64_t>(Value1) << ", "
+ << static_cast<uint64_t>(Value2)
+ << "): sz1=" << static_cast<uint64_t>(ResultSz1)
+ << " llc1=" << static_cast<uint64_t>(ResultLlc1)
+ << " sz2=" << static_cast<uint64_t>(ResultSz2)
+ << " llc2=" << static_cast<uint64_t>(ResultLlc2)
+ << "\n";
+ }
+ }
+ }
+ }
+}
+
+template <typename Type>
+struct ThreadData {
+ Type (*FuncPtr)(bool, volatile Type*, Type);
+ bool Fetch;
+ volatile Type *Ptr;
+ Type Adjustment;
+};
+
+template <typename Type>
+void *threadWrapper(void *Data) {
+ const size_t NumReps = 8000;
+ ThreadData<Type> *TData = reinterpret_cast<ThreadData<Type>*>(Data);
+ for (size_t i = 0; i < NumReps; ++i) {
+ (void)TData->FuncPtr(TData->Fetch, TData->Ptr, TData->Adjustment);
+ }
+ return NULL;
+}
+
+template <typename Type>
+void testAtomicRMWThreads(volatile Type *AtomicLoc, size_t &TotalTests,
+ size_t &Passes, size_t &Failures) {
+ typedef Type (*FuncType)(bool, volatile Type*, Type);
+ static struct {
+ const char *Name;
+ FuncType FuncLlc;
+ FuncType FuncSz;
+ } Funcs[] = {
+#define X(inst) \
+ { \
+ STR(inst), test_##inst, Subzero_::test_##inst \
+ }, \
+ { \
+ STR(inst) "_alloca", test_alloca_##inst, Subzero_::test_alloca_##inst \
+ },
+ RMWOP_TABLE
+#undef X
+ };
+ const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
+
+ // Just test a few values, otherwise it takes a *really* long time.
+ volatile uint64_t ValuesSubset[] = { 1, 0x7e, 0x000fffffffffffffffll };
+ const size_t NumValuesSubset = sizeof(ValuesSubset) / sizeof(*ValuesSubset);
+
+ for (size_t f = 0; f < NumFuncs; ++f) {
+ for (size_t i = 0; i < NumValuesSubset; ++i) {
+ Type Value1 = static_cast<Type>(ValuesSubset[i]);
+ for (size_t j = 0; j < NumValuesSubset; ++j) {
+ Type Value2 = static_cast<Type>(ValuesSubset[j]);
+ bool fetch_first = true;
+ ThreadData<Type> TDataSz = {
+ Funcs[f].FuncSz, fetch_first, AtomicLoc, Value2 };
+ ThreadData<Type> TDataLlc = {
+ Funcs[f].FuncLlc, fetch_first, AtomicLoc, Value2 };
+ ++TotalTests;
+ const size_t NumThreads = 4;
+ pthread_t t[NumThreads];
+
+ // Try N threads w/ just Llc.
+ *AtomicLoc = Value1;
+ for (size_t m = 0; m < NumThreads; ++m) {
+ pthread_create(&t[m], NULL, &threadWrapper<Type>,
+ reinterpret_cast<void *>(&TDataLlc));
+ }
+ for (size_t m = 0; m < NumThreads; ++m) {
+ pthread_join(t[m], NULL);
+ }
+ Type ResultLlc = *AtomicLoc;
+
+ // Try N threads w/ both Sz and Llc.
+ *AtomicLoc = Value1;
+ for (size_t m = 0; m < NumThreads; ++m) {
+ if (pthread_create(&t[m], NULL, &threadWrapper<Type>,
+ m % 2 == 0
+ ? reinterpret_cast<void *>(&TDataLlc)
+ : reinterpret_cast<void *>(&TDataSz)) != 0) {
+ ++Failures;
+ std::cout << "pthread_create failed w/ " << strerror(errno) << "\n";
+ abort();
+ }
+ }
+ for (size_t m = 0; m < NumThreads; ++m) {
+ if (pthread_join(t[m], NULL) != 0) {
+ ++Failures;
+ std::cout << "pthread_join failed w/ " << strerror(errno) << "\n";
+ abort();
+ }
+ }
+ Type ResultMixed = *AtomicLoc;
+
+ if (ResultLlc == ResultMixed) {
+ ++Passes;
+ } else {
+ ++Failures;
+ std::cout << "test_with_threads_" << Funcs[f].Name
+ << (8 * sizeof(Type)) << "("
+ << static_cast<uint64_t>(Value1) << ", "
+ << static_cast<uint64_t>(Value2)
+ << "): llc=" << static_cast<uint64_t>(ResultLlc)
+ << " mixed=" << static_cast<uint64_t>(ResultMixed)
+ << "\n";
+ }
+ }
+ }
+ }
+}
+
+int main(int argc, char **argv) {
+ size_t TotalTests = 0;
+ size_t Passes = 0;
+ size_t Failures = 0;
+
+ testAtomicRMW<uint8_t>(&AtomicLocs.l8, TotalTests, Passes, Failures);
+ testAtomicRMW<uint16_t>(&AtomicLocs.l16, TotalTests, Passes, Failures);
+ testAtomicRMW<uint32_t>(&AtomicLocs.l32, TotalTests, Passes, Failures);
+ testAtomicRMW<uint64_t>(&AtomicLocs.l64, TotalTests, Passes, Failures);
+ testValCompareAndSwap<uint8_t>(
+ &AtomicLocs.l8, TotalTests, Passes, Failures);
+ testValCompareAndSwap<uint16_t>(
+ &AtomicLocs.l16, TotalTests, Passes, Failures);
+ testValCompareAndSwap<uint32_t>(
+ &AtomicLocs.l32, TotalTests, Passes, Failures);
+ testValCompareAndSwap<uint64_t>(
+ &AtomicLocs.l64, TotalTests, Passes, Failures);
+ testAtomicRMWThreads<uint8_t>(
+ &AtomicLocs.l8, TotalTests, Passes, Failures);
+ testAtomicRMWThreads<uint16_t>(
+ &AtomicLocs.l16, TotalTests, Passes, Failures);
+ testAtomicRMWThreads<uint32_t>(
+ &AtomicLocs.l32, TotalTests, Passes, Failures);
+ testAtomicRMWThreads<uint64_t>(
+ &AtomicLocs.l64, TotalTests, Passes, Failures);
+
+ std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
+ << " Failures=" << Failures << "\n";
+ return Failures;
+}