SubzeroReactor: implement missing atomic ops

* Most use Subzero intrinsics, except for Min/MaxAtomic, which are
emulated.
* Added unit tests for each implemented function. They only check that
each function performs the expected operation; they do not verify that
the operations are actually atomic.

Bug: b/145754674
Change-Id: Ie3ec6e473ee8b448b28bf440da094ac03ac0005b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/39829
Reviewed-by: Ben Clayton <bclayton@google.com>
Tested-by: Antonio Maiorano <amaiorano@google.com>
diff --git a/src/Reactor/EmulatedReactor.cpp b/src/Reactor/EmulatedReactor.cpp
index dc3d558..631960b 100644
--- a/src/Reactor/EmulatedReactor.cpp
+++ b/src/Reactor/EmulatedReactor.cpp
@@ -14,8 +14,10 @@
 
 #include "EmulatedReactor.hpp"
 
+#include <algorithm>
 #include <cmath>
 #include <functional>
+#include <mutex>
 #include <utility>
 
 namespace rr {
@@ -96,6 +98,31 @@
 	}
 }
 
+// TODO(b/148276653): atomicMin and atomicMax each use a function-local static mutex, which makes
+// all min (or all max) operations for a given T mutually exclusive, rather than only the ones on
+// the value pointed to by ptr. Use a CAS loop, as is done for LLVMReactor's min/max atomic for Android.
+// TODO(b/148207274): Or, move this down into Subzero as a CAS-based operation.
+template<typename T>
+static T atomicMin(T *ptr, T value)  // writes std::min(*ptr, value) to *ptr and returns the prior *ptr
+{
+	static std::mutex m;  // function-local static: one mutex per instantiation of T, not per address
+
+	std::lock_guard<std::mutex> lock(m);  // held until the function returns
+	T origValue = *ptr;
+	*ptr = std::min(origValue, value);
+	return origValue;  // the value read before the store
+}
+template<typename T>
+static T atomicMax(T *ptr, T value)  // writes std::max(*ptr, value) to *ptr and returns the prior *ptr
+{
+	static std::mutex m;  // separate object from the mutex declared inside atomicMin
+
+	std::lock_guard<std::mutex> lock(m);  // held until the function returns
+	T origValue = *ptr;
+	*ptr = std::max(origValue, value);
+	return origValue;  // the value read before the store
+}
+
 }  // anonymous namespace
 
 namespace emulated {
@@ -224,6 +251,26 @@
 	return call4(log2f, x);
 }
 
+RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
+{
+	return Call(atomicMin<int32_t>, x, y);  // memoryOrder is not passed on to atomicMin
+}
+
+RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
+{
+	return Call(atomicMin<uint32_t>, x, y);  // UInt overload instantiates the helper with uint32_t
+}
+
+RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
+{
+	return Call(atomicMax<int32_t>, x, y);  // memoryOrder is not passed on to atomicMax
+}
+
+RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
+{
+	return Call(atomicMax<uint32_t>, x, y);  // UInt overload instantiates the helper with uint32_t
+}
+
 RValue<Float4> FRem(RValue<Float4> lhs, RValue<Float4> rhs)
 {
 	return call4(fmodf, lhs, rhs);
diff --git a/src/Reactor/EmulatedReactor.hpp b/src/Reactor/EmulatedReactor.hpp
index ccc6245..0443514 100644
--- a/src/Reactor/EmulatedReactor.hpp
+++ b/src/Reactor/EmulatedReactor.hpp
@@ -48,6 +48,10 @@
 RValue<Float4> Log(RValue<Float4> x);
 RValue<Float4> Exp2(RValue<Float4> x);
 RValue<Float4> Log2(RValue<Float4> x);
+RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder);  // defined in EmulatedReactor.cpp
+RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
+RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder);  // defined in EmulatedReactor.cpp
+RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
 RValue<Float4> FRem(RValue<Float4> lhs, RValue<Float4> rhs);
 
 }  // namespace emulated
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 7cdf460..47144df 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -4049,6 +4049,26 @@
 	                                                           isZeroUndef ? ::llvm::ConstantInt::getTrue(jit->context) : ::llvm::ConstantInt::getFalse(jit->context)))));
 }
 
+RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
+{
+	return RValue<Int>(Nucleus::createAtomicMin(x.value, y.value, memoryOrder));  // Int overload: createAtomicMin
+}
+
+RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
+{
+	return RValue<UInt>(Nucleus::createAtomicUMin(x.value, y.value, memoryOrder));  // UInt overload: createAtomicUMin
+}
+
+RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
+{
+	return RValue<Int>(Nucleus::createAtomicMax(x.value, y.value, memoryOrder));  // Int overload: createAtomicMax
+}
+
+RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
+{
+	return RValue<UInt>(Nucleus::createAtomicUMax(x.value, y.value, memoryOrder));  // UInt overload: createAtomicUMax
+}
+
 Type *Float4::getType()
 {
 	return T(llvm::VectorType::get(T(Float::getType()), 4));
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index 33e1972..b3ce607 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -2623,26 +2623,6 @@
 	return RValue<UInt>(Nucleus::createAtomicXor(x.value, y.value, memoryOrder));
 }
 
-RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
-{
-	return RValue<Int>(Nucleus::createAtomicMin(x.value, y.value, memoryOrder));
-}
-
-RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
-{
-	return RValue<UInt>(Nucleus::createAtomicUMin(x.value, y.value, memoryOrder));
-}
-
-RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
-{
-	return RValue<Int>(Nucleus::createAtomicMax(x.value, y.value, memoryOrder));
-}
-
-RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
-{
-	return RValue<UInt>(Nucleus::createAtomicUMax(x.value, y.value, memoryOrder));
-}
-
 RValue<UInt> ExchangeAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
 {
 	return RValue<UInt>(Nucleus::createAtomicExchange(x.value, y.value, memoryOrder));
diff --git a/src/Reactor/ReactorUnitTests.cpp b/src/Reactor/ReactorUnitTests.cpp
index ffe71c4..e0c40c9 100644
--- a/src/Reactor/ReactorUnitTests.cpp
+++ b/src/Reactor/ReactorUnitTests.cpp
@@ -2252,6 +2252,215 @@
 	EXPECT_EQ(result[3], 678);
 }
 
+TEST(ReactorUnitTests, AddAtomic)  // one call from one thread; concurrency is not exercised
+{
+	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
+	{
+		Pointer<UInt> p = function.Arg<0>();
+		UInt a = function.Arg<1>();
+		UInt r = rr::AddAtomic(p, a, std::memory_order_relaxed);
+		Return(r);  // the routine returns the result of AddAtomic
+	}
+
+	auto routine = function("one");
+	uint32_t x = 123;
+	uint32_t y = 456;
+	uint32_t prevX = routine(&x, y);
+	EXPECT_EQ(prevX, 123u);  // result is the value of x before the add
+	EXPECT_EQ(x, 579u);      // 123 + 456
+}
+
+TEST(ReactorUnitTests, SubAtomic)  // one call from one thread; concurrency is not exercised
+{
+	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
+	{
+		Pointer<UInt> p = function.Arg<0>();
+		UInt a = function.Arg<1>();
+		UInt r = rr::SubAtomic(p, a, std::memory_order_relaxed);
+		Return(r);  // the routine returns the result of SubAtomic
+	}
+
+	auto routine = function("one");
+	uint32_t x = 456;
+	uint32_t y = 123;
+	uint32_t prevX = routine(&x, y);
+	EXPECT_EQ(prevX, 456u);  // result is the value of x before the subtract
+	EXPECT_EQ(x, 333u);      // 456 - 123
+}
+
+TEST(ReactorUnitTests, AndAtomic)  // one call from one thread; concurrency is not exercised
+{
+	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
+	{
+		Pointer<UInt> p = function.Arg<0>();
+		UInt a = function.Arg<1>();
+		UInt r = rr::AndAtomic(p, a, std::memory_order_relaxed);
+		Return(r);  // the routine returns the result of AndAtomic
+	}
+
+	auto routine = function("one");
+	uint32_t x = 0b1111'0000;
+	uint32_t y = 0b1010'1100;
+	uint32_t prevX = routine(&x, y);
+	EXPECT_EQ(prevX, 0b1111'0000u);  // result is the value of x before the operation
+	EXPECT_EQ(x, 0b1010'0000u);      // bitwise AND of the two inputs
+}
+
+TEST(ReactorUnitTests, OrAtomic)  // one call from one thread; concurrency is not exercised
+{
+	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
+	{
+		Pointer<UInt> p = function.Arg<0>();
+		UInt a = function.Arg<1>();
+		UInt r = rr::OrAtomic(p, a, std::memory_order_relaxed);
+		Return(r);  // the routine returns the result of OrAtomic
+	}
+
+	auto routine = function("one");
+	uint32_t x = 0b1111'0000;
+	uint32_t y = 0b1010'1100;
+	uint32_t prevX = routine(&x, y);
+	EXPECT_EQ(prevX, 0b1111'0000u);  // result is the value of x before the operation
+	EXPECT_EQ(x, 0b1111'1100u);      // bitwise OR of the two inputs
+}
+
+TEST(ReactorUnitTests, XorAtomic)  // one call from one thread; concurrency is not exercised
+{
+	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
+	{
+		Pointer<UInt> p = function.Arg<0>();
+		UInt a = function.Arg<1>();
+		UInt r = rr::XorAtomic(p, a, std::memory_order_relaxed);
+		Return(r);  // the routine returns the result of XorAtomic
+	}
+
+	auto routine = function("one");
+	uint32_t x = 0b1111'0000;
+	uint32_t y = 0b1010'1100;
+	uint32_t prevX = routine(&x, y);
+	EXPECT_EQ(prevX, 0b1111'0000u);  // result is the value of x before the operation
+	EXPECT_EQ(x, 0b0101'1100u);      // bitwise XOR of the two inputs
+}
+
+TEST(ReactorUnitTests, MinAtomic)  // covers the UInt and Int overloads, one call each
+{
+	{
+		FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;  // unsigned case
+		{
+			Pointer<UInt> p = function.Arg<0>();
+			UInt a = function.Arg<1>();
+			UInt r = rr::MinAtomic(p, a, std::memory_order_relaxed);
+			Return(r);
+		}
+
+		auto routine = function("one");
+		uint32_t x = 123;
+		uint32_t y = 100;
+		uint32_t prevX = routine(&x, y);
+		EXPECT_EQ(prevX, 123u);  // result is the value of x before the operation
+		EXPECT_EQ(x, 100u);      // y is smaller, so it replaces x
+	}
+
+	{
+		FunctionT<int32_t(int32_t * p, int32_t a)> function;  // signed case with negative inputs
+		{
+			Pointer<Int> p = function.Arg<0>();
+			Int a = function.Arg<1>();
+			Int r = rr::MinAtomic(p, a, std::memory_order_relaxed);
+			Return(r);
+		}
+
+		auto routine = function("one");
+		int32_t x = -123;
+		int32_t y = -200;
+		int32_t prevX = routine(&x, y);
+		EXPECT_EQ(prevX, -123);  // result is the value of x before the operation
+		EXPECT_EQ(x, -200);      // -200 is the smaller signed value
+	}
+}
+
+TEST(ReactorUnitTests, MaxAtomic)  // covers the UInt and Int overloads, one call each
+{
+	{
+		FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;  // unsigned case
+		{
+			Pointer<UInt> p = function.Arg<0>();
+			UInt a = function.Arg<1>();
+			UInt r = rr::MaxAtomic(p, a, std::memory_order_relaxed);
+			Return(r);
+		}
+
+		auto routine = function("one");
+		uint32_t x = 123;
+		uint32_t y = 100;
+		uint32_t prevX = routine(&x, y);
+		EXPECT_EQ(prevX, 123u);  // result is the value of x before the operation
+		EXPECT_EQ(x, 123u);      // x is already the larger value, so it is unchanged
+	}
+
+	{
+		FunctionT<int32_t(int32_t * p, int32_t a)> function;  // signed case with negative inputs
+		{
+			Pointer<Int> p = function.Arg<0>();
+			Int a = function.Arg<1>();
+			Int r = rr::MaxAtomic(p, a, std::memory_order_relaxed);
+			Return(r);
+		}
+
+		auto routine = function("one");
+		int32_t x = -123;
+		int32_t y = -200;
+		int32_t prevX = routine(&x, y);
+		EXPECT_EQ(prevX, -123);  // result is the value of x before the operation
+		EXPECT_EQ(x, -123);      // -123 is the larger signed value, so x is unchanged
+	}
+}
+
+TEST(ReactorUnitTests, ExchangeAtomic)  // one call from one thread; concurrency is not exercised
+{
+	FunctionT<uint32_t(uint32_t * p, uint32_t a)> function;
+	{
+		Pointer<UInt> p = function.Arg<0>();
+		UInt a = function.Arg<1>();
+		UInt r = rr::ExchangeAtomic(p, a, std::memory_order_relaxed);
+		Return(r);  // the routine returns the result of ExchangeAtomic
+	}
+
+	auto routine = function("one");
+	uint32_t x = 123;
+	uint32_t y = 456;
+	uint32_t prevX = routine(&x, y);
+	EXPECT_EQ(prevX, 123u);  // result is the value of x before the exchange
+	EXPECT_EQ(x, y);         // x now holds the value that was passed in
+}
+
+TEST(ReactorUnitTests, CompareExchangeAtomic)  // checks a matching and a non-matching compare value
+{
+	FunctionT<uint32_t(uint32_t * x, uint32_t y, uint32_t compare)> function;
+	{
+		Pointer<UInt> x = function.Arg<0>();
+		UInt y = function.Arg<1>();
+		UInt compare = function.Arg<2>();
+		UInt r = rr::CompareExchangeAtomic(x, y, compare, std::memory_order_relaxed, std::memory_order_relaxed);
+		Return(r);
+	}
+
+	auto routine = function("one");
+	uint32_t x = 123;
+	uint32_t y = 456;
+	uint32_t compare = 123;  // equals x, so the exchange is expected to happen
+	uint32_t prevX = routine(&x, y, compare);
+	EXPECT_EQ(prevX, 123u);  // result is the value of x before the call
+	EXPECT_EQ(x, y);
+
+	x = 123;
+	y = 456;
+	compare = 456;  // differs from x, so x is expected to stay unchanged
+	prevX = routine(&x, y, compare);
+	EXPECT_EQ(prevX, 123u);  // the prior value is returned in this case too
+	EXPECT_EQ(x, 123u);
+}
+
 TEST(ReactorUnitTests, SRem)
 {
 	FunctionT<void(int4 *, int4 *)> function;
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 91de455..3c2888f 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -89,7 +89,7 @@
 #	define __x86_64__ 1
 #endif
 
-static Ice::OptLevel toIce(rr::Optimization::Level level)
+Ice::OptLevel toIce(rr::Optimization::Level level)
 {
 	switch(level)
 	{
@@ -103,6 +103,20 @@
 	return Ice::Opt_2;
 }
 
+Ice::Intrinsics::MemoryOrder stdToIceMemoryOrder(std::memory_order memoryOrder)  // std::memory_order -> Ice::Intrinsics::MemoryOrder
+{
+	switch(memoryOrder)  // no default label: every std::memory_order enumerator is listed
+	{
+		case std::memory_order_relaxed: return Ice::Intrinsics::MemoryOrderRelaxed;
+		case std::memory_order_consume: return Ice::Intrinsics::MemoryOrderConsume;
+		case std::memory_order_acquire: return Ice::Intrinsics::MemoryOrderAcquire;
+		case std::memory_order_release: return Ice::Intrinsics::MemoryOrderRelease;
+		case std::memory_order_acq_rel: return Ice::Intrinsics::MemoryOrderAcquireRelease;
+		case std::memory_order_seq_cst: return Ice::Intrinsics::MemoryOrderSequentiallyConsistent;
+	}
+	return Ice::Intrinsics::MemoryOrderInvalid;  // reached only when memoryOrder matches none of the cases
+}
+
 class CPUID
 {
 public:
@@ -1132,70 +1146,71 @@
 	return createAdd(ptr, index);
 }
 
+static Value *createAtomicRMW(Ice::Intrinsics::AtomicRMWOperation rmwOp, Value *ptr, Value *value, std::memory_order memoryOrder)  // shared helper: appends one AtomicRMW intrinsic call
+{
+	Ice::Variable *result = ::function->makeVariable(value->getType());  // destination of the call, same type as value
+
+	const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicRMW, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
+	auto target = ::context->getConstantUndef(Ice::IceType_i32);  // NOTE(review): undef call target, presumably ignored for intrinsics - confirm
+	auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
+	auto op = ::context->getConstantInt32(rmwOp);
+	auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
+	inst->addArg(op);  // arguments are added in the order: operation, pointer, operand, memory order
+	inst->addArg(ptr);
+	inst->addArg(value);
+	inst->addArg(order);
+	::basicBlock->appendInst(inst);
+
+	return V(result);
+}
+
 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
 {
-	UNIMPLEMENTED("createAtomicAdd");
-	return nullptr;
+	return createAtomicRMW(Ice::Intrinsics::AtomicAdd, ptr, value, memoryOrder);  // delegates to the shared AtomicRMW helper
 }
 
 Value *Nucleus::createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder)
 {
-	UNIMPLEMENTED("createAtomicSub");
-	return nullptr;
+	return createAtomicRMW(Ice::Intrinsics::AtomicSub, ptr, value, memoryOrder);  // delegates to the shared AtomicRMW helper
 }
 
 Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
 {
-	UNIMPLEMENTED("createAtomicAnd");
-	return nullptr;
+	return createAtomicRMW(Ice::Intrinsics::AtomicAnd, ptr, value, memoryOrder);  // delegates to the shared AtomicRMW helper
 }
 
 Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
 {
-	UNIMPLEMENTED("createAtomicOr");
-	return nullptr;
+	return createAtomicRMW(Ice::Intrinsics::AtomicOr, ptr, value, memoryOrder);  // delegates to the shared AtomicRMW helper
 }
 
 Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
 {
-	UNIMPLEMENTED("createAtomicXor");
-	return nullptr;
-}
-
-Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
-{
-	UNIMPLEMENTED("createAtomicMin");
-	return nullptr;
-}
-
-Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
-{
-	UNIMPLEMENTED("createAtomicMax");
-	return nullptr;
-}
-
-Value *Nucleus::createAtomicUMin(Value *ptr, Value *value, std::memory_order memoryOrder)
-{
-	UNIMPLEMENTED("createAtomicUMin");
-	return nullptr;
-}
-
-Value *Nucleus::createAtomicUMax(Value *ptr, Value *value, std::memory_order memoryOrder)
-{
-	UNIMPLEMENTED("createAtomicUMax");
-	return nullptr;
+	return createAtomicRMW(Ice::Intrinsics::AtomicXor, ptr, value, memoryOrder);  // delegates to the shared AtomicRMW helper
 }
 
 Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
 {
-	UNIMPLEMENTED("createAtomicExchange");
-	return nullptr;
+	return createAtomicRMW(Ice::Intrinsics::AtomicExchange, ptr, value, memoryOrder);  // delegates to the shared AtomicRMW helper
 }
 
 Value *Nucleus::createAtomicCompareExchange(Value *ptr, Value *value, Value *compare, std::memory_order memoryOrderEqual, std::memory_order memoryOrderUnequal)
 {
-	UNIMPLEMENTED("createAtomicCompareExchange");
-	return nullptr;
+	Ice::Variable *result = ::function->makeVariable(value->getType());  // destination of the call, same type as value
+
+	const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicCmpxchg, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_T };
+	auto target = ::context->getConstantUndef(Ice::IceType_i32);  // NOTE(review): undef call target, presumably ignored for intrinsics - confirm
+	auto inst = Ice::InstIntrinsicCall::create(::function, 0, result, target, intrinsic);
+	auto orderEq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderEqual));
+	auto orderNeq = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrderUnequal));
+	inst->addArg(ptr);
+	inst->addArg(compare);  // compare is added before value, the reverse of the parameter order
+	inst->addArg(value);
+	inst->addArg(orderEq);
+	inst->addArg(orderNeq);
+	::basicBlock->appendInst(inst);
+
+	return V(result);
 }
 
 static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
@@ -3603,8 +3618,14 @@
 
 void Nucleus::createFence(std::memory_order memoryOrder)
 {
-	UNIMPLEMENTED("Subzero createFence()");
+	const Ice::Intrinsics::IntrinsicInfo intrinsic = { Ice::Intrinsics::AtomicFence, Ice::Intrinsics::SideEffects_T, Ice::Intrinsics::ReturnsTwice_F, Ice::Intrinsics::MemoryWrite_F };
+	auto target = ::context->getConstantUndef(Ice::IceType_i32);  // NOTE(review): undef call target, presumably ignored for intrinsics - confirm
+	auto inst = Ice::InstIntrinsicCall::create(::function, 0, nullptr, target, intrinsic);  // nullptr: no result variable
+	auto order = ::context->getConstantInt32(stdToIceMemoryOrder(memoryOrder));
+	inst->addArg(order);  // the memory order is the only argument
+	::basicBlock->appendInst(inst);
 }
+
 Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
 {
 	UNIMPLEMENTED("Subzero createMaskedLoad()");
@@ -3813,6 +3834,26 @@
 	}
 }
 
+RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
+{
+	return emulated::MinAtomic(x, y, memoryOrder);  // forwards unchanged to the emulated implementation
+}
+
+RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
+{
+	return emulated::MinAtomic(x, y, memoryOrder);  // forwards unchanged to the emulated implementation
+}
+
+RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
+{
+	return emulated::MaxAtomic(x, y, memoryOrder);  // forwards unchanged to the emulated implementation
+}
+
+RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
+{
+	return emulated::MaxAtomic(x, y, memoryOrder);  // forwards unchanged to the emulated implementation
+}
+
 void EmitDebugLocation() {}
 void EmitDebugVariable(Value *value) {}
 void FlushDebug() {}
diff --git a/third_party/subzero/src/IceIntrinsics.cpp b/third_party/subzero/src/IceIntrinsics.cpp
index cf3c976..7f8af30 100644
--- a/third_party/subzero/src/IceIntrinsics.cpp
+++ b/third_party/subzero/src/IceIntrinsics.cpp
@@ -259,6 +259,9 @@
 
 // Returns whether PNaCl allows the given memory ordering in general.
 bool isMemoryOrderValidPNaCl(uint64_t Order) {
+  if (::Ice::getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl)
+    return true;
+
   switch (Order) {
   case Intrinsics::MemoryOrderAcquire:
   case Intrinsics::MemoryOrderRelease: