Implement atomic load/store memory ordering

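OpAtomicLoad and OpAtomicStore now pass their SPIR-V memory semantics to
Reactor through new Load()/Store() helpers, which take an explicit
alignment, an atomicity flag, and a std::memory_order. The LLVM backend
translates these to the corresponding llvm::AtomicOrdering, e.g.:

    load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);

The Subzero backend asserts that atomic accesses remain unimplemented.
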
Bug b/127472316

Test: dEQP-VK.spirv_assembly.instruction.compute.opatomic.load
Test: dEQP-VK.spirv_assembly.instruction.compute.opatomic.store
Change-Id: I0379a1a74bc9742a3a9ae4d52b319e0838fd21bf
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/27728
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 29e55b4..afd397d 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1387,6 +1387,7 @@
 
 	void SpirvShader::EmitLoad(InsnIterator insn, SpirvRoutine *routine) const
 	{
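+		// EmitLoad handles both OpLoad and OpAtomicLoad.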
+		bool atomic = (insn.opcode() == spv::OpAtomicLoad);
 		Object::ID resultId = insn.word(2);
 		Object::ID pointerId = insn.word(3);
 		auto &result = getObject(resultId);
@@ -1394,10 +1395,18 @@
 		auto &pointer = getObject(pointerId);
 		auto &pointerBase = getObject(pointer.pointerBase);
 		auto &pointerBaseTy = getType(pointerBase.type);
+		std::memory_order memoryOrder = std::memory_order_relaxed;
+
+		if(atomic)
+		{
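+			// For OpAtomicLoad, word 4 is the memory scope and word 5 the memory semantics.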
+			Object::ID semanticsId = insn.word(5);
+			auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
+			memoryOrder = MemoryOrder(memorySemantics);
+		}
 
 		ASSERT(getType(pointer.type).element == result.type);
 		ASSERT(Type::ID(insn.word(1)) == result.type);
-		ASSERT((insn.opcode() != spv::OpAtomicLoad) || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
+		ASSERT(!atomic || getType(getType(pointer.type).element).opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
 
 		if (pointerBaseTy.storageClass == spv::StorageClassImage)
 		{
@@ -1434,7 +1443,7 @@
 					{
 						Int offset = Int(i) + Extract(offsets, j);
 						if (interleavedByLane) { offset = offset * SIMD::Width + j; }
-						load[i] = Insert(load[i], ptrBase[offset], j);
+						load[i] = Insert(load[i], Load(&ptrBase[offset], sizeof(float), atomic, memoryOrder), j);
 					}
 				}
 			}
@@ -1448,7 +1457,7 @@
 				Pointer<SIMD::Float> src = ptrBase;
 				for (auto i = 0u; i < resultTy.sizeInComponents; i++)
 				{
-					load[i] = src[i];
+					load[i] = Load(&src[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
 				}
 			}
 			else
@@ -1456,7 +1465,7 @@
 				// Non-interleaved data.
 				for (auto i = 0u; i < resultTy.sizeInComponents; i++)
 				{
-					load[i] = RValue<SIMD::Float>(ptrBase[i]);
+					load[i] = RValue<SIMD::Float>(Load(&ptrBase[i], sizeof(float), atomic, memoryOrder));  // TODO: optimize alignment
 				}
 			}
 		}
@@ -1479,6 +1488,14 @@
 		auto &elementTy = getType(pointerTy.element);
 		auto &pointerBase = getObject(pointer.pointerBase);
 		auto &pointerBaseTy = getType(pointerBase.type);
+		std::memory_order memoryOrder = std::memory_order_relaxed;
+
+		if(atomic)
+		{
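+			// For OpAtomicStore, word 2 is the memory scope and word 3 the memory semantics.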
+			Object::ID semanticsId = insn.word(3);
+			auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
+			memoryOrder = MemoryOrder(memorySemantics);
+		}
 
 		ASSERT(!atomic || elementTy.opcode() == spv::OpTypeInt);  // Vulkan 1.1: "Atomic instructions must declare a scalar 32-bit integer type, for the value pointed to by Pointer."
 
@@ -1518,7 +1535,7 @@
 						{
 							Int offset = Int(i) + Extract(offsets, j);
 							if (interleavedByLane) { offset = offset * SIMD::Width + j; }
-							ptrBase[offset] = RValue<Float>(src[i]);
+							Store(RValue<Float>(src[i]), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
 						}
 					}
 				}
@@ -1530,7 +1547,7 @@
 				Pointer<SIMD::Float> dst = ptrBase;
 				for (auto i = 0u; i < elementTy.sizeInComponents; i++)
 				{
-					dst[i] = RValue<SIMD::Float>(src[i]);
+					Store(RValue<SIMD::Float>(src[i]), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
 				}
 			}
 		}
@@ -1552,7 +1569,7 @@
 						{
 							Int offset = Int(i) + Extract(offsets, j);
 							if (interleavedByLane) { offset = offset * SIMD::Width + j; }
-							ptrBase[offset] = Extract(src.Float(i), j);
+							Store(Extract(src.Float(i), j), &ptrBase[offset], sizeof(float), atomic, memoryOrder);
 						}
 					}
 				}
@@ -1566,7 +1583,7 @@
 					Pointer<SIMD::Float> dst = ptrBase;
 					for (auto i = 0u; i < elementTy.sizeInComponents; i++)
 					{
-						dst[i] = src.Float(i);
+						Store(src.Float(i), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
 					}
 				}
 				else
@@ -1575,7 +1592,7 @@
 					Pointer<SIMD::Float> dst = ptrBase;
 					for (auto i = 0u; i < elementTy.sizeInComponents; i++)
 					{
-						dst[i] = SIMD::Float(src.Float(i));
+						Store<SIMD::Float>(SIMD::Float(src.Float(i)), &dst[i], sizeof(float), atomic, memoryOrder);  // TODO: optimize alignment
 					}
 				}
 			}
@@ -2398,6 +2415,21 @@
 		}
 	}
 
+	std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
+	{
+		switch(memorySemantics)
+		{
+		case spv::MemorySemanticsMaskNone:                   return std::memory_order_relaxed;
+		case spv::MemorySemanticsAcquireMask:                return std::memory_order_acquire;
+		case spv::MemorySemanticsReleaseMask:                return std::memory_order_release;
+		case spv::MemorySemanticsAcquireReleaseMask:         return std::memory_order_acq_rel;
+		case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel;  // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
+		default:
+			UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
+			return std::memory_order_acq_rel;
+		}
+	}
+
 	SIMD::Float SpirvShader::Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const
 	{
 		SIMD::Float d = x.Float(0) * y.Float(0);
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index d02c40e..84f817b 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -20,6 +20,9 @@
 #include "System/Types.hpp"
 #include "Vulkan/VkDebug.hpp"
 #include "Vulkan/VkConfig.h"
+#include "Device/Config.hpp"
+
+#include <spirv/unified1/spirv.hpp>
 
 #include <array>
 #include <cstring>
@@ -30,8 +33,6 @@
 #include <cstdint>
 #include <type_traits>
 #include <memory>
+#include <atomic>
-#include <spirv/unified1/spirv.hpp>
-#include <Device/Config.hpp>
 
 namespace vk
 {
@@ -500,9 +501,10 @@
 		void EmitAll(InsnIterator insn, SpirvRoutine *routine) const;
 		void EmitBranch(InsnIterator insn, SpirvRoutine *routine) const;
 
-		// OpcodeName returns the name of the opcode op.
-		// If NDEBUG is defined, then OpcodeName will only return the numerical code.
+		// OpcodeName() returns the name of the opcode op.
+		// If NDEBUG is defined, then OpcodeName() will only return the numerical code.
 		static std::string OpcodeName(spv::Op op);
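+		// MemoryOrder() maps a SPIR-V memory semantics mask to the closest C++11 memory order.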
+		static std::memory_order MemoryOrder(spv::MemorySemanticsMask memorySemantics);
 
 		// Helper as we often need to take dot products as part of doing other things.
 		SIMD::Float Dot(unsigned numComponents, GenericValue const & x, GenericValue const & y) const;
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 8c8c7e3..624af83 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -838,6 +838,29 @@
 		}
 	}
 
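+	// atomicOrdering() returns the LLVM equivalent of the given memory order,
+	// or NotAtomic when the access is not atomic.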
+	static llvm::AtomicOrdering atomicOrdering(bool atomic, std::memory_order memoryOrder)
+	{
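+		// Atomic memory ordering is only supported with LLVM 7 and later;
+		// older versions perform plain, non-atomic accesses.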
+		#if REACTOR_LLVM_VERSION < 7
+			return llvm::AtomicOrdering::NotAtomic;
+		#endif
+
+		if(!atomic)
+		{
+			return llvm::AtomicOrdering::NotAtomic;
+		}
+
+		switch(memoryOrder)
+		{
+		case std::memory_order_relaxed: return llvm::AtomicOrdering::Monotonic;  // https://llvm.org/docs/Atomics.html#monotonic
+		case std::memory_order_consume: return llvm::AtomicOrdering::Acquire;    // https://llvm.org/docs/Atomics.html#acquire: "It should also be used for C++11/C11 memory_order_consume."
+		case std::memory_order_acquire: return llvm::AtomicOrdering::Acquire;
+		case std::memory_order_release: return llvm::AtomicOrdering::Release;
+		case std::memory_order_acq_rel: return llvm::AtomicOrdering::AcquireRelease;
+		case std::memory_order_seq_cst: return llvm::AtomicOrdering::SequentiallyConsistent;
+		default: assert(false);         return llvm::AtomicOrdering::AcquireRelease;
+		}
+	}
+
 	Nucleus::Nucleus()
 	{
 		::codegenMutex.lock();   // Reactor and LLVM are currently not thread safe
@@ -1190,7 +1213,7 @@
 		return V(::builder->CreateNot(V(v)));
 	}
 
-	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
+	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
 	{
 		switch(asInternalType(type))
 		{
@@ -1201,7 +1224,7 @@
 			return createBitCast(
 				createInsertElement(
 					V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
-					createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment),
+					createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment, atomic, memoryOrder),
 					0),
 				type);
 		case Type_v2i16:
@@ -1209,21 +1232,26 @@
 			if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
 			{
 				Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
-				Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
+				Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment, atomic, memoryOrder);
 				i = createZExt(i, Long::getType());
 				Value *v = createInsertElement(u, i, 0);
 				return createBitCast(v, type);
 			}
 			// Fallthrough to non-emulated case.
 		case Type_LLVM:
-			assert(V(ptr)->getType()->getContainedType(0) == T(type));
-			return V(::builder->Insert(new llvm::LoadInst(V(ptr), "", isVolatile, alignment)));
+			{
+				assert(V(ptr)->getType()->getContainedType(0) == T(type));
+				auto load = new llvm::LoadInst(V(ptr), "", isVolatile, alignment);
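+				// An ordering of NotAtomic (when 'atomic' is false) leaves the load non-atomic.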
+				load->setAtomic(atomicOrdering(atomic, memoryOrder));
+
+				return V(::builder->Insert(load));
+			}
 		default:
 			assert(false); return nullptr;
 		}
 	}
 
-	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
+	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
 	{
 		switch(asInternalType(type))
 		{
@@ -1235,7 +1263,7 @@
 				createExtractElement(
 					createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
 				createBitCast(ptr, Pointer<Long>::getType()),
-				Long::getType(), isVolatile, alignment);
+				Long::getType(), isVolatile, alignment, atomic, memoryOrder);
 			return value;
 		case Type_v2i16:
 		case Type_v4i8:
@@ -1244,14 +1272,18 @@
 				createStore(
 					createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
 					createBitCast(ptr, Pointer<Int>::getType()),
-					Int::getType(), isVolatile, alignment);
+					Int::getType(), isVolatile, alignment, atomic, memoryOrder);
 				return value;
 			}
 			// Fallthrough to non-emulated case.
 		case Type_LLVM:
-			assert(V(ptr)->getType()->getContainedType(0) == T(type));
-			::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
-			return value;
+			{
+				assert(V(ptr)->getType()->getContainedType(0) == T(type));
+				auto store = ::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
+				store->setAtomic(atomicOrdering(atomic, memoryOrder));
+
+				return value;
+			}
 		default:
 			assert(false); return nullptr;
 		}
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index ef695c5..1adeaf3 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -19,6 +19,7 @@
 #include <cstdarg>
 #include <cstdint>
 #include <vector>
+#include <atomic>
 
 namespace rr
 {
@@ -95,8 +96,8 @@
 		static Value *createNot(Value *V);
 
 		// Memory instructions
-		static Value *createLoad(Value *ptr, Type *type, bool isVolatile = false, unsigned int align = 0);
-		static Value *createStore(Value *value, Value *ptr, Type *type, bool isVolatile = false, unsigned int align = 0);
+		static Value *createLoad(Value *ptr, Type *type, bool isVolatile = false, unsigned int alignment = 0, bool atomic = false, std::memory_order memoryOrder = std::memory_order_relaxed);
+		static Value *createStore(Value *value, Value *ptr, Type *type, bool isVolatile = false, unsigned int alignment = 0, bool atomic = false, std::memory_order memoryOrder = std::memory_order_relaxed);
 		static Value *createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex);
 
 		// Atomic instructions
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 964674e..39f7f2b 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2234,6 +2234,18 @@
 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset);
 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset);
 
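+	// Load() and Store() access memory with the given alignment, optionally as
+	// an atomic operation with the given memory order.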
+	template<typename T>
+	RValue<T> Load(RValue<Pointer<T>> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
+	{
+		return RValue<T>(Nucleus::createLoad(pointer.value, T::getType(), false, alignment, atomic, memoryOrder));
+	}
+
+	template<typename T>
+	void Store(RValue<T> value, RValue<Pointer<T>> pointer, unsigned int alignment, bool atomic, std::memory_order memoryOrder)
+	{
+		Nucleus::createStore(value.value, pointer.value, T::getType(), false, alignment, atomic, memoryOrder);
+	}
+
 	template<class T, int S = 1>
 	class Array : public LValue<T>
 	{
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index bd406ce..fce1717 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -846,8 +846,11 @@
 		}
 	}
 
-	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
+	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
 	{
+		assert(!atomic);  // Unimplemented
+		assert(memoryOrder == std::memory_order_relaxed);  // Unimplemented
+
 		int valueType = (int)reinterpret_cast<intptr_t>(type);
 		Ice::Variable *result = ::function->makeVariable(T(type));
 
@@ -900,8 +903,11 @@
 		return V(result);
 	}
 
-	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
+	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align, bool atomic, std::memory_order memoryOrder)
 	{
+		assert(!atomic);  // Unimplemented
+		assert(memoryOrder == std::memory_order_relaxed);  // Unimplemented
+
 		#if __has_feature(memory_sanitizer)
 			// Mark all (non-stack) memory writes as initialized by calling __msan_unpoison
 			if(align != 0)