Add support for atomic instructions as used with storage images

- OpAtomicIAdd
- OpAtomicSMin
- OpAtomicSMax
- OpAtomicUMin
- OpAtomicUMax
- OpAtomicAnd
- OpAtomicOr
- OpAtomicXor
- OpAtomicExchange

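Includes the required plumbing down through Reactor. Subzero support is
only stubbed for now: each Nucleus::createAtomic*() function in the
Subzero backend reports UNIMPLEMENTED and returns nullptr.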

Bug: b/130768731
Bug: b/127472316
Test: dEQP-VK.image.*
Change-Id: Iecf9e9ed602c4fde674f54491658048c96fd02c6
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/29453
Tested-by: Chris Forbes <chrisforbes@google.com>
Presubmit-Ready: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 53974fe..2525911 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -678,6 +678,15 @@
 			case spv::OpDPdyFine:
 			case spv::OpFwidthFine:
 			case spv::OpAtomicLoad:
+			case spv::OpAtomicIAdd:
+			case spv::OpAtomicSMin:
+			case spv::OpAtomicSMax:
+			case spv::OpAtomicUMin:
+			case spv::OpAtomicUMax:
+			case spv::OpAtomicAnd:
+			case spv::OpAtomicOr:
+			case spv::OpAtomicXor:
+			case spv::OpAtomicExchange:
 			case spv::OpPhi:
 			case spv::OpImageSampleImplicitLod:
 			case spv::OpImageQuerySize:
@@ -1978,6 +1987,17 @@
 		case spv::OpAtomicStore:
 			return EmitStore(insn, state);
 
+		case spv::OpAtomicIAdd:
+		case spv::OpAtomicSMin:
+		case spv::OpAtomicSMax:
+		case spv::OpAtomicUMin:
+		case spv::OpAtomicUMax:
+		case spv::OpAtomicAnd:
+		case spv::OpAtomicOr:
+		case spv::OpAtomicXor:
+		case spv::OpAtomicExchange:
+			return EmitAtomicOp(insn, state);
+
 		case spv::OpAccessChain:
 		case spv::OpInBoundsAccessChain:
 			return EmitAccessChain(insn, state);
@@ -4695,13 +4715,75 @@
 
 		state->routine->createPointer(resultId, imageBase);
 
-		SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, sizeof(uint32_t));
+		// TODO: GetTexelOffset() returns a byte offset; the >> 2 turns it into a 32-bit element index. Remove the shift once Ben's DivergentPointer changes land.
+		SIMD::Int texelOffset = GetTexelOffset(coordinate, imageType, binding, sizeof(uint32_t)) >> 2;
 		auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
 		dst.move(0, texelOffset);
 
 		return EmitResult::Continue;
 	}
 
+	SpirvShader::EmitResult SpirvShader::EmitAtomicOp(InsnIterator insn, EmitState *state) const
+	{
+		auto &resultType = getType(Type::ID(insn.word(1)));
+		Object::ID resultId = insn.word(2);
+		Object::ID semanticsId = insn.word(5);
+		auto memorySemantics = static_cast<spv::MemorySemanticsMask>(getObject(semanticsId).constantValue[0]);
+		auto memoryOrder = MemoryOrder(memorySemantics);
+		auto value = GenericValue(this, state->routine, insn.word(6));
+		auto &dst = state->routine->createIntermediate(resultId, resultType.sizeInComponents);
+		auto ptr = Pointer<UInt>(state->routine->getPointer(insn.word(3)));
+		auto offsets = state->routine->getIntermediate(insn.word(3)).UInt(0);
+
+		SIMD::UInt x;
+		for (int j = 0; j < SIMD::Width; j++)
+		{
+			If(Extract(state->activeLaneMask(), j) != 0)
+			{
+				auto offset = Extract(offsets, j);
+				auto laneValue = Extract(value.UInt(0), j);
+				UInt v;
+				switch (insn.opcode())
+				{
+				case spv::OpAtomicIAdd:
+					v = AddAtomic(&ptr[offset], laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicAnd:
+					v = AndAtomic(&ptr[offset], laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicOr:
+					v = OrAtomic(&ptr[offset], laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicXor:
+					v = XorAtomic(&ptr[offset], laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicSMin:
+					v = As<UInt>(MinAtomic(As<Pointer<Int>>(&ptr[offset]), As<Int>(laneValue), memoryOrder));
+					break;
+				case spv::OpAtomicSMax:
+					v = As<UInt>(MaxAtomic(As<Pointer<Int>>(&ptr[offset]), As<Int>(laneValue), memoryOrder));
+					break;
+				case spv::OpAtomicUMin:
+					v = MinAtomic(&ptr[offset], laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicUMax:
+					v = MaxAtomic(&ptr[offset], laneValue, memoryOrder);
+					break;
+				case spv::OpAtomicExchange:
+					v = ExchangeAtomic(&ptr[offset], laneValue, memoryOrder);
+					break;
+				default:
+					UNIMPLEMENTED("Atomic op", OpcodeName(insn.opcode()).c_str());
+					break;
+				}
+				x = Insert(x, v, j);
+			}
+		}
+
+		dst.move(0, x);
+		return EmitResult::Continue;
+	}
+
 	void SpirvShader::emitEpilog(SpirvRoutine *routine) const
 	{
 		for (auto insn : *this)
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 76474b4..2eb3cd8 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -729,6 +729,7 @@
 		EmitResult EmitImageRead(InsnIterator insn, EmitState *state) const;
 		EmitResult EmitImageWrite(InsnIterator insn, EmitState *state) const;
 		EmitResult EmitImageTexelPointer(InsnIterator insn, EmitState *state) const;
+		EmitResult EmitAtomicOp(InsnIterator insn, EmitState *state) const;
 
 		SIMD::Int GetTexelOffset(GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize) const;
 
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 6c30207..9bfc908 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -1451,10 +1451,46 @@
 			T(llvm::PointerType::get(T(type), 0)));
 	}
 
-	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
+	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
 	{
 		RR_DEBUG_INFO_UPDATE_LOC();
-		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), llvm::AtomicOrdering::SequentiallyConsistent));
+		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
+	}
+
+	Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		RR_DEBUG_INFO_UPDATE_LOC();
+		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::And, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
+	}
+
+	Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		RR_DEBUG_INFO_UPDATE_LOC();
+		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Or, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
+	}
+
+	Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		RR_DEBUG_INFO_UPDATE_LOC();
+		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xor, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
+	}
+
+	Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		RR_DEBUG_INFO_UPDATE_LOC();
+		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Min, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
+	}
+
+	Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		RR_DEBUG_INFO_UPDATE_LOC();
+		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Max, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
+	}
+
+	Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		RR_DEBUG_INFO_UPDATE_LOC();
+		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, V(ptr), V(value), atomicOrdering(true, memoryOrder)));
 	}
 
 	Value *Nucleus::createTrunc(Value *v, Type *destType)
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index 1adeaf3..593f7f1 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -101,7 +101,13 @@
 		static Value *createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex);
 
 		// Atomic instructions
-		static Value *createAtomicAdd(Value *ptr, Value *value);
+		static Value *createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
+		static Value *createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
+		static Value *createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
+		static Value *createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
+		static Value *createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
+		static Value *createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
+		static Value *createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
 
 		// Cast/Conversion Operators
 		static Value *createTrunc(Value *V, Type *destType);
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index 96a8d25..5952254 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -2507,6 +2507,51 @@
 		return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
 	}
 
+	RValue<UInt> AddAtomic(RValue<Pointer<UInt> > x, RValue<UInt> y, std::memory_order memoryOrder)
+	{
+		return RValue<UInt>(Nucleus::createAtomicAdd(x.value, y.value, memoryOrder));
+	}
+
+	RValue<UInt> AndAtomic(RValue<Pointer<UInt> > x, RValue<UInt> y, std::memory_order memoryOrder)
+	{
+		return RValue<UInt>(Nucleus::createAtomicAnd(x.value, y.value, memoryOrder));
+	}
+
+	RValue<UInt> OrAtomic(RValue<Pointer<UInt> > x, RValue<UInt> y, std::memory_order memoryOrder)
+	{
+		return RValue<UInt>(Nucleus::createAtomicOr(x.value, y.value, memoryOrder));
+	}
+
+	RValue<UInt> XorAtomic(RValue<Pointer<UInt> > x, RValue<UInt> y, std::memory_order memoryOrder)
+	{
+		return RValue<UInt>(Nucleus::createAtomicXor(x.value, y.value, memoryOrder));
+	}
+
+	RValue<Int> MinAtomic(RValue<Pointer<Int> > x, RValue<Int> y, std::memory_order memoryOrder)
+	{
+		return RValue<Int>(Nucleus::createAtomicMin(x.value, y.value, memoryOrder));
+	}
+
+	RValue<UInt> MinAtomic(RValue<Pointer<UInt> > x, RValue<UInt> y, std::memory_order memoryOrder)
+	{
+		return RValue<UInt>(Nucleus::createAtomicMin(x.value, y.value, memoryOrder));
+	}
+
+	RValue<Int> MaxAtomic(RValue<Pointer<Int> > x, RValue<Int> y, std::memory_order memoryOrder)
+	{
+		return RValue<Int>(Nucleus::createAtomicMax(x.value, y.value, memoryOrder));
+	}
+
+	RValue<UInt> MaxAtomic(RValue<Pointer<UInt> > x, RValue<UInt> y, std::memory_order memoryOrder)
+	{
+		return RValue<UInt>(Nucleus::createAtomicMax(x.value, y.value, memoryOrder));
+	}
+
+	RValue<UInt> ExchangeAtomic(RValue<Pointer<UInt> > x, RValue<UInt> y, std::memory_order memoryOrder)
+	{
+		return RValue<UInt>(Nucleus::createAtomicExchange(x.value, y.value, memoryOrder));
+	}
+
 	UInt::UInt(Argument<UInt> argument)
 	{
 		materialize();  // FIXME(b/129757459)
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 4e95806..0b14131 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -1267,6 +1267,17 @@
 	RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y);
 	RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y);
 	RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max);
+
+	RValue<UInt> AddAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
+	RValue<UInt> AndAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
+	RValue<UInt> OrAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
+	RValue<UInt> XorAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
+	RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder);
+	RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder);
+	RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
+	RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
+	RValue<UInt> ExchangeAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder);
+
 //	RValue<UInt> RoundUInt(RValue<Float> cast);
 
 	class Int2 : public LValue<Int2>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 3afed7b..e4ba796 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -1033,12 +1033,48 @@
 		return createAdd(ptr, index);
 	}
 
-	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
+	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder)
 	{
 		UNIMPLEMENTED("createAtomicAdd");
 		return nullptr;
 	}
 
+	Value *Nucleus::createAtomicAnd(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		UNIMPLEMENTED("createAtomicAnd");
+		return nullptr;
+	}
+
+	Value *Nucleus::createAtomicOr(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		UNIMPLEMENTED("createAtomicOr");
+		return nullptr;
+	}
+
+	Value *Nucleus::createAtomicXor(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		UNIMPLEMENTED("createAtomicXor");
+		return nullptr;
+	}
+
+	Value *Nucleus::createAtomicMin(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		UNIMPLEMENTED("createAtomicMin");
+		return nullptr;
+	}
+
+	Value *Nucleus::createAtomicMax(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		UNIMPLEMENTED("createAtomicMax");
+		return nullptr;
+	}
+
+	Value *Nucleus::createAtomicExchange(Value *ptr, Value *value, std::memory_order memoryOrder)
+	{
+		UNIMPLEMENTED("createAtomicExchange");
+		return nullptr;
+	}
+
 	static Value *createCast(Ice::InstCast::OpKind op, Value *v, Type *destType)
 	{
 		if(v->getType() == T(destType))