SpirvShader: Implement OpMemoryBarrier.
Tests: dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.*
Tests: dEQP-VK.subgroups.basic.compute.*
Tests: dEQP-VK.compute.basic.*
Bug: b/132232716
Change-Id: If238f6b4af5c0ff6909a62241e0adb5677cb6c0b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30852
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Tested-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 34db239..020309e 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -880,6 +880,7 @@
case spv::OpAtomicStore:
case spv::OpImageWrite:
case spv::OpCopyMemory:
+ case spv::OpMemoryBarrier:
// Don't need to do anything during analysis pass
break;
@@ -2461,6 +2462,9 @@
case spv::OpCopyMemory:
return EmitCopyMemory(insn, state);
+ case spv::OpMemoryBarrier:
+ return EmitMemoryBarrier(insn, state);
+
case spv::OpGroupNonUniformElect:
return EmitGroupNonUniform(insn, state);
@@ -4316,7 +4320,13 @@
std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
{
- switch(memorySemantics)
+ auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
+ spv::MemorySemanticsAcquireMask |
+ spv::MemorySemanticsReleaseMask |
+ spv::MemorySemanticsAcquireReleaseMask |
+ spv::MemorySemanticsSequentiallyConsistentMask
+ );
+ switch (control)
{
case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
@@ -4324,7 +4334,9 @@
case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
default:
- UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
+ // "it is invalid for more than one of these four bits to be set:
+ // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
+ UNREACHABLE("MemorySemanticsMask: %x", int(control));
return std::memory_order_acq_rel;
}
}
@@ -5456,11 +5468,29 @@
return EmitResult::Continue;
}
+ // EmitMemoryBarrier implements the SPIR-V OpMemoryBarrier instruction.
+ // insn.word(2) is the Memory Semantics operand, which must be an
+ // OpConstant scalar (enforced by GetConstScalarInt). The Memory Scope
+ // operand (presumably insn.word(1) per the SPIR-V spec — confirm) is
+ // currently ignored; see the TODO below.
+ SpirvShader::EmitResult SpirvShader::EmitMemoryBarrier(InsnIterator insn, EmitState *state) const
+ {
+ auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(2)));
+ // TODO: We probably want to consider the memory scope here. For now,
+ // just always emit the full fence.
+ Fence(semantics);
+ return EmitResult::Continue;
+ }
+
+ // Fence emits a rr::Fence for the ordering bits of the given memory
+ // semantics mask, translated via MemoryOrder(). A mask of None emits
+ // nothing.
+ // NOTE(review): a mask with only storage-class bits set (e.g.
+ // UniformMemory) and no Acquire/Release/AcquireRelease/
+ // SequentiallyConsistent bit would pass this None check, map to
+ // memory_order_relaxed in MemoryOrder(), and trip the ASSERT_MSG in
+ // rr::Fence — confirm whether such masks can reach this path.
+ void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
+ {
+ if (semantics == spv::MemorySemanticsMaskNone)
+ {
+ return; // no-op
+ }
+ rr::Fence(MemoryOrder(semantics));
+ }
+
SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
{
auto &type = getType(Type::ID(insn.word(1)));
Object::ID resultId = insn.word(2);
- auto scope = GetScope(insn.word(3));
+ auto scope = spv::Scope(GetConstScalarInt(insn.word(3)));
ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");
auto &dst = state->routine->createIntermediate(resultId, type.sizeInComponents);
@@ -5485,12 +5515,12 @@
return EmitResult::Continue;
}
- spv::Scope SpirvShader::GetScope(Object::ID id) const
+ // GetConstScalarInt returns the value of the scalar OpConstant object
+ // with the given id. Generalizes the old GetScope(): callers now cast
+ // the raw word to spv::Scope, spv::MemorySemanticsMask, etc. themselves.
+ // Asserts that the object is a constant and single-component.
+ uint32_t SpirvShader::GetConstScalarInt(Object::ID id) const
{
auto &scopeObj = getObject(id);
ASSERT(scopeObj.kind == Object::Kind::Constant);
ASSERT(getType(scopeObj.type).sizeInComponents == 1);
- return spv::Scope(scopeObj.constantValue[0]);
+ return scopeObj.constantValue[0];
}
void SpirvShader::emitEpilog(SpirvRoutine *routine) const
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index d26f1a6..cb37cce 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -934,11 +934,15 @@
EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
+ EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
void GetImageDimensions(SpirvRoutine const *routine, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
SIMD::Pointer GetTexelAddress(SpirvRoutine const *routine, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
- spv::Scope GetScope(Object::ID id) const;
+ uint32_t GetConstScalarInt(Object::ID id) const;
+
+ // Emits a rr::Fence for the given MemorySemanticsMask.
+ void Fence(spv::MemorySemanticsMask semantics) const;
// OpcodeName() returns the name of the opcode op.
// If NDEBUG is defined, then OpcodeName() will only return the numerical code.
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index ecb74f3..8616bea 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -1455,6 +1455,11 @@
::builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
}
+ // createFence emits an LLVM fence instruction with the given ordering.
+ // The first argument to atomicOrdering() presumably selects the
+ // atomic (as opposed to non-atomic) ordering mapping — confirm against
+ // atomicOrdering()'s definition.
+ void Nucleus::createFence(std::memory_order memoryOrder)
+ {
+ ::builder->CreateFence(atomicOrdering(true, memoryOrder));
+ }
+
Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
{
RR_DEBUG_INFO_UPDATE_LOC();
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index 59ece44..a27c4e9 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -124,6 +124,9 @@
static Value *createGather(Value *base, Type *elementType, Value *offsets, Value *mask, unsigned int alignment);
static void createScatter(Value *base, Value *value, Value *offsets, Value *mask, unsigned int alignment);
+ // Barrier instructions
+ static void createFence(std::memory_order memoryOrder);
+
// Atomic instructions
static Value *createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
static Value *createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index 56ab1da..c4740a7 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -13,6 +13,7 @@
// limitations under the License.
#include "Reactor.hpp"
+#include "Debug.hpp"
// Define REACTOR_MATERIALIZE_LVALUES_ON_DEFINITION to non-zero to ensure all
// variables have a stack location obtained throuch alloca().
@@ -4226,4 +4227,14 @@
Nucleus::createScatter(base.value, val.value, offsets.value, mask.value, alignment);
}
+ // Fence emits a memory barrier with the given ordering. Only the four
+ // orderings meaningful for a standalone fence are accepted; relaxed
+ // (and consume) are rejected by the assertion below.
+ void Fence(std::memory_order memoryOrder)
+ {
+ ASSERT_MSG(memoryOrder == std::memory_order_acquire ||
+ memoryOrder == std::memory_order_release ||
+ memoryOrder == std::memory_order_acq_rel ||
+ memoryOrder == std::memory_order_seq_cst,
+ "Unsupported memoryOrder: %d", int(memoryOrder));
+ Nucleus::createFence(memoryOrder);
+ }
+
}
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 38bc625..6fbc061 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2385,6 +2385,11 @@
Store(RValue<T>(value), RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
}
+ // Fence adds a memory barrier that enforces ordering constraints on memory
+ // operations. memoryOrder can only be one of:
+ // std::memory_order_acquire, std::memory_order_release,
+ // std::memory_order_acq_rel, or std::memory_order_seq_cst.
+ void Fence(std::memory_order memoryOrder);
template<class T, int S = 1>
class Array : public LValue<T>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index e196415..4310aba 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3455,6 +3455,7 @@
// Below are functions currently unimplemented for the Subzero backend.
// They are stubbed to satisfy the linker.
+ void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); } // Subzero backend has no fence support yet; LLVMReactor.cpp has the real implementation.
Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; }
void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); }
RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); }