SpirvShader: Implement OpMemoryBarrier.
Tests: dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.*
Tests: dEQP-VK.subgroups.basic.compute.*
Tests: dEQP-VK.compute.basic.*
Bug: b/132232716
Change-Id: If238f6b4af5c0ff6909a62241e0adb5677cb6c0b
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/30852
Presubmit-Ready: Ben Clayton <bclayton@google.com>
Tested-by: Chris Forbes <chrisforbes@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 34db239..020309e 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -880,6 +880,7 @@
case spv::OpAtomicStore:
case spv::OpImageWrite:
case spv::OpCopyMemory:
+ case spv::OpMemoryBarrier:
// Don't need to do anything during analysis pass
break;
@@ -2461,6 +2462,9 @@
case spv::OpCopyMemory:
return EmitCopyMemory(insn, state);
+ case spv::OpMemoryBarrier:
+ return EmitMemoryBarrier(insn, state);
+
case spv::OpGroupNonUniformElect:
return EmitGroupNonUniform(insn, state);
@@ -4316,7 +4320,13 @@
std::memory_order SpirvShader::MemoryOrder(spv::MemorySemanticsMask memorySemantics)
{
- switch(memorySemantics)
+ auto control = static_cast<uint32_t>(memorySemantics) & static_cast<uint32_t>(
+ spv::MemorySemanticsAcquireMask |
+ spv::MemorySemanticsReleaseMask |
+ spv::MemorySemanticsAcquireReleaseMask |
+ spv::MemorySemanticsSequentiallyConsistentMask
+ );
+ switch (control)
{
case spv::MemorySemanticsMaskNone: return std::memory_order_relaxed;
case spv::MemorySemanticsAcquireMask: return std::memory_order_acquire;
@@ -4324,7 +4334,9 @@
case spv::MemorySemanticsAcquireReleaseMask: return std::memory_order_acq_rel;
case spv::MemorySemanticsSequentiallyConsistentMask: return std::memory_order_acq_rel; // Vulkan 1.1: "SequentiallyConsistent is treated as AcquireRelease"
default:
- UNREACHABLE("MemorySemanticsMask %x", memorySemantics);
+ // "it is invalid for more than one of these four bits to be set:
+ // Acquire, Release, AcquireRelease, or SequentiallyConsistent."
+ UNREACHABLE("MemorySemanticsMask: %x", int(control));
return std::memory_order_acq_rel;
}
}
@@ -5456,11 +5468,29 @@
return EmitResult::Continue;
}
+ // EmitMemoryBarrier implements the SPIR-V OpMemoryBarrier instruction.
+ // insn.word(2) is the Memory Semantics operand, which must be an
+ // OpConstant scalar (enforced by GetConstScalarInt). The Memory Scope
+ // operand (presumably insn.word(1) per the SPIR-V spec — confirm) is
+ // currently ignored; see the TODO below.
+ SpirvShader::EmitResult SpirvShader::EmitMemoryBarrier(InsnIterator insn, EmitState *state) const
+ {
+ auto semantics = spv::MemorySemanticsMask(GetConstScalarInt(insn.word(2)));
+ // TODO: We probably want to consider the memory scope here. For now,
+ // just always emit the full fence.
+ Fence(semantics);
+ return EmitResult::Continue;
+ }
+
+ // Fence emits a rr::Fence for the ordering bits of the given memory
+ // semantics mask, translated via MemoryOrder(). A mask of None emits
+ // nothing.
+ // NOTE(review): a mask with only storage-class bits set (e.g.
+ // UniformMemory) and no Acquire/Release/AcquireRelease/
+ // SequentiallyConsistent bit would pass this None check, map to
+ // memory_order_relaxed in MemoryOrder(), and trip the ASSERT_MSG in
+ // rr::Fence — confirm whether such masks can reach this path.
+ void SpirvShader::Fence(spv::MemorySemanticsMask semantics) const
+ {
+ if (semantics == spv::MemorySemanticsMaskNone)
+ {
+ return; // no-op
+ }
+ rr::Fence(MemoryOrder(semantics));
+ }
+
SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const
{
auto &type = getType(Type::ID(insn.word(1)));
Object::ID resultId = insn.word(2);
- auto scope = GetScope(insn.word(3));
+ auto scope = spv::Scope(GetConstScalarInt(insn.word(3)));
ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1");
auto &dst = state->routine->createIntermediate(resultId, type.sizeInComponents);
@@ -5485,12 +5515,12 @@
return EmitResult::Continue;
}
- spv::Scope SpirvShader::GetScope(Object::ID id) const
+ // GetConstScalarInt returns the value of the scalar OpConstant object
+ // with the given id. Generalizes the old GetScope(): callers now cast
+ // the raw word to spv::Scope, spv::MemorySemanticsMask, etc. themselves.
+ // Asserts that the object is a constant and single-component.
+ uint32_t SpirvShader::GetConstScalarInt(Object::ID id) const
{
auto &scopeObj = getObject(id);
ASSERT(scopeObj.kind == Object::Kind::Constant);
ASSERT(getType(scopeObj.type).sizeInComponents == 1);
- return spv::Scope(scopeObj.constantValue[0]);
+ return scopeObj.constantValue[0];
}
void SpirvShader::emitEpilog(SpirvRoutine *routine) const
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index d26f1a6..cb37cce 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -934,11 +934,15 @@
EmitResult EmitAtomicCompareExchange(InsnIterator insn, EmitState *state) const;
EmitResult EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const;
EmitResult EmitCopyMemory(InsnIterator insn, EmitState *state) const;
+ EmitResult EmitMemoryBarrier(InsnIterator insn, EmitState *state) const;
EmitResult EmitGroupNonUniform(InsnIterator insn, EmitState *state) const;
void GetImageDimensions(SpirvRoutine const *routine, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const;
SIMD::Pointer GetTexelAddress(SpirvRoutine const *routine, SIMD::Pointer base, GenericValue const & coordinate, Type const & imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect) const;
- spv::Scope GetScope(Object::ID id) const;
+ uint32_t GetConstScalarInt(Object::ID id) const;
+
+ // Emits a rr::Fence for the given MemorySemanticsMask.
+ void Fence(spv::MemorySemanticsMask semantics) const;
// OpcodeName() returns the name of the opcode op.
// If NDEBUG is defined, then OpcodeName() will only return the numerical code.
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index ecb74f3..8616bea 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -1455,6 +1455,11 @@
::builder->CreateCall(func, { V(val), elPtrs, align, i8Mask });
}
+ // createFence emits an LLVM fence instruction with the given ordering.
+ // The first argument to atomicOrdering() presumably selects the
+ // atomic (as opposed to non-atomic) ordering mapping — confirm against
+ // atomicOrdering()'s definition.
+ void Nucleus::createFence(std::memory_order memoryOrder)
+ {
+ ::builder->CreateFence(atomicOrdering(true, memoryOrder));
+ }
+
Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
{
RR_DEBUG_INFO_UPDATE_LOC();
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index 59ece44..a27c4e9 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -124,6 +124,9 @@
static Value *createGather(Value *base, Type *elementType, Value *offsets, Value *mask, unsigned int alignment);
static void createScatter(Value *base, Value *value, Value *offsets, Value *mask, unsigned int alignment);
+ // Barrier instructions
+ static void createFence(std::memory_order memoryOrder);
+
// Atomic instructions
static Value *createAtomicAdd(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
static Value *createAtomicSub(Value *ptr, Value *value, std::memory_order memoryOrder = std::memory_order_relaxed);
diff --git a/src/Reactor/Reactor.cpp b/src/Reactor/Reactor.cpp
index 56ab1da..c4740a7 100644
--- a/src/Reactor/Reactor.cpp
+++ b/src/Reactor/Reactor.cpp
@@ -13,6 +13,7 @@
// limitations under the License.
#include "Reactor.hpp"
+#include "Debug.hpp"
// Define REACTOR_MATERIALIZE_LVALUES_ON_DEFINITION to non-zero to ensure all
// variables have a stack location obtained throuch alloca().
@@ -4226,4 +4227,14 @@
Nucleus::createScatter(base.value, val.value, offsets.value, mask.value, alignment);
}
+ // Fence emits a memory barrier with the given ordering. Only the four
+ // orderings meaningful for a standalone fence are accepted; relaxed
+ // (and consume) are rejected by the assertion below.
+ void Fence(std::memory_order memoryOrder)
+ {
+ ASSERT_MSG(memoryOrder == std::memory_order_acquire ||
+ memoryOrder == std::memory_order_release ||
+ memoryOrder == std::memory_order_acq_rel ||
+ memoryOrder == std::memory_order_seq_cst,
+ "Unsupported memoryOrder: %d", int(memoryOrder));
+ Nucleus::createFence(memoryOrder);
+ }
+
}
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 38bc625..6fbc061 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2385,6 +2385,11 @@
Store(RValue<T>(value), RValue<Pointer<T>>(pointer), alignment, atomic, memoryOrder);
}
+ // Fence adds a memory barrier that enforces ordering constraints on memory
+ // operations. memoryOrder can only be one of:
+ // std::memory_order_acquire, std::memory_order_release,
+ // std::memory_order_acq_rel, or std::memory_order_seq_cst.
+ void Fence(std::memory_order memoryOrder);
template<class T, int S = 1>
class Array : public LValue<T>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index e196415..4310aba 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3455,6 +3455,7 @@
// Below are functions currently unimplemented for the Subzero backend.
// They are stubbed to satisfy the linker.
+ void Nucleus::createFence(std::memory_order memoryOrder) { UNIMPLEMENTED("Subzero createFence()"); } // Subzero backend has no fence support yet; LLVMReactor.cpp has the real implementation.
Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createGather()"); return nullptr; }
void Nucleus::createScatter(Value *base, Value *val, Value *offsets, Value *mask, unsigned int alignment) { UNIMPLEMENTED("Subzero createScatter()"); }
RValue<Float4> Sin(RValue<Float4> x) { UNIMPLEMENTED("Subzero Sin()"); return Float4(0); }