Remove unimplemented MaskedStore/MaskedLoad usage

Reactor's MaskedStore and MaskedLoad intrinsics were only implemented
for the LLVM backend, and would trigger asserts when used with the
Subzero backend.

This patch addresses the issue by falling back to scatter/gather
operations instead. While slower, this is acceptable because this code
path is rarely exercised.
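
For illustration, a masked load of four consecutive floats can be
expressed as a gather with sequential per-lane offsets (the offsets,
mask, and alignment below are illustrative values, not taken verbatim
from the patch):

    Int4 offs = Int4(0, 4, 8, 12);  // sequential byte offsets for 4 floats
    Float4 v = rr::Gather(rr::Pointer<Float>(base), offs, mask, sizeof(float), zeroMaskedLanes);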

Note that these Reactor functions have been marked with the
[[deprecated]] C++ attribute, so any new use produces a warning, which
we treat as an error. They must first be implemented in Subzero (an
'emulated' implementation, as we already have for gather/scatter, is
acceptable) before they can be used again.
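
For example, with deprecation warnings treated as errors (e.g.
-Werror=deprecated-declarations on GCC/Clang), a hypothetical new call
site like the one below fails to compile:

    Float4 v = rr::MaskedLoad(rr::Pointer<Float4>(base), mask, 4);  // error: 'MaskedLoad' is deprecated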

Bug: b/192310780
Change-Id: I42b3691f6c09ee4f884ad8960492558e372abe6c
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/56110
Presubmit-Ready: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Sean Risser <srisser@google.com>
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index bb9205e..99f5978 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp
@@ -329,6 +329,7 @@
 			// Offsets are sequential. Perform regular load.
 			return rr::Load(rr::Pointer<T>(base + staticOffsets[0]), alignment, atomic, order);
 		}
+
 		if(hasStaticEqualOffsets())
 		{
 			// Load one, replicate.
@@ -381,10 +382,7 @@
 			break;
 		}
 
-		if(hasStaticSequentialOffsets(sizeof(float)))
-		{
-			return rr::MaskedLoad(rr::Pointer<T>(base + staticOffsets[0]), mask, alignment, zeroMaskedLanes);
-		}
+		// TODO(b/195446858): Optimize static sequential offsets case by using masked load.
 
 		return rr::Gather(rr::Pointer<EL>(base), offs, mask, alignment, zeroMaskedLanes);
 	}
@@ -458,20 +456,15 @@
 				*rr::Pointer<EL>(base + staticOffsets[0], alignment) = As<EL>(scalarVal);
 			}
 		}
-		else if(hasStaticSequentialOffsets(sizeof(float)))
+		else if(hasStaticSequentialOffsets(sizeof(float)) &&
+		        isStaticallyInBounds(sizeof(float), robustness))
 		{
-			if(isStaticallyInBounds(sizeof(float), robustness))
-			{
-				// Pointer has no elements OOB, and the store is not atomic.
-				// Perform a RMW.
-				auto p = rr::Pointer<SIMD::Int>(base + staticOffsets[0], alignment);
-				auto prev = *p;
-				*p = (prev & ~mask) | (As<SIMD::Int>(val) & mask);
-			}
-			else
-			{
-				rr::MaskedStore(rr::Pointer<T>(base + staticOffsets[0]), val, mask, alignment);
-			}
+			// TODO(b/195446858): Optimize using masked store.
+			// Pointer has no elements OOB, and the store is not atomic.
+			// Perform a read-modify-write.
+			auto p = rr::Pointer<SIMD::Int>(base + staticOffsets[0], alignment);
+			auto prev = *p;
+			*p = (prev & ~mask) | (As<SIMD::Int>(val) & mask);
 		}
 		else
 		{
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 58c7ca5..3d330b8 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -1586,6 +1586,7 @@
 		}
 		// Fall through to default case.
 	default:
+		// TODO(b/192310780): StorageClassFunction out-of-bounds accesses are undefined behavior.
 		// TODO(b/137183137): Optimize if the pointer resulted from OpInBoundsAccessChain.
 		// TODO(b/131224163): Optimize cases statically known to be within bounds.
 		return OutOfBoundsBehavior::UndefinedValue;
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index 240e9dc..a916aba 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -2528,10 +2528,11 @@
 }
 
 // TODO: Use SIMD to template these.
-RValue<Float4> MaskedLoad(RValue<Pointer<Float4>> base, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
-RValue<Int4> MaskedLoad(RValue<Pointer<Int4>> base, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
-void MaskedStore(RValue<Pointer<Float4>> base, RValue<Float4> val, RValue<Int4> mask, unsigned int alignment);
-void MaskedStore(RValue<Pointer<Int4>> base, RValue<Int4> val, RValue<Int4> mask, unsigned int alignment);
+// TODO(b/155867273): These can be undeprecated if implemented for Subzero.
+[[deprecated]] RValue<Float4> MaskedLoad(RValue<Pointer<Float4>> base, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
+[[deprecated]] RValue<Int4> MaskedLoad(RValue<Pointer<Int4>> base, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
+[[deprecated]] void MaskedStore(RValue<Pointer<Float4>> base, RValue<Float4> val, RValue<Int4> mask, unsigned int alignment);
+[[deprecated]] void MaskedStore(RValue<Pointer<Int4>> base, RValue<Int4> val, RValue<Int4> mask, unsigned int alignment);
 
 RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
 RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes = false);
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 9127871..458b357 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -4280,13 +4280,14 @@
 Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment, bool zeroMaskedLanes)
 {
 	RR_DEBUG_INFO_UPDATE_LOC();
-	UNIMPLEMENTED_NO_BUG("Subzero createMaskedLoad()");
+	UNIMPLEMENTED("b/155867273 Subzero createMaskedLoad()");
 	return nullptr;
 }
+
 void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
 {
 	RR_DEBUG_INFO_UPDATE_LOC();
-	UNIMPLEMENTED_NO_BUG("Subzero createMaskedStore()");
+	UNIMPLEMENTED("b/155867273 Subzero createMaskedStore()");
 }
 
 RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)