Reactor (LLVM): Add support for masked loads and stores.

Masked loads and stores have wider support than gather / scatter, and are faster.

Bug: b/135609394
Change-Id: Ib1435331f3130fbef7cbf9eaf1c0c2570a0ec2a1
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33169
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index 781bc15..0734763 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -1446,6 +1446,40 @@
 		}
 	}
 
+	// Emits an llvm.masked.load call on ptr; the passthrough operand is all-zero, so masked-off lanes read as zero.
+	Value *Nucleus::createMaskedLoad(Value *ptr, Type *elTy, Value *mask, unsigned int alignment)
+	{
+		ASSERT(V(ptr)->getType()->isPointerTy());
+		ASSERT(V(mask)->getType()->isVectorTy());
+
+		// The mask's lane count determines the width of the loaded vector.
+		const auto laneCount = V(mask)->getType()->getVectorNumElements();
+		auto *resultTy = ::llvm::VectorType::get(T(elTy), laneCount);
+		auto *boolVecTy = ::llvm::VectorType::get(::llvm::Type::getInt1Ty(*::context), laneCount);
+		// vec<int, int, ...> -> vec<bool, bool, ...> (integer cast with isSigned = false)
+		auto *laneEnable = ::builder->CreateIntCast(V(mask), boolVecTy, false);
+		auto *alignArg = ::llvm::ConstantInt::get(::llvm::Type::getInt32Ty(*::context), alignment);
+		auto *intrinsic = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_load, { resultTy, resultTy->getPointerTo() } );
+		return V(::builder->CreateCall(intrinsic, { V(ptr), alignArg, laneEnable, ::llvm::Constant::getNullValue(resultTy) }));
+	}
+
+	// Emits an llvm.masked.store call that writes val through ptr under control of mask.
+	void Nucleus::createMaskedStore(Value *ptr, Value *val, Value *mask, unsigned int alignment)
+	{
+		ASSERT(V(ptr)->getType()->isPointerTy());
+		ASSERT(V(val)->getType()->isVectorTy());
+		ASSERT(V(mask)->getType()->isVectorTy());
+
+		// The i1 mask vector takes its lane count from mask; val's own type selects the intrinsic overload.
+		const auto laneCount = V(mask)->getType()->getVectorNumElements();
+		auto *boolVecTy = ::llvm::VectorType::get(::llvm::Type::getInt1Ty(*::context), laneCount);
+		auto *laneEnable = ::builder->CreateIntCast(V(mask), boolVecTy, false); // vec<int, int, ...> -> vec<bool, bool, ...>
+		auto *dataTy = V(val)->getType();
+		auto *alignArg = ::llvm::ConstantInt::get(::llvm::Type::getInt32Ty(*::context), alignment);
+		auto *intrinsic = ::llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::masked_store, { dataTy, dataTy->getPointerTo() } );
+		::builder->CreateCall(intrinsic, { V(val), V(ptr), alignArg, laneEnable });
+	}
+
 	Value *Nucleus::createGather(Value *base, Type *elTy, Value *offsets, Value *mask, unsigned int alignment)
 	{
 		ASSERT(V(base)->getType()->isPointerTy());