Work around MemorySanitizer scalar SSE false positives

Scalar SSE instructions only use the lowest scalar of an SSE vector
register, but MemorySanitizer doesn't recognize some of them, so it
checks the entire 128-bit operand for uninitialized bits.

This change makes sure the other elements of the vector get zero-
initialized instead of being left undefined. This affects the Round,
Trunc, Frac, Ceil, Floor, Sqrt, and RcpSqrt scalar operations.

Note that this workaround results in MemorySanitizer marking the entire
output vector as having a well-defined value, which isn't actually the
case in non-MSan builds. Fortunately, a scalar can't be cast into a
vector (unlike our 'emulated' small vectors), so we just have to make
sure to immediately extract the scalar from the intrinsic's result.

Bug: b/172238865
Change-Id: If68388e476ac9e27e2de33ddf2efab4124540c7a
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/54269
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index d36c2e8..8564f8a 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -3322,7 +3322,7 @@
 #if defined(__i386__) || defined(__x86_64__)
 	if(CPUID::supportsSSE4_1())
 	{
-		frc = x - Floor(x);
+		frc = x - x86::floorps(x);
 	}
 	else
 	{
@@ -3656,7 +3656,17 @@
 
 RValue<Float> rcpss(RValue<Float> val)
 {
-	Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::type()))), val.value(), 0);
+	Value *undef = V(llvm::UndefValue::get(T(Float4::type())));
+
+	// TODO(b/172238865): MemorySanitizer does not support the rcpss instruction,
+	// which makes it look at the entire 128-bit input operand for undefined bits.
+	// Use zero-initialized values instead.
+	if(__has_feature(memory_sanitizer))
+	{
+		undef = Float4(0).loadValue();
+	}
+
+	Value *vector = Nucleus::createInsertElement(undef, val.value(), 0);
 
 	return RValue<Float>(Nucleus::createExtractElement(createInstruction(llvm::Intrinsic::x86_sse_rcp_ss, vector), Float::type(), 0));
 }
@@ -3668,7 +3678,17 @@
 
 RValue<Float> rsqrtss(RValue<Float> val)
 {
-	Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::type()))), val.value(), 0);
+	Value *undef = V(llvm::UndefValue::get(T(Float4::type())));
+
+	// TODO(b/172238865): MemorySanitizer does not support the rsqrtss instruction,
+	// which makes it look at the entire 128-bit input operand for undefined bits.
+	// Use zero-initialized values instead.
+	if(__has_feature(memory_sanitizer))
+	{
+		undef = Float4(0).loadValue();
+	}
+
+	Value *vector = Nucleus::createInsertElement(undef, val.value(), 0);
 
 	return RValue<Float>(Nucleus::createExtractElement(createInstruction(llvm::Intrinsic::x86_sse_rsqrt_ss, vector), Float::type(), 0));
 }
@@ -3703,6 +3723,15 @@
 	llvm::Function *roundss = llvm::Intrinsic::getDeclaration(jit->module.get(), llvm::Intrinsic::x86_sse41_round_ss);
 
 	Value *undef = V(llvm::UndefValue::get(T(Float4::type())));
+
+	// TODO(b/172238865): MemorySanitizer does not support the roundss instruction,
+	// which makes it look at the entire 128-bit input operands for undefined bits.
+	// Use zero-initialized values instead.
+	if(__has_feature(memory_sanitizer))
+	{
+		undef = Float4(0).loadValue();
+	}
+
 	Value *vector = Nucleus::createInsertElement(undef, val.value(), 0);
 
 	return RValue<Float>(Nucleus::createExtractElement(V(jit->builder->CreateCall(roundss, { V(undef), V(vector), V(Nucleus::createConstantInt(imm)) })), Float::type(), 0));