Handle instrumentation of scalar single-precision (_ss) intrinsics

Instrumentation of double-precision intrinsics such as x86_sse41_round_sd
was already handled by https://reviews.llvm.org/D82398, but their single-
precision counterparts were not. Also instrument rcpss and rsqrtss.

Bug: b/172238865
Change-Id: I7d1395189270678d33e9d573f67b6f2dfeaf996a
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/65569
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Commit-Queue: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
diff --git a/third_party/llvm-10.0/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/third_party/llvm-10.0/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index f581142..347af58 100644
--- a/third_party/llvm-10.0/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/third_party/llvm-10.0/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3042,6 +3042,44 @@
     SOC.Done(&I);
   }
 
+  // Instrument mm*_sd|ss intrinsics that take one or two vector operands.
+  void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Shadow = getShadow(&I, 0);
+    // rcp_ss/rsqrt_ss have a single vector operand, so the result shadow is just
+    // that operand's shadow; operand 1 would be the callee, not an argument.
+    if (I.getNumArgOperands() > 1) {
+      unsigned Width =
+          cast<VectorType>(I.getArgOperand(0)->getType())->getNumElements();
+      // First element of second operand, remaining elements of first operand
+      SmallVector<uint32_t, 16> Mask(1, Width);
+      for (uint32_t i = 1; i < Width; i++)
+        Mask.push_back(i);
+      Shadow = IRB.CreateShuffleVector(Shadow, getShadow(&I, 1), Mask);
+    }
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
+  void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
+    // Result shadow: lane 0 is the OR of both operands' lane-0 shadows; the
+    // remaining lanes take the first operand's shadow unchanged.
+    IRBuilder<> IRB(&I);
+    Value *ShadowA = getShadow(&I, 0);
+    Value *ShadowB = getShadow(&I, 1);
+    Value *Combined = IRB.CreateOr(ShadowA, ShadowB);
+    unsigned NumLanes =
+        cast<VectorType>(I.getArgOperand(0)->getType())->getNumElements();
+    // Shuffle indices >= NumLanes select from the second shuffle input, so
+    // index NumLanes picks lane 0 of Combined.
+    SmallVector<uint32_t, 16> LaneMask(1, NumLanes);
+    for (uint32_t Lane = 1; Lane < NumLanes; ++Lane)
+      LaneMask.push_back(Lane);
+    Value *Merged = IRB.CreateShuffleVector(ShadowA, Combined, LaneMask);
+    setShadow(&I, Merged);
+    setOriginForNaryOp(I);
+  }
+
   void visitIntrinsicInst(IntrinsicInst &I) {
     switch (I.getIntrinsicID()) {
     case Intrinsic::lifetime_start:
@@ -3281,6 +3319,19 @@
       handlePclmulIntrinsic(I);
       break;
 
+    case Intrinsic::x86_sse41_round_sd:
+    case Intrinsic::x86_sse41_round_ss:
+    case Intrinsic::x86_sse_rcp_ss:
+    case Intrinsic::x86_sse_rsqrt_ss:
+      handleUnarySdSsIntrinsic(I);
+      break;
+    case Intrinsic::x86_sse2_max_sd:
+    case Intrinsic::x86_sse_max_ss:
+    case Intrinsic::x86_sse2_min_sd:
+    case Intrinsic::x86_sse_min_ss:
+      handleBinarySdSsIntrinsic(I);
+      break;
+
     case Intrinsic::is_constant:
       // The result of llvm.is.constant() is always defined.
       setShadow(&I, getCleanShadow(&I));