Avoid vector element insert type mismatch.
Subzero silently allows inserting a 32-bit integer into a vector with
16-bit elements. This generates a pinsrd instruction, which requires
SSE4.1, even when the target is restricted to SSE2. This change
truncates the value to 16 bits before inserting it, so that a pinsrw
instruction, which is available in SSE2, is emitted instead.
Bug swiftshader:20
Change-Id: I37bd0ebb6d9cfdd2cdef4788e89e9672386c685e
Reviewed-on: https://swiftshader-review.googlesource.com/8450
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index cf32cc7..d3fe63d 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -2935,7 +2935,8 @@
Short4::Short4(RValue<Int> cast)
{
Value *vector = loadValue();
- Value *insert = Nucleus::createInsertElement(vector, cast.value, 0);
+ Value *element = Nucleus::createTrunc(cast.value, Short::getType());
+ Value *insert = Nucleus::createInsertElement(vector, element, 0);
Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
storeValue(swizzle);