Subzero MulHigh implementation for Int4/UInt4
Also add implementations of multiplication and right shift for Long type.
Bug b/126873455
Change-Id: I9952c2b9a3feca6a7741cd02e2295340935e4447
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/25988
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Chris Forbes <chrisforbes@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index b4f1971..ee4b036 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -4087,6 +4087,52 @@
}
}
+ RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y)
+ {
+ // Returns an Int4 whose lane i is Int((Long(x[i]) * Long(y[i])) >> 32), i.e. the product's upper half if Long is 64-bit.
+ // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
+ // Scalarized implementation: one Extract / multiply / shift / Insert sequence per lane.
+ Int4 result; // Declared without an initializer; all four lanes are written below.
+ result = Insert(result, Int((Long(Extract(x, 0)) * Long(Extract(y, 0))) >> Long(Int(32))), 0);
+ result = Insert(result, Int((Long(Extract(x, 1)) * Long(Extract(y, 1))) >> Long(Int(32))), 1);
+ result = Insert(result, Int((Long(Extract(x, 2)) * Long(Extract(y, 2))) >> Long(Int(32))), 2);
+ result = Insert(result, Int((Long(Extract(x, 3)) * Long(Extract(y, 3))) >> Long(Int(32))), 3);
+
+ return result;
+ }
+
+ RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y)
+ {
+ // Returns a UInt4 whose lane i is built from (Long(UInt(x[i])) * Long(UInt(y[i]))) >> 32, narrowed to a 32-bit lane.
+ // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
+ if(false) // Partial product based implementation. Never taken: the condition is the literal false.
+ {
+ auto xh = x >> 16; // x shifted right by 16: the upper 16 bits of each lane.
+ auto yh = y >> 16;
+ auto xl = x & UInt4(0x0000FFFF); // Lower 16 bits of each lane.
+ auto yl = y & UInt4(0x0000FFFF);
+ auto xlyh = xl * yh; // Cross terms; each one contributes to the full product at bit 16 and up.
+ auto xhyl = xh * yl;
+ auto xlyhh = xlyh >> 16; // Part of each cross term that lands in the high 32 bits.
+ auto xhylh = xhyl >> 16;
+ auto xlyhl = xlyh & UInt4(0x0000FFFF); // Part of each cross term that lands in bits 16..31 of the low half.
+ auto xhyll = xhyl & UInt4(0x0000FFFF);
+ auto xlylh = (xl * yl) >> 16; // Upper 16 bits of the low-by-low product.
+ auto oflow = (xlyhl + xhyll + xlylh) >> 16; // Carry out of bits 16..31 into the high half.
+ // High half = high-by-high product + upper parts of both cross terms + the carry from the low half.
+ return (xh * yh) + (xlyhh + xhylh) + oflow;
+ }
+
+ // Scalarized implementation: lanes are read through As<Int4>, converted via UInt to Long, multiplied, shifted by 32 and narrowed to Int.
+ Int4 result; // Declared without an initializer; all four lanes are written below.
+ result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 0))) * Long(UInt(Extract(As<Int4>(y), 0)))) >> Long(Int(32))), 0);
+ result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 1))) * Long(UInt(Extract(As<Int4>(y), 1)))) >> Long(Int(32))), 1);
+ result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 2))) * Long(UInt(Extract(As<Int4>(y), 2)))) >> Long(Int(32))), 2);
+ result = Insert(result, Int((Long(UInt(Extract(As<Int4>(x), 3))) * Long(UInt(Extract(As<Int4>(y), 3)))) >> Long(Int(32))), 3);
+ // NOTE(review): presumably Int(Long) keeps the low 32 bits, so a sign-propagating >> would not alter the lanes - confirm.
+ return As<UInt4>(result);
+ }
+
RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
{
assert(false && "UNIMPLEMENTED"); return RValue<UShort4>(V(nullptr));
@@ -4777,6 +4823,16 @@
return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
}
+ RValue<Long> operator*(RValue<Long> lhs, RValue<Long> rhs) // Multiplication of two Long values.
+ {
+ return RValue<Long>(Nucleus::createMul(lhs.value, rhs.value)); // Wraps the value produced by Nucleus::createMul(lhs, rhs).
+ }
+
+ RValue<Long> operator>>(RValue<Long> lhs, RValue<Long> rhs) // Right shift of lhs by rhs for Long values.
+ {
+ return RValue<Long>(Nucleus::createAShr(lhs.value, rhs.value)); // Uses createAShr; by its name an arithmetic (sign-propagating) shift - confirm in Nucleus.
+ }
+
RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
{
return lhs = lhs + rhs;