Add support for OpUMulExtended, OpSMulExtended
- Make the existing LLVMReactor MulHigh lowering, previously used only on non-x86,
available on x86 as well, as we don't have a good intrinsics-based implementation
of 4x 32-bit mul highs. At some point in the future we can rework this
to use some shuffles and a pair of pmuludq.
- Plumb through Int4 and UInt4 variants of MulHigh
- Implement SPIR-V OpUMulExtended and OpSMulExtended in terms of MulHigh
Bug: b/126873455
Change-Id: I25ba0a69691e7a6f7a5542ec4a90a44ba8f68331
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/25929
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Chris Forbes <chrisforbes@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index e589d17..6831082 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -352,30 +352,6 @@
return ::builder->CreateAdd(lhs, rhs);
}
- llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
- {
- llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
- llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
-
- llvm::Value *extX, *extY;
- if (sext)
- {
- extX = ::builder->CreateSExt(x, extTy);
- extY = ::builder->CreateSExt(y, extTy);
- }
- else
- {
- extX = ::builder->CreateZExt(x, extTy);
- extY = ::builder->CreateZExt(y, extTy);
- }
-
- llvm::Value *mult = ::builder->CreateMul(extX, extY);
-
- llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
- llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getIntegerBitWidth());
- return ::builder->CreateTrunc(mulh, ty);
- }
-
llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
{
llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
@@ -447,6 +423,30 @@
}
#endif // !defined(__i386__) && !defined(__x86_64__)
#endif // REACTOR_LLVM_VERSION >= 7
+
+ llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext) // Builds IR for the upper half of each element's double-width product x * y.
+ {
+ llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType()); // Result type comes from x alone; presumably y has the same vector type -- confirm at call sites.
+ llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty); // Per LLVM docs: same element count, integer elements twice as wide.
+
+ llvm::Value *extX, *extY;
+ if (sext) // sext == true: operands are treated as signed and sign-extended.
+ {
+ extX = ::builder->CreateSExt(x, extTy);
+ extY = ::builder->CreateSExt(y, extTy);
+ }
+ else // Otherwise operands are treated as unsigned and zero-extended.
+ {
+ extX = ::builder->CreateZExt(x, extTy);
+ extY = ::builder->CreateZExt(y, extTy);
+ }
+
+ llvm::Value *mult = ::builder->CreateMul(extX, extY); // Product computed at the widened element width.
+
+ llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
+ llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getBitWidth()); // Shift by the original element width so the upper half lands in the low bits.
+ return ::builder->CreateTrunc(mulh, ty); // Truncation keeps only those low bits, so AShr is used for both the signed and unsigned paths.
+ }
}
namespace rr
@@ -5715,6 +5715,18 @@
#endif
}
+ RValue<Int4> MulHigh(RValue<Int4> x, RValue<Int4> y) // Int4 overload: forwards to lowerMulHigh with sext = true (signed path).
+ {
+ // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
+ return As<Int4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
+ }
+
+ RValue<UInt4> MulHigh(RValue<UInt4> x, RValue<UInt4> y) // UInt4 overload: forwards to lowerMulHigh with sext = false (unsigned path).
+ {
+ // TODO: For x86, build an intrinsics version of this which uses shuffles + pmuludq.
+ return As<UInt4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
+ }
+
RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
{
#if defined(__i386__) || defined(__x86_64__)