Assume SSE2 support is available.
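Chrome and many other products require SSE2 support as a minimum, so the
runtime CPUID::supportsSSE2() checks and their non-SSE2 fallback paths are
removed, along with code that was already disabled by #if 0 or if(false).
Note that MMX checks are left in place for now.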
Bug swiftshader:78
Change-Id: Iabd8b1dc2092949d5dba29a78e75d014e808f12c
Reviewed-on: https://swiftshader-review.googlesource.com/11068
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/LLVMReactor.cpp b/src/Reactor/LLVMReactor.cpp
index bb69182..ddae0cc 100644
--- a/src/Reactor/LLVMReactor.cpp
+++ b/src/Reactor/LLVMReactor.cpp
@@ -2553,44 +2553,28 @@
{
Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
- #if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd)
- Constant *pack[8];
- pack[0] = Nucleus::createConstantInt(0);
- pack[1] = Nucleus::createConstantInt(2);
- pack[2] = Nucleus::createConstantInt(4);
- pack[3] = Nucleus::createConstantInt(6);
+ Value *packed;
- Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
- #else
- Value *packed;
+ // FIXME: Use Swizzle<Short8>
+ if(!CPUID::supportsSSSE3())
+ {
+ int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7};
+ int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6};
- // FIXME: Use Swizzle<Short8>
- if(!CPUID::supportsSSSE3())
- {
- int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7};
- int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6};
+ Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw);
+ Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw);
+ Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
+ packed = createSwizzle4(int4, 0x88);
+ }
+ else
+ {
+ int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
+ Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
+ packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
+ }
- Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw);
- Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw);
- Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
- packed = createSwizzle4(int4, 0x88);
- }
- else
- {
- int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
- Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
- packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
- }
-
- #if 0 // FIXME: No optimal instruction selection
- Value *qword2 = Nucleus::createBitCast(packed, T(llvm::VectorType::get(T(Long::getType()), 2)));
- Value *element = Nucleus::createExtractElement(qword2, 0);
- Value *short4 = Nucleus::createBitCast(element, Short4::getType());
- #else // FIXME: Requires SSE
- Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
- Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
- #endif
- #endif
+ Value *int2 = RValue<Int2>(Int2(As<Int4>(packed))).value;
+ Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
storeValue(short4);
}
@@ -4551,22 +4535,15 @@
RValue<Int> Extract(RValue<Int2> val, int i)
{
- if(false) // FIXME: LLVM does not generate optimal code
+ if(i == 0)
{
- return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
+ return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(llvm::VectorType::get(T(Int::getType()), 2))), Int::getType(), 0));
}
else
{
- if(i == 0)
- {
- return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(llvm::VectorType::get(T(Int::getType()), 2))), Int::getType(), 0));
- }
- else
- {
- Int2 val2 = As<Int2>(UnpackHigh(val, val));
+ Int2 val2 = As<Int2>(UnpackHigh(val, val));
- return Extract(val2, 0);
- }
+ return Extract(val2, 0);
}
}
@@ -5823,60 +5800,16 @@
Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
{
- #if 0
- Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes
- #elif 0
- Value *vector = loadValue();
-
- Value *i8x = Nucleus::createExtractElement(cast.value, 0);
- Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
- Value *x = Nucleus::createInsertElement(vector, f32x, 0);
-
- Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
- Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
- Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
-
- Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
- Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
- Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
-
- Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
- Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
- Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
- #else
- Value *a = Int4(cast).loadValue();
- Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
- #endif
+ Value *a = Int4(cast).loadValue();
+ Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
storeValue(xyzw);
}
Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
{
- #if 0
- Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes
- #elif 0
- Value *vector = loadValue();
-
- Value *i8x = Nucleus::createExtractElement(cast.value, 0);
- Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
- Value *x = Nucleus::createInsertElement(vector, f32x, 0);
-
- Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
- Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
- Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
-
- Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
- Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
- Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
-
- Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
- Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
- Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
- #else
- Value *a = Int4(cast).loadValue();
- Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
- #endif
+ Value *a = Int4(cast).loadValue();
+ Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
storeValue(xyzw);
}
@@ -6403,19 +6336,9 @@
RValue<Int4> cvtps2dq(RValue<Float4> val)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
+ llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
- return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
- }
- else
- {
- Int2 lo = x86::cvtps2pi(val);
- Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
-
- return Int4(lo, hi);
- }
+ return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
}
RValue<Float> rcpss(RValue<Float> val)
@@ -6868,25 +6791,9 @@
RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
+ llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
- return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
- }
- else
- {
- Int2 loX = Int2(x);
- Int2 hiX = Int2(Swizzle(x, 0xEE));
-
- Int2 loY = Int2(y);
- Int2 hiY = Int2(Swizzle(y, 0xEE));
-
- Short4 lo = x86::packssdw(loX, hiX);
- Short4 hi = x86::packssdw(loY, hiY);
-
- return Short8(lo, hi);
- }
+ return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
}
RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
@@ -6971,22 +6878,9 @@
RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
+ llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
- return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
- }
- else
- {
- Int2 lo = Int2(x);
- Int2 hi = Int2(Swizzle(x, 0xEE));
-
- lo = x86::pslld(lo, y);
- hi = x86::pslld(hi, y);
-
- return Int4(lo, hi);
- }
+ return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
@@ -6998,22 +6892,9 @@
RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
+ llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
- return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
- }
- else
- {
- Int2 lo = Int2(x);
- Int2 hi = Int2(Swizzle(x, 0xEE));
-
- lo = x86::psrad(lo, y);
- hi = x86::psrad(hi, y);
-
- return Int4(lo, hi);
- }
+ return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
@@ -7025,22 +6906,9 @@
RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
{
- if(CPUID::supportsSSE2())
- {
- llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
+ llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
- return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
- }
- else
- {
- UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
- UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
-
- lo = x86::psrld(lo, y);
- hi = x86::psrld(hi, y);
-
- return UInt4(lo, hi);
- }
+ return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
}
RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)