Optimize Int2 construction.
Change-Id: Ibab854164a45c998976e65b8bfec80a8a688461b
Reviewed-on: https://swiftshader-review.googlesource.com/4511
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
Tested-by: Nicolas Capens <capn@google.com>
diff --git a/src/Reactor/Nucleus.cpp b/src/Reactor/Nucleus.cpp
index 712dc7b..4305895 100644
--- a/src/Reactor/Nucleus.cpp
+++ b/src/Reactor/Nucleus.cpp
@@ -4623,13 +4623,23 @@
Int2::Int2(RValue<Int> lo, RValue<Int> hi)
{
- Constant *shuffle[2];
- shuffle[0] = Nucleus::createConstantInt(0);
- shuffle[1] = Nucleus::createConstantInt(1);
-
- Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
-
- storeValue(Nucleus::createBitCast(packed, Int2::getType()));
+ if(CPUID::supportsMMX2())   // Fast path: pack lo and hi with a single unpack operation
+ {
+ // Intended instruction sequence for this path:
+ // movd mm0, lo
+ // movd mm1, hi
+ // punpckldq mm0, mm1   (interleaves the low dwords, giving {lo, hi})
+ storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);   // Each Int goes through UInt and Long1 before being reinterpreted as Int2
+ }
+ else   // Fallback: same shuffle-based construction that was used unconditionally before this change
+ {
+ Constant *shuffle[2];   // Shuffle mask selecting element 0, then element 1
+ shuffle[0] = Nucleus::createConstantInt(0);
+ shuffle[1] = Nucleus::createConstantInt(1);
+
+ Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));   // Bitcast lo and hi to 1-element Int vectors and shuffle them into one 2-element vector
+
+ storeValue(Nucleus::createBitCast(packed, Int2::getType()));   // Reinterpret the packed vector as Int2's type and store it
+ }
}
RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
diff --git a/src/Shader/PixelRoutine.cpp b/src/Shader/PixelRoutine.cpp
index a0f876c..c313eb4 100644
--- a/src/Shader/PixelRoutine.cpp
+++ b/src/Shader/PixelRoutine.cpp
@@ -1013,14 +1013,14 @@
Short4 c01;
Short4 c23;
Pointer<Byte> buffer;
+ Pointer<Byte> buffer2;
switch(state.targetFormat[index])
{
case FORMAT_R5G6B5:
buffer = cBuffer + 2 * x;
- c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
- buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
- c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
+ buffer2 = buffer + *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
+ c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
pixel.x = c01 & Short4(0xF800u);
pixel.y = (c01 & Short4(0x07E0u)) << 5;