Fully support all 16-bit packed texture formats
Vulkan lists the following formats as 16-bit packed formats:
* VK_FORMAT_R4G4B4A4_UNORM_PACK16
* VK_FORMAT_B4G4R4A4_UNORM_PACK16
* VK_FORMAT_R5G6B5_UNORM_PACK16
* VK_FORMAT_B5G6R5_UNORM_PACK16
* VK_FORMAT_R5G5B5A1_UNORM_PACK16
* VK_FORMAT_B5G5R5A1_UNORM_PACK16
* VK_FORMAT_A1R5G5B5_UNORM_PACK16
We already support VK_FORMAT_A1R5G5B5_UNORM_PACK16, so we can use it
as a cross-reference to determine which tests we expect to pass.
Bug: b/139351376
Tests: dEQP-VK.*pack16*
Change-Id: Ia7b0b9efe92c7f67c803579fee7c8e0d62ef081f
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/57048
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Sean Risser <srisser@google.com>
Commit-Queue: Sean Risser <srisser@google.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 1a70606..152b7bd 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -466,11 +466,34 @@
Float(1.0f / (1 << 24)));
c.w = 1.0f;
break;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF000)) >> UShort(12)));
+ c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x0F00)) >> UShort(8)));
+ c.z = Float(Int((*Pointer<UShort>(element) & UShort(0x00F0)) >> UShort(4)));
+ c.w = Float(Int(*Pointer<UShort>(element) & UShort(0x000F)));
+ break;
case VK_FORMAT_R5G6B5_UNORM_PACK16:
c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ c.z = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
+ c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
+ c.x = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
+ c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07C0)) >> UShort(6)));
+ c.z = Float(Int((*Pointer<UShort>(element) & UShort(0x003E)) >> UShort(1)));
+ c.w = Float(Int(*Pointer<UShort>(element) & UShort(0x0001)));
+ break;
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ c.z = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
+ c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07C0)) >> UShort(6)));
+ c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x003E)) >> UShort(1)));
+ c.w = Float(Int(*Pointer<UShort>(element) & UShort(0x0001)));
+ break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
@@ -907,6 +930,20 @@
UShort(mask));
}
break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ if(writeR && writeG && writeB)
+ {
+ *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c.zyxx), { 11, 5, 0, 0 }));
+ }
+ else
+ {
+ unsigned short mask = (writeR ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeB ? 0xF800 : 0x0000);
+ unsigned short unmask = ~mask;
+ *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
+ (UShort(PackFields(RoundInt(c.zyxx), { 11, 5, 0, 0 })) &
+ UShort(mask));
+ }
+ break;
case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
if(writeRGBA)
{
diff --git a/src/Pipeline/Constants.cpp b/src/Pipeline/Constants.cpp
index ce921e6..5855fce 100644
--- a/src/Pipeline/Constants.cpp
+++ b/src/Pipeline/Constants.cpp
@@ -252,6 +252,10 @@
for(int i = 0; i < 16; i++)
{
mask5551Q[i] = word4((i & 0x1 ? 0x001F : 0) | (i & 0x2 ? 0x03E0 : 0) | (i & 0x4 ? 0x7C00 : 0) | (i & 8 ? 0x8000 : 0));
+ maskr5g5b5a1Q[i] = word4((i & 0x1 ? 0x003E : 0) | (i & 0x2 ? 0x07C0 : 0) | (i & 0x4 ? 0xF800 : 0) | (i & 8 ? 0x0001 : 0));
+ maskb5g5r5a1Q[i] = word4((i & 0x1 ? 0xF800 : 0) | (i & 0x2 ? 0x07C0 : 0) | (i & 0x4 ? 0x003E : 0) | (i & 8 ? 0x0001 : 0));
+ mask4rgbaQ[i] = word4((i & 0x1 ? 0x00F0 : 0) | (i & 0x2 ? 0x0F00 : 0) | (i & 0x4 ? 0xF000 : 0) | (i & 8 ? 0x000F : 0));
+ mask4bgraQ[i] = word4((i & 0x1 ? 0xF000 : 0) | (i & 0x2 ? 0x0F00 : 0) | (i & 0x4 ? 0x00F0 : 0) | (i & 8 ? 0x000F : 0));
}
for(int i = 0; i < 4; i++)
diff --git a/src/Pipeline/Constants.hpp b/src/Pipeline/Constants.hpp
index 5bb1f53..dc66a9c 100644
--- a/src/Pipeline/Constants.hpp
+++ b/src/Pipeline/Constants.hpp
@@ -68,9 +68,13 @@
word4 maskW01Q[4];
dword4 maskD01X[4];
word4 mask565Q[8];
- dword2 mask10Q[16]; // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x
- word4 mask5551Q[16]; // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x
- dword4 mask11X[8]; // 3 bit writemask -> B10G11R11 bit patterns, replicated 4x
+ dword2 mask10Q[16]; // 4 bit writemask -> A2B10G10R10 bit patterns, replicated 2x
+ word4 mask5551Q[16]; // 4 bit writemask -> A1R5G5B5 bit patterns, replicated 4x
+ word4 maskr5g5b5a1Q[16]; // 4 bit writemask -> R5G5B5A1 bit patterns, replicated 4x
+ word4 maskb5g5r5a1Q[16]; // 4 bit writemask -> B5G5R5A1 bit patterns, replicated 4x
+ word4 mask4rgbaQ[16]; // 4 bit writemask -> R4G4B4A4 bit patterns, replicated 4x
+ word4 mask4bgraQ[16]; // 4 bit writemask -> B4G4R4A4 bit patterns, replicated 4x
+ dword4 mask11X[8]; // 3 bit writemask -> B10G11R11 bit patterns, replicated 4x
unsigned short sRGBtoLinearFF_FF00[256];
diff --git a/src/Pipeline/PixelProgram.cpp b/src/Pipeline/PixelProgram.cpp
index 9b321f9..60e6805 100644
--- a/src/Pipeline/PixelProgram.cpp
+++ b/src/Pipeline/PixelProgram.cpp
@@ -279,6 +279,11 @@
auto format = state.colorFormat[index];
switch(format)
{
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
case VK_FORMAT_B8G8R8A8_UNORM:
@@ -364,6 +369,11 @@
{
case VK_FORMAT_UNDEFINED:
break;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
case VK_FORMAT_B8G8R8A8_UNORM:
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index c24ca31..46af146 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -1082,6 +1082,82 @@
switch(state.colorFormat[index])
{
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ buffer += 2 * x;
+ buffer2 = buffer + pitchB;
+ c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+
+ pixel.x = (c01 & Short4(0xF000u));
+ pixel.y = (c01 & Short4(0x0F00u)) << 4;
+ pixel.z = (c01 & Short4(0x00F0u)) << 8;
+ pixel.w = (c01 & Short4(0x000Fu)) << 12;
+
+ // Expand to 16 bit range
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 4);
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 8);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 4);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 8);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 4);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 8);
+ pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 4);
+ pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 8);
+ break;
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ buffer += 2 * x;
+ buffer2 = buffer + pitchB;
+ c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+
+ pixel.z = (c01 & Short4(0xF000u));
+ pixel.y = (c01 & Short4(0x0F00u)) << 4;
+ pixel.x = (c01 & Short4(0x00F0u)) << 8;
+ pixel.w = (c01 & Short4(0x000Fu)) << 12;
+
+ // Expand to 16 bit range
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 4);
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 8);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 4);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 8);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 4);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 8);
+ pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 4);
+ pixel.w |= As<Short4>(As<UShort4>(pixel.w) >> 8);
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ buffer += 2 * x;
+ buffer2 = buffer + pitchB;
+ c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+
+ pixel.x = (c01 & Short4(0xF800u));
+ pixel.y = (c01 & Short4(0x07C0u)) << 5;
+ pixel.z = (c01 & Short4(0x003Eu)) << 10;
+ pixel.w = ((c01 & Short4(0x0001u)) << 15) >> 15;
+
+ // Expand to 16 bit range
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
+ break;
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ buffer += 2 * x;
+ buffer2 = buffer + pitchB;
+ c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+
+ pixel.z = (c01 & Short4(0xF800u));
+ pixel.y = (c01 & Short4(0x07C0u)) << 5;
+ pixel.x = (c01 & Short4(0x003Eu)) << 10;
+ pixel.w = ((c01 & Short4(0x0001u)) << 15) >> 15;
+
+ // Expand to 16 bit range
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 5);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 10);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
+ break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
buffer += 2 * x;
buffer2 = buffer + pitchB;
@@ -1118,6 +1194,24 @@
pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ buffer += 2 * x;
+ buffer2 = buffer + pitchB;
+ c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
+
+ pixel.z = c01 & Short4(0xF800u);
+ pixel.y = (c01 & Short4(0x07E0u)) << 5;
+ pixel.x = (c01 & Short4(0x001Fu)) << 11;
+ pixel.w = Short4(0xFFFFu);
+
+ // Expand to 16 bit range
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 5);
+ pixel.x |= As<Short4>(As<UShort4>(pixel.x) >> 10);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 6);
+ pixel.y |= As<Short4>(As<UShort4>(pixel.y) >> 12);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 5);
+ pixel.z |= As<Short4>(As<UShort4>(pixel.z) >> 10);
+ break;
case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_B8G8R8A8_SRGB:
buffer += 4 * x;
@@ -1386,12 +1480,22 @@
current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 10) + Short4(0x0020);
current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 2) + Short4(0x2000);
break;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 4) + Short4(0x0800);
+ current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 4) + Short4(0x0800);
+ current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 4) + Short4(0x0800);
+ current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 4) + Short4(0x0800);
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 5) + Short4(0x0400);
current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 5) + Short4(0x0400);
current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 5) + Short4(0x0400);
current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 1) + Short4(0x4000);
break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 5) + Short4(0x0400);
current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 6) + Short4(0x0200);
@@ -1406,6 +1510,46 @@
switch(state.colorFormat[index])
{
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ {
+ current.x = As<UShort4>(current.x & Short4(0xF000));
+ current.y = As<UShort4>(current.y & Short4(0xF000)) >> 4;
+ current.z = As<UShort4>(current.z & Short4(0xF000)) >> 8;
+ current.w = As<UShort4>(current.w & Short4(0xF000u)) >> 12;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ {
+ current.z = As<UShort4>(current.z & Short4(0xF000));
+ current.y = As<UShort4>(current.y & Short4(0xF000)) >> 4;
+ current.x = As<UShort4>(current.x & Short4(0xF000)) >> 8;
+ current.w = As<UShort4>(current.w & Short4(0xF000u)) >> 12;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ {
+ current.x = As<UShort4>(current.x & Short4(0xF800));
+ current.y = As<UShort4>(current.y & Short4(0xF800)) >> 5;
+ current.z = As<UShort4>(current.z & Short4(0xF800)) >> 10;
+ current.w = As<UShort4>(current.w & Short4(0x8000u)) >> 15;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ {
+ current.z = As<UShort4>(current.z & Short4(0xF800));
+ current.y = As<UShort4>(current.y & Short4(0xF800)) >> 5;
+ current.x = As<UShort4>(current.x & Short4(0xF800)) >> 10;
+ current.w = As<UShort4>(current.w & Short4(0x8000u)) >> 15;
+
+ current.x = current.x | current.y | current.z | current.w;
+ }
+ break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
{
current.w = current.w & Short4(0x8000u);
@@ -1425,6 +1569,15 @@
current.x = current.x | current.y | current.z;
}
break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ {
+ current.z = current.z & Short4(0xF800u);
+ current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
+ current.x = As<UShort4>(current.x) >> 11;
+
+ current.x = current.x | current.y | current.z;
+ }
+ break;
case VK_FORMAT_B8G8R8A8_UNORM:
case VK_FORMAT_B8G8R8A8_SRGB:
if(rgbaWriteMask == 0x7)
@@ -1573,6 +1726,114 @@
switch(state.colorFormat[index])
{
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask4rgbaQ[bgraWriteMask & 0xF][0]));
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if(bgraWriteMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if(bgraWriteMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, mask4bgraQ[bgraWriteMask & 0xF][0]));
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if(bgraWriteMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if(bgraWriteMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, maskr5g5b5a1Q[bgraWriteMask & 0xF][0]));
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if(bgraWriteMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if(bgraWriteMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ {
+ buffer += 2 * x;
+ Int value = *Pointer<Int>(buffer);
+
+ Int channelMask = *Pointer<Int>(constants + OFFSET(Constants, maskb5g5r5a1Q[bgraWriteMask & 0xF][0]));
+
+ Int c01 = Extract(As<Int2>(current.x), 0);
+ Int mask01 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][0]) + xMask * 8);
+ if(bgraWriteMask != 0x0000000F)
+ {
+ mask01 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c01 & mask01) | (value & ~mask01);
+
+ buffer += pitchB;
+ value = *Pointer<Int>(buffer);
+
+ Int c23 = Extract(As<Int2>(current.x), 1);
+ Int mask23 = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[0][2]) + xMask * 8);
+ if(bgraWriteMask != 0x0000000F)
+ {
+ mask23 &= channelMask;
+ }
+ *Pointer<Int>(buffer) = (c23 & mask23) | (value & ~mask23);
+ }
+ break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
{
buffer += 2 * x;
diff --git a/src/Pipeline/SamplerCore.cpp b/src/Pipeline/SamplerCore.cpp
index d821e46..9043038 100644
--- a/src/Pipeline/SamplerCore.cpp
+++ b/src/Pipeline/SamplerCore.cpp
@@ -152,16 +152,20 @@
switch(state.textureFormat)
{
case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
c.x *= Float4(1.0f / 0xF800);
c.y *= Float4(1.0f / 0xFC00);
c.z *= Float4(1.0f / 0xF800);
break;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
c.x *= Float4(1.0f / 0xF000);
c.y *= Float4(1.0f / 0xF000);
c.z *= Float4(1.0f / 0xF000);
c.w *= Float4(1.0f / 0xF000);
break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
c.x *= Float4(1.0f / 0xF800);
c.y *= Float4(1.0f / 0xF800);
@@ -214,16 +218,20 @@
switch(state.textureFormat)
{
case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
break;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF000);
c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF000);
c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF000);
c.w = Float4(As<UShort4>(cs.w)) * Float4(1.0f / 0xF000);
break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xF800);
@@ -1526,12 +1534,35 @@
c.y = (c.x & Short4(0x07E0u)) << 5;
c.x = (c.x & Short4(0xF800u));
break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ c.z = (c.x & Short4(0xF800u));
+ c.y = (c.x & Short4(0x07E0u)) << 5;
+ c.x = (c.x & Short4(0x001Fu)) << 11;
+ break;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ c.w = (c.x << 12) & Short4(0xF000u);
+ c.z = (c.x << 8) & Short4(0xF000u);
+ c.y = (c.x << 4) & Short4(0xF000u);
+ c.x = (c.x) & Short4(0xF000u);
+ break;
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
c.w = (c.x << 12) & Short4(0xF000u);
c.z = (c.x) & Short4(0xF000u);
c.y = (c.x << 4) & Short4(0xF000u);
c.x = (c.x << 8) & Short4(0xF000u);
break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ c.w = (c.x << 15) & Short4(0x8000u);
+ c.z = (c.x << 10) & Short4(0xF800u);
+ c.y = (c.x << 5) & Short4(0xF800u);
+ c.x = (c.x) & Short4(0xF800u);
+ break;
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ c.w = (c.x << 15) & Short4(0x8000u);
+ c.z = (c.x) & Short4(0xF800u);
+ c.y = (c.x << 5) & Short4(0xF800u);
+ c.x = (c.x << 10) & Short4(0xF800u);
+ break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
c.w = (c.x) & Short4(0x8000u);
c.z = (c.x << 11) & Short4(0xF800u);
diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp
index 067eaea..245d110 100644
--- a/src/Pipeline/SpirvShaderImage.cpp
+++ b/src/Pipeline/SpirvShaderImage.cpp
@@ -979,12 +979,42 @@
dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
break;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ dst.move(0, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
+ dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
+ dst.move(2, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
+ dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
+ break;
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ dst.move(0, SIMD::Float((packed[0] >> 4) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
+ dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
+ dst.move(2, SIMD::Float((packed[0] >> 12) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
+ dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0xF)) * SIMD::Float(1.0f / 0xF));
+ break;
case VK_FORMAT_R5G6B5_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(3, SIMD::Float(1.0f));
break;
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
+ dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(3, SIMD::Float(1.0f));
+ break;
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(2, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
+ break;
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+ dst.move(0, SIMD::Float((packed[0] >> 1) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(1, SIMD::Float((packed[0] >> 6) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(2, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
+ dst.move(3, SIMD::Float((packed[0]) & SIMD::Int(0x1)));
+ break;
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
diff --git a/src/Vulkan/VkFormat.cpp b/src/Vulkan/VkFormat.cpp
index 48340ab..24ba6a2 100644
--- a/src/Vulkan/VkFormat.cpp
+++ b/src/Vulkan/VkFormat.cpp
@@ -226,6 +226,8 @@
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
return VK_FORMAT_R8G8_UNORM;
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ return VK_FORMAT_R4G4B4A4_UNORM_PACK16;
default:
UNSUPPORTED("format %d", int(format));
break;
@@ -2007,6 +2009,11 @@
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
return true;
default:
return false;
@@ -2022,8 +2029,12 @@
switch(format)
{
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
return true;
case VK_FORMAT_R8G8_SINT:
@@ -2079,6 +2090,10 @@
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_R32_SFLOAT:
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
@@ -2127,6 +2142,10 @@
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8_SNORM:
case VK_FORMAT_R8_UINT:
@@ -2202,6 +2221,10 @@
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8_SNORM:
case VK_FORMAT_R8_UINT:
@@ -2305,6 +2328,10 @@
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_R8G8B8A8_SNORM:
case VK_FORMAT_R8G8B8A8_UINT:
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 3eaff75..5468209 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -1105,8 +1105,12 @@
switch(format)
{
// Formats which can be sampled *and* filtered
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8_SRGB:
@@ -1311,6 +1315,11 @@
{
case VK_FORMAT_R5G6B5_UNORM_PACK16:
case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+ case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+ case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+ case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R8G8B8A8_UNORM: