Add decoder for BC6h
This passes all the non-sparse BC6h format tests in dEQP.
Bug: b/151203718
Tests: dEQP-VK.*bc6*
Change-Id: I5dacbc07bb54ff4fc384db974feaf7cfd0055e96
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/45571
Kokoro-Result: kokoro <noreply+kokoro@google.com>
Tested-by: Sean Risser <srisser@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Device/BC_Decoder.cpp b/src/Device/BC_Decoder.cpp
index 909ef8c..7a7b969 100644
--- a/src/Device/BC_Decoder.cpp
+++ b/src/Device/BC_Decoder.cpp
@@ -15,10 +15,12 @@
#include "BC_Decoder.hpp"
#include "System/Debug.hpp"
+#include "System/Math.hpp"
#include <algorithm>
#include <array>
#include <cstddef>
+#include <vector>
#include <assert.h>
#include <stdint.h>
@@ -219,6 +221,846 @@
uint64_t data;
};
+namespace BC6H {
+
+static constexpr int MaxPartitions = 64;
+
+static constexpr uint8_t PartitionTable2[MaxPartitions][16] = {
+ { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
+ { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 },
+ { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 },
+ { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 },
+ { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+ { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 },
+ { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 },
+ { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 },
+ { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 },
+ { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 },
+ { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 },
+ { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 },
+ { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 },
+ { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 },
+ { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 },
+ { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 },
+ { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 },
+ { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
+ { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 },
+ { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 },
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 },
+ { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 },
+ { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 },
+ { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 },
+ { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 },
+ { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 },
+ { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 },
+ { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 },
+ { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 },
+ { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 },
+ { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 },
+ { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 },
+ { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 },
+ { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 },
+ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
+ { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 },
+ { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 },
+ { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 },
+ { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 },
+ { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 },
+ { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 },
+ { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 },
+ { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 },
+ { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 },
+ { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 },
+ { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
+ { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
+ { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 },
+ { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 },
+};
+
+static constexpr uint8_t AnchorTable2[MaxPartitions] = {
+ // clang-format off
+ 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+ 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+ 0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
+ 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
+ 0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
+ 0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
+ 0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
+ 0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
+ // clang-format on
+};
+
+// 1.0f in half-precision floating point format
+static constexpr uint16_t halfFloat1 = 0x3C00;
+union Color
+{
+ struct RGBA
+ {
+ uint16_t r = 0;
+ uint16_t g = 0;
+ uint16_t b = 0;
+ const uint16_t a = halfFloat1;
+
+ RGBA(uint16_t r, uint16_t g, uint16_t b)
+ : r(r)
+ , g(g)
+ , b(b)
+ {
+ }
+
+ RGBA &operator=(const RGBA &other)
+ {
+ if(this != &other)
+ {
+ this->r = other.r;
+ this->g = other.g;
+ this->b = other.b;
+ }
+
+ return *this;
+ }
+ };
+
+ Color(uint16_t r, uint16_t g, uint16_t b)
+ : rgba(r, g, b)
+ {
+ }
+
+ Color(int r, int g, int b)
+ : rgba((uint16_t)r, (uint16_t)g, (uint16_t)b)
+ {
+ }
+
+ Color()
+ {
+ }
+
+ Color &operator=(const Color &other)
+ {
+ if(this != &other)
+ {
+ this->rgba = other.rgba;
+ }
+ return *this;
+ }
+
+ RGBA rgba;
+ uint16_t channel[4];
+};
+static_assert(sizeof(Color) == 8, "BC6h::Color must be 8 bytes long");
+
+inline int32_t extendSign(int32_t val, size_t size)
+{
+ // Suppose we have a 2-bit integer being stored in 4 bit variable:
+ // x = 0b00AB
+ //
+ // In order to sign extend x, we need to turn the 0s into A's:
+ // x_extend = 0bAAAB
+ //
+ // We can do that by flipping A in x then subtracting 0b0010 from x.
+ // Suppose A is 1:
+ // x = 0b001B
+ // x_flip = 0b000B
+ // x_minus = 0b111B
+ // Since A is flipped to 0, subtracting the mask sets it and all the bits above it to 1.
+ // And if A is 0:
+ // x = 0b000B
+ // x_flip = 0b001B
+ // x_minus = 0b000B
+ // We unset the bit we flipped, and touch no other bit
+ uint16_t mask = 1u << (size - 1);
+ return (val ^ mask) - mask;
+}
+
+static int constexpr RGBfChannels = 3;
+struct RGBf
+{
+ uint16_t channel[RGBfChannels];
+ size_t size[RGBfChannels];
+ bool isSigned;
+
+ RGBf()
+ {
+ static_assert(RGBfChannels == 3, "RGBf must have exactly 3 channels");
+ static_assert(sizeof(channel) / sizeof(channel[0]) == RGBfChannels, "RGBf must have exactly 3 channels");
+ static_assert(sizeof(channel) / sizeof(channel[0]) == sizeof(size) / sizeof(size[0]), "RGBf requires equally sized arrays for channels and channel sizes");
+
+ for(int i = 0; i < RGBfChannels; i++)
+ {
+ channel[i] = 0;
+ size[i] = 0;
+ }
+
+ isSigned = false;
+ }
+
+ void extendSign()
+ {
+ for(int i = 0; i < RGBfChannels; i++)
+ {
+ channel[i] = BC6H::extendSign(channel[i], size[i]);
+ }
+ }
+
+ // Assuming this is the delta, take the base-endpoint and transform this into
+ // a proper endpoint.
+ //
+ // The final computed endpoint is truncated to the base-endpoint's size;
+ void resolveDelta(RGBf base)
+ {
+ for(int i = 0; i < RGBfChannels; i++)
+ {
+ size[i] = base.size[i];
+ channel[i] = (base.channel[i] + channel[i]) & ((1 << base.size[i]) - 1);
+ }
+
+ // Per the spec:
+ // "For signed formats, the results of the delta calculation must be sign
+ // extended as well."
+ if(isSigned)
+ {
+ extendSign();
+ }
+ }
+
+ void unquantize()
+ {
+ if(isSigned)
+ {
+ unquantizeSigned();
+ }
+ else
+ {
+ unquantizeUnsigned();
+ }
+ }
+
+ void unquantizeUnsigned()
+ {
+ for(int i = 0; i < RGBfChannels; i++)
+ {
+ if(size[i] >= 15 || channel[i] == 0)
+ {
+ continue;
+ }
+ else if(channel[i] == ((1u << size[i]) - 1))
+ {
+ channel[i] = 0xFFFFu;
+ }
+ else
+ {
+ // Need 32 bits to avoid overflow
+ uint32_t tmp = channel[i];
+ channel[i] = (uint16_t)(((tmp << 16) + 0x8000) >> size[i]);
+ }
+ size[i] = 16;
+ }
+ }
+
+ void unquantizeSigned()
+ {
+ for(int i = 0; i < RGBfChannels; i++)
+ {
+ if(size[i] >= 16 || channel[i] == 0)
+ {
+ continue;
+ }
+
+ int16_t value = sw::bit_cast<int16_t>(channel[i]);
+ int32_t result = value;
+ bool signBit = value < 0;
+ if(signBit)
+ {
+ value = -value;
+ }
+
+ if(value >= ((1 << (size[i] - 1)) - 1))
+ {
+ result = 0x7FFF;
+ }
+ else
+ {
+ // Need 32 bits to avoid overflow
+ int32_t tmp = value;
+ result = (((tmp << 15) + 0x4000) >> (size[i] - 1));
+ }
+
+ if(signBit)
+ {
+ result = -result;
+ }
+
+ channel[i] = (uint16_t)result;
+ size[i] = 16;
+ }
+ }
+};
+
+struct Data
+{
+ uint64_t low64;
+ uint64_t high64;
+
+ Data() = default;
+ Data(uint64_t low64, uint64_t high64)
+ : low64(low64)
+ , high64(high64)
+ {
+ }
+
+ // Consumes the lowest N bits from from low64 and high64 where N is:
+ // abs(MSB - LSB)
+ // MSB and LSB come from the block description of the BC6h spec and specify
+ // the location of the bits in the returned bitstring.
+ //
+ // If MSB < LSB, then the bits are reversed. Otherwise, the bitstring is read and
+ // shifted without further modification.
+ //
+ uint32_t consumeBits(uint32_t MSB, uint32_t LSB)
+ {
+ bool reversed = MSB < LSB;
+ if(reversed)
+ {
+ std::swap(MSB, LSB);
+ }
+ ASSERT(MSB - LSB + 1 < sizeof(uint32_t) * 8);
+
+ uint32_t numBits = MSB - LSB + 1;
+ uint32_t mask = (1 << numBits) - 1;
+ // Read the low N bits
+ uint32_t bits = (low64 & mask);
+
+ low64 >>= numBits;
+ // Put the low N bits of high64 into the high 64-N bits of low64
+ low64 |= (high64 & mask) << (sizeof(high64) * 8 - numBits);
+ high64 >>= numBits;
+
+ if(reversed)
+ {
+ uint32_t tmp = 0;
+ for(uint32_t numSwaps = 0; numSwaps < numBits; numSwaps++)
+ {
+ tmp <<= 1;
+ tmp |= (bits & 1);
+ bits >>= 1;
+ }
+
+ bits = tmp;
+ }
+
+ return bits << LSB;
+ }
+};
+
+struct IndexInfo
+{
+ uint64_t value;
+ int numBits;
+};
+
+// Interpolates between two endpoints, then does a final unquantization step
+Color interpolate(RGBf e0, RGBf e1, const IndexInfo &index, bool isSigned)
+{
+ static constexpr uint32_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
+ static constexpr uint32_t weights4[] = { 0, 4, 9, 13, 17, 21, 26, 30,
+ 34, 38, 43, 47, 51, 55, 60, 64 };
+ static constexpr uint32_t const *weightsN[] = {
+ nullptr, nullptr, nullptr, weights3, weights4
+ };
+ auto weights = weightsN[index.numBits];
+ ASSERT_MSG(weights != nullptr, "Unexpected number of index bits: %d", (int)index.numBits);
+ Color color;
+ uint32_t e0Weight = 64 - weights[index.value];
+ uint32_t e1Weight = weights[index.value];
+
+ for(int i = 0; i < RGBfChannels; i++)
+ {
+ int32_t e0Channel = e0.channel[i];
+ int32_t e1Channel = e1.channel[i];
+
+ if(isSigned)
+ {
+ e0Channel = extendSign(e0Channel, 16);
+ e1Channel = extendSign(e1Channel, 16);
+ }
+
+ int32_t e0Value = e0Channel * e0Weight;
+ int32_t e1Value = e1Channel * e1Weight;
+
+ uint32_t tmp = ((e0Value + e1Value + 32) >> 6);
+
+ // Need to unquantize value to limit it to the legal range of half-precision
+ // floats. We do this by scaling by 31/32 or 31/64 depending on if the value
+ // is signed or unsigned.
+ if(isSigned)
+ {
+ tmp = ((tmp & 0x80000000) != 0) ? (((~tmp + 1) * 31) >> 5) | 0x8000 : (tmp * 31) >> 5;
+ // Don't return -0.0f, just normalize it to 0.0f.
+ if(tmp == 0x8000)
+ tmp = 0;
+ }
+ else
+ {
+ tmp = (tmp * 31) >> 6;
+ }
+
+ color.channel[i] = (uint16_t)tmp;
+ }
+
+ return color;
+}
+
+enum DataType
+{
+ // Endpoints
+ EP0 = 0,
+ EP1 = 1,
+ EP2 = 2,
+ EP3 = 3,
+ Mode,
+ Partition,
+};
+
+enum Channel
+{
+ R = 0,
+ G = 1,
+ B = 2,
+ None,
+};
+
+struct DeltaBits
+{
+ size_t channel[3];
+
+ DeltaBits()
+ {
+ channel[R] = 0;
+ channel[G] = 0;
+ channel[B] = 0;
+ }
+
+ DeltaBits(int r, int g, int b)
+ {
+ channel[R] = r;
+ channel[G] = g;
+ channel[B] = b;
+ }
+};
+
+struct ModeDesc
+{
+ int number;
+ bool hasDelta;
+ int partitionCount;
+ int endpointBits;
+ int numEndpoints;
+ DeltaBits deltaBits;
+
+ ModeDesc()
+ : number(-1)
+ , hasDelta(false)
+ , partitionCount(0)
+ , endpointBits(0)
+ , numEndpoints(0)
+ {
+ }
+
+ ModeDesc(int number, bool hasDelta, int partitionCount, int endpointBits, DeltaBits deltaBits)
+ : number(number)
+ , hasDelta(hasDelta)
+ , partitionCount(partitionCount)
+ , endpointBits(endpointBits)
+ , deltaBits(deltaBits)
+ {
+ numEndpoints = partitionCount * 2;
+ }
+};
+
+struct BlockDesc
+{
+ DataType type;
+ Channel channel;
+ int MSB;
+ int LSB;
+ ModeDesc modeDesc;
+
+ BlockDesc() = default;
+
+ BlockDesc(DataType type, Channel channel, int MSB, int LSB, ModeDesc modeDesc)
+ : type(type)
+ , channel(channel)
+ , MSB(MSB)
+ , LSB(LSB)
+ , modeDesc(modeDesc)
+ {
+ }
+
+ BlockDesc(DataType type, Channel channel, int MSB, int LSB)
+ : type(type)
+ , channel(channel)
+ , MSB(MSB)
+ , LSB(LSB)
+ {
+ }
+};
+
+// Table describing the bitfields for each mode from the LSB to the MSB before
+// the index data starts.
+//
+// The numbers come from the BC6h block description. The basic format is a list of bitfield
+// descriptors of the form:
+// {Type, Channel, MSB, LSB}
+// * Type describes which endpoint this is, or if this is a mode or a partition number.
+// * Channel describes one of the 3 color channels within an endpoint
+// * MSB and LSB specificy:
+// * The size of the bitfield being read
+// * The position of the bitfield within the variable it is being read to
+// * And if the bitfield is stored in reverse bit order
+// If MSB < LSB then the bitfield is stored in reverse order. The size of the bitfield
+// is abs(MSB-LSB+1). And the position of the bitfield within the variable is
+// min(LSB, MSB).
+//
+// Invalid or reserved modes do not have any fields within them.
+static const std::vector<BlockDesc> blockDescs[32] = {
+ // clang-format off
+ // Mode 0
+ { { Mode, None, 1, 0, { 0, true, 2, 10, { 5, 5, 5 } } },
+ { EP2, G, 4, 4 }, { EP2, B, 4, 4 }, { EP3, B, 4, 4 },
+ { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+ { EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
+ { EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
+ { EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 },
+ { EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 },
+ { EP3, B, 3, 3 },
+ { Partition, None, 4, 0 } },
+ // Mode 1
+ { { Mode, None, 1, 0, { 1, true, 2, 7, { 6, 6, 6 } } },
+ { EP2, G, 5, 5 }, { EP3, G, 5, 4 }, { EP0, R, 6, 0 },
+ { EP3, B, 1, 0 }, { EP2, B, 4, 4 }, { EP0, G, 6, 0 },
+ { EP2, B, 5, 5 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 },
+ { EP0, B, 6, 0 }, { EP3, B, 3, 3 }, { EP3, B, 5, 5 },
+ { EP3, B, 4, 4 }, { EP1, R, 5, 0 }, { EP2, G, 3, 0 },
+ { EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 5, 0 },
+ { EP2, B, 3, 0 }, { EP2, R, 5, 0 }, { EP3, R, 5, 0 },
+ { Partition, None, 4, 0 } },
+ // Mode 2
+ { { Mode, None, 4, 0, { 2, true, 2, 11, { 5, 4, 4 } } },
+ { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+ { EP1, R, 4, 0 }, { EP0, R, 10, 10 }, { EP2, G, 3, 0 },
+ { EP1, G, 3, 0 }, { EP0, G, 10, 10 }, { EP3, B, 0, 0 },
+ { EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 },
+ { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
+ { EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
+ { Partition, None, 4, 0 } },
+ // Mode 3
+ {
+ { Mode, None, 4, 0, { 3, false, 1, 10, { 0, 0, 0 } } },
+ { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+ { EP1, R, 9, 0 }, { EP1, G, 9, 0 }, { EP1, B, 9, 0 },
+ },
+ // Mode 4: Illegal
+ {},
+ // Mode 5: Illegal
+ {},
+ // Mode 6
+ { { Mode, None, 4, 0, { 6, true, 2, 11, { 4, 5, 4 } } },
+ { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+ { EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP3, G, 4, 4 },
+ { EP2, G, 3, 0 }, { EP1, G, 4, 0 }, { EP0, G, 10, 10 },
+ { EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 },
+ { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 },
+ { EP3, B, 0, 0 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 },
+ { EP2, G, 4, 4 }, { EP3, B, 3, 3 },
+ { Partition, None, 4, 0 } },
+ // Mode 7
+ {
+ { Mode, None, 4, 0, { 7, true, 1, 11, { 9, 9, 9 } } },
+ { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+ { EP1, R, 8, 0 }, { EP0, R, 10, 10 }, { EP1, G, 8, 0 },
+ { EP0, G, 10, 10 }, { EP1, B, 8, 0 }, { EP0, B, 10, 10 },
+ },
+ // Mode 8: Illegal
+ {},
+ // Mode 9: Illegal
+ {},
+ // Mode 10
+ { { Mode, None, 4, 0, { 10, true, 2, 11, { 4, 4, 5 } } },
+ { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+ { EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP2, B, 4, 4 },
+ { EP2, G, 3, 0 }, { EP1, G, 3, 0 }, { EP0, G, 10, 10 },
+ { EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
+ { EP0, B, 10, 10 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 },
+ { EP3, B, 1, 1 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 },
+ { EP3, B, 4, 4 }, { EP3, B, 3, 3 },
+ { Partition, None, 4, 0 } },
+ // Mode 11
+ {
+ { Mode, None, 4, 0, { 11, true, 1, 12, { 8, 8, 8 } } },
+ { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+ { EP1, R, 7, 0 }, { EP0, R, 10, 11 }, { EP1, G, 7, 0 },
+ { EP0, G, 10, 11 }, { EP1, B, 7, 0 }, { EP0, B, 10, 11 },
+ },
+ // Mode 12: Illegal
+ {},
+ // Mode 13: Illegal
+ {},
+ // Mode 14
+ { { Mode, None, 4, 0, { 14, true, 2, 9, { 5, 5, 5 } } },
+ { EP0, R, 8, 0 }, { EP2, B, 4, 4 }, { EP0, G, 8, 0 },
+ { EP2, G, 4, 4 }, { EP0, B, 8, 0 }, { EP3, B, 4, 4 },
+ { EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
+ { EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
+ { EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 },
+ { EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 },
+ { EP3, B, 3, 3 },
+ { Partition, None, 4, 0 } },
+ // Mode 15
+ {
+ { Mode, None, 4, 0, { 15, true, 1, 16, { 4, 4, 4 } } },
+ { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+ { EP1, R, 3, 0 }, { EP0, R, 10, 15 }, { EP1, G, 3, 0 },
+ { EP0, G, 10, 15 }, { EP1, B, 3, 0 }, { EP0, B, 10, 15 },
+ },
+ // Mode 16: Illegal
+ {},
+ // Mode 17: Illegal
+ {},
+ // Mode 18
+ { { Mode, None, 4, 0, { 18, true, 2, 8, { 6, 5, 5 } } },
+ { EP0, R, 7, 0 }, { EP3, G, 4, 4 }, { EP2, B, 4, 4 },
+ { EP0, G, 7, 0 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 },
+ { EP0, B, 7, 0 }, { EP3, B, 3, 3 }, { EP3, B, 4, 4 },
+ { EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 4, 0 },
+ { EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
+ { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 5, 0 },
+ { EP3, R, 5, 0 },
+ { Partition, None, 4, 0 } },
+ // Mode 19: Reserved
+ {},
+ // Mode 20: Illegal
+ {},
+ // Mode 21: Illegal
+ {},
+ // Mode 22:
+ { { Mode, None, 4, 0, { 22, true, 2, 8, { 5, 6, 5 } } },
+ { EP0, R, 7, 0 }, { EP3, B, 0, 0 }, { EP2, B, 4, 4 },
+ { EP0, G, 7, 0 }, { EP2, G, 5, 5 }, { EP2, G, 4, 4 },
+ { EP0, B, 7, 0 }, { EP3, G, 5, 5 }, { EP3, B, 4, 4 },
+ { EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
+ { EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
+ { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
+ { EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
+ { Partition, None, 4, 0 } },
+ // Mode 23: Reserved
+ {},
+ // Mode 24: Illegal
+ {},
+ // Mode 25: Illegal
+ {},
+ // Mode 26
+ { { Mode, None, 4, 0, { 26, true, 2, 8, { 5, 5, 6 } } },
+ { EP0, R, 7, 0 }, { EP3, B, 1, 1 }, { EP2, B, 4, 4 },
+ { EP0, G, 7, 0 }, { EP2, B, 5, 5 }, { EP2, G, 4, 4 },
+ { EP0, B, 7, 0 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 },
+ { EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
+ { EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
+ { EP1, B, 5, 0 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
+ { EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
+ { Partition, None, 4, 0 } },
+ // Mode 27: Reserved
+ {},
+ // Mode 28: Illegal
+ {},
+ // Mode 29: Illegal
+ {},
+ // Mode 30
+ { { Mode, None, 4, 0, { 30, false, 2, 6, { 0, 0, 0 } } },
+ { EP0, R, 5, 0 }, { EP3, G, 4, 4 }, { EP3, B, 0, 0 },
+ { EP3, B, 1, 1 }, { EP2, B, 4, 4 }, { EP0, G, 5, 0 },
+ { EP2, G, 5, 5 }, { EP2, B, 5, 5 }, { EP3, B, 2, 2 },
+ { EP2, G, 4, 4 }, { EP0, B, 5, 0 }, { EP3, G, 5, 5 },
+ { EP3, B, 3, 3 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 },
+ { EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 5, 0 },
+ { EP3, G, 3, 0 }, { EP1, B, 5, 0 }, { EP2, B, 3, 0 },
+ { EP2, R, 5, 0 }, { EP3, R, 5, 0 },
+ { Partition, None, 4, 0 } },
+ // Mode 31: Reserved
+ {},
+ // clang-format on
+};
+
+struct Block
+{
+ uint64_t low64;
+ uint64_t high64;
+
+ void decode(uint8_t *dst, int dstX, int dstY, int dstWidth, int dstHeight, size_t dstPitch, size_t dstBpp, bool isSigned) const
+ {
+ uint8_t mode = 0;
+ Data data(low64, high64);
+ ASSERT(dstBpp == sizeof(Color));
+
+ if((data.low64 & 0x2) == 0)
+ {
+ mode = data.consumeBits(1, 0);
+ }
+ else
+ {
+ mode = data.consumeBits(4, 0);
+ }
+
+ // Illegal or reserved mode
+ if(blockDescs[mode].size() == 0)
+ {
+ for(int y = 0; y < 4 && y + dstY < dstHeight; y++)
+ {
+ for(int x = 0; x < 4 && x + dstX < dstWidth; x++)
+ {
+ auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
+ out->rgba = { 0, 0, 0 };
+ }
+ }
+ return;
+ }
+
+ RGBf e[4];
+ e[0].isSigned = e[1].isSigned = e[2].isSigned = e[3].isSigned = isSigned;
+
+ int partition = 0;
+ ModeDesc modeDesc;
+ // For sanity checks
+ modeDesc.number = -1;
+ for(auto desc : blockDescs[mode])
+ {
+ switch(desc.type)
+ {
+ case Mode:
+ modeDesc = desc.modeDesc;
+ // Sanity check
+ ASSERT(modeDesc.number == mode);
+
+ e[0].size[0] = e[0].size[1] = e[0].size[2] = modeDesc.endpointBits;
+ for(int i = 0; i < RGBfChannels; i++)
+ {
+ if(modeDesc.hasDelta)
+ {
+ e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.deltaBits.channel[i];
+ }
+ else
+ {
+ e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.endpointBits;
+ }
+ }
+ break;
+ case Partition:
+ partition |= data.consumeBits(desc.MSB, desc.LSB);
+ break;
+ case EP0:
+ case EP1:
+ case EP2:
+ case EP3:
+ e[desc.type].channel[desc.channel] |= data.consumeBits(desc.MSB, desc.LSB);
+ break;
+ default:
+ ASSERT_MSG(false, "Unexpected enum value: %d", (int)desc.type);
+ return;
+ }
+ }
+
+ ASSERT_MSG(modeDesc.number != -1, "Failed to decode mode %d", mode);
+
+ // Sign extension
+ if(isSigned)
+ {
+ for(int ep = 0; ep < modeDesc.numEndpoints; ep++)
+ {
+ e[ep].extendSign();
+ }
+ }
+ else if(modeDesc.hasDelta)
+ {
+ // Don't sign-extend the base endpoint in an unsigned format.
+ for(int ep = 1; ep < modeDesc.numEndpoints; ep++)
+ {
+ e[ep].extendSign();
+ }
+ }
+
+ // Turn the deltas into endpoints
+ if(modeDesc.hasDelta)
+ {
+ for(int ep = 1; ep < modeDesc.numEndpoints; ep++)
+ {
+ e[ep].resolveDelta(e[0]);
+ }
+ }
+
+ for(int ep = 0; ep < modeDesc.numEndpoints; ep++)
+ {
+ e[ep].unquantize();
+ }
+
+ // Get the indices, calculate final colors, and output
+ for(int y = 0; y < 4; y++)
+ {
+ for(int x = 0; x < 4; x++)
+ {
+ int pixelNum = x + y * 4;
+ IndexInfo idx;
+ bool isAnchor = false;
+ int firstEndpoint = 0;
+ // Bc6H can have either 1 or 2 petitions depending on the mode.
+ // The number of petitions affects the number of indices with implicit
+ // leading 0 bits and the number of bits per index.
+ if(modeDesc.partitionCount == 1)
+ {
+ idx.numBits = 4;
+ // There's an implicit leading 0 bit for the first idx
+ isAnchor = (pixelNum == 0);
+ }
+ else
+ {
+ idx.numBits = 3;
+ // There are 2 indices with implicit leading 0-bits.
+ isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition]));
+ firstEndpoint = PartitionTable2[partition][pixelNum] * 2;
+ }
+
+ idx.value = data.consumeBits(idx.numBits - isAnchor - 1, 0);
+
+ // Don't exit the loop early, we need to consume these index bits regardless if
+ // we actually output them or not.
+ if((y + dstY >= dstHeight) || (x + dstX >= dstWidth))
+ {
+ continue;
+ }
+
+ Color color = interpolate(e[firstEndpoint], e[firstEndpoint + 1], idx, isSigned);
+ auto out = reinterpret_cast<Color *>(dst + dstBpp * x + dstPitch * y);
+ *out = color;
+ }
+ }
+ }
+};
+
+} // namespace BC6H
+
namespace BC7 {
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_bptc.txt
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format
@@ -873,6 +1715,19 @@
}
}
break;
+ case 6: // BC6H
+ {
+ const BC6H::Block *block = reinterpret_cast<const BC6H::Block *>(src);
+ for(int y = 0; y < h; y += BlockHeight, dst += dy)
+ {
+ uint8_t *dstRow = dst;
+ for(int x = 0; x < w; x += BlockWidth, ++block, dstRow += dx)
+ {
+ block->decode(dstRow, x, y, w, h, dstPitch, dstBpp, isSigned);
+ }
+ }
+ }
+ break;
case 7: // BC7
{
const BC7::Block *block = reinterpret_cast<const BC7::Block *>(src);
diff --git a/src/Device/BC_Decoder.hpp b/src/Device/BC_Decoder.hpp
index 2b58349..cd621a8 100644
--- a/src/Device/BC_Decoder.hpp
+++ b/src/Device/BC_Decoder.hpp
@@ -23,7 +23,7 @@
/// @param dstPitch dst image pitch (bytes per row)
/// @param dstBpp dst image bytes per pixel
/// @param n n in BCn format
- /// @param isNoAlphaU BC1: true if RGB, BC2/BC3: unused, BC4/BC5: true if unsigned
+ /// @param isNoAlphaU BC1: true if RGB, BC2/BC3: unused, BC4/BC5/BC6H: true if unsigned
/// @return true if the decoding was performed
static bool Decode(const unsigned char *src, unsigned char *dst, int w, int h, int dstPitch, int dstBpp, int n, bool isNoAlphaU);
diff --git a/src/Vulkan/VkFormat.cpp b/src/Vulkan/VkFormat.cpp
index 6cf3e5c..f973516 100644
--- a/src/Vulkan/VkFormat.cpp
+++ b/src/Vulkan/VkFormat.cpp
@@ -626,7 +626,7 @@
return VK_FORMAT_R8G8B8A8_SRGB;
case VK_FORMAT_BC6H_UFLOAT_BLOCK:
case VK_FORMAT_BC6H_SFLOAT_BLOCK:
- return VK_FORMAT_R32G32B32A32_SFLOAT;
+ return VK_FORMAT_R16G16B16A16_SFLOAT;
default:
UNSUPPORTED("format: %d", int(format));
return VK_FORMAT_UNDEFINED;
diff --git a/src/Vulkan/VkImage.cpp b/src/Vulkan/VkImage.cpp
index 68f4d76..555e7d1 100644
--- a/src/Vulkan/VkImage.cpp
+++ b/src/Vulkan/VkImage.cpp
@@ -91,8 +91,8 @@
}
// Returns true for BC1 if we have an RGB format, false for RGBA
-// Returns true for BC4 and BC5 if we have an unsigned format, false for signed
-// Ignored by BC2, BC3, BC6 and BC7
+// Returns true for BC4, BC5, BC6H if we have an unsigned format, false for signed
+// Ignored by BC2, BC3, and BC7
bool GetNoAlphaOrUnsigned(const vk::Format &format)
{
switch(format)
@@ -101,6 +101,7 @@
case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
case VK_FORMAT_BC4_UNORM_BLOCK:
case VK_FORMAT_BC5_UNORM_BLOCK:
+ case VK_FORMAT_BC6H_UFLOAT_BLOCK:
return true;
case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
@@ -110,7 +111,6 @@
case VK_FORMAT_BC3_SRGB_BLOCK:
case VK_FORMAT_BC4_SNORM_BLOCK:
case VK_FORMAT_BC5_SNORM_BLOCK:
- case VK_FORMAT_BC6H_UFLOAT_BLOCK:
case VK_FORMAT_BC6H_SFLOAT_BLOCK:
case VK_FORMAT_BC7_SRGB_BLOCK:
case VK_FORMAT_BC7_UNORM_BLOCK:
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 77168b1..6241cd6 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -90,7 +90,7 @@
#else
VK_FALSE, // textureCompressionASTC_LDR
#endif
- VK_FALSE, // textureCompressionBC
+ VK_TRUE, // textureCompressionBC
VK_FALSE, // occlusionQueryPrecise
VK_FALSE, // pipelineStatisticsQuery
VK_TRUE, // vertexPipelineStoresAndAtomics
@@ -530,6 +530,8 @@
case VK_FORMAT_BC4_SNORM_BLOCK:
case VK_FORMAT_BC5_UNORM_BLOCK:
case VK_FORMAT_BC5_SNORM_BLOCK:
+ case VK_FORMAT_BC6H_UFLOAT_BLOCK:
+ case VK_FORMAT_BC6H_SFLOAT_BLOCK:
case VK_FORMAT_BC7_UNORM_BLOCK:
case VK_FORMAT_BC7_SRGB_BLOCK:
case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: