Implement support for YV12 texture sampling. Bug 21572252 Change-Id: Iaf54b4d960dbc243c40f981e1c73c199481e2d28 Reviewed-on: https://swiftshader-review.googlesource.com/3930 Tested-by: Nicolas Capens <capn@google.com> Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/Common/Math.hpp b/src/Common/Math.hpp index e60c691..07cb7a0 100644 --- a/src/Common/Math.hpp +++ b/src/Common/Math.hpp
@@ -281,6 +281,12 @@ } uint64_t FNV_1a(const unsigned char *data, int size); // Fowler-Noll-Vo hash function + + // Round value up to the nearest multiple of alignment; an already-aligned value is returned unchanged. alignment is used as a divisor and must be non-zero. + inline unsigned int align(unsigned int value, unsigned int alignment) + { + return ((value + alignment - 1) / alignment) * alignment; + } } #endif // sw_Math_hpp
diff --git a/src/OpenGL/common/AndroidCommon.cpp b/src/OpenGL/common/AndroidCommon.cpp index 8a79cac..dd2c59f 100644 --- a/src/OpenGL/common/AndroidCommon.cpp +++ b/src/OpenGL/common/AndroidCommon.cpp
@@ -3,7 +3,10 @@ #include "GL/glext.h" #include "EGL/egl.h" -#define GL_RGB565_OES 0x8D62 +#define GL_RGB565 0x8D62 +#define SW_YV12_BT601 0x32315659 // YCrCb 4:2:0 Planar, 16-byte aligned, BT.601 color space, studio swing +#define SW_YV12_BT709 0x48315659 // YCrCb 4:2:0 Planar, 16-byte aligned, BT.709 color space, studio swing +#define SW_YV12_JFIF 0x4A315659 // YCrCb 4:2:0 Planar, 16-byte aligned, BT.601 color space, full swing #include "AndroidCommon.hpp" @@ -25,16 +28,17 @@ case HAL_PIXEL_FORMAT_RGB_565: #if LATER if (GrallocModule::getInstance()->supportsConversion()) { - return GL_RGB565_OES; + return GL_RGB565; } else { ALOGE("%s badness converting gralloc not supported for RGB_565", __FUNCTION__); - return GL_RGB565_OES; + return GL_RGB565; } #else - return GL_RGB565_OES; + return GL_RGB565; #endif case HAL_PIXEL_FORMAT_YV12: + return SW_YV12_BT601; case HAL_PIXEL_FORMAT_BLOB: case HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED: default: @@ -66,6 +70,7 @@ return GL_UNSIGNED_SHORT_5_6_5; #endif case HAL_PIXEL_FORMAT_YV12: + return GL_UNSIGNED_BYTE; case HAL_PIXEL_FORMAT_BLOB: case HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED: default: @@ -73,49 +78,3 @@ } return GL_UNSIGNED_BYTE; } - -// Used in V1 & V2 Context.cpp -GLenum isSupportedAndroidBuffer(GLuint name) -{ - ANativeWindowBuffer *nativeBuffer = reinterpret_cast<ANativeWindowBuffer*>(name); - - if (!name) - { - ALOGE("badness %s called with name==NULL %s:%d", __FUNCTION__, __FILE__, __LINE__); - return EGL_BAD_PARAMETER; - } - if (nativeBuffer->common.magic != ANDROID_NATIVE_BUFFER_MAGIC) - { - ALOGE("badness %s failed: bad magic=%x, expected=%x", - __FUNCTION__, nativeBuffer->common.magic, ANDROID_NATIVE_BUFFER_MAGIC); - return EGL_BAD_PARAMETER; - } - - if (nativeBuffer->common.version != sizeof(ANativeWindowBuffer)) - { - ALOGE("badness %s failed: bad size=%d, expected=%d", - __FUNCTION__, nativeBuffer->common.version, sizeof(ANativeWindowBuffer)); - return EGL_BAD_PARAMETER; - } - - switch(nativeBuffer->format) 
- { - case HAL_PIXEL_FORMAT_RGBA_8888: - case HAL_PIXEL_FORMAT_RGBX_8888: - return EGL_SUCCESS; - case HAL_PIXEL_FORMAT_RGB_565: -#if LATER - if (GrallocModule::getInstance()->supportsConversion()) { - return EGL_SUCCESS; - } else { - ALOGE("badness %s failed: conversion not supported", __FUNCTION__ ); - return EGL_BAD_PARAMETER; - } -#else - return EGL_SUCCESS; -#endif - default: - ALOGE("badness %s failed: bad format=%x", __FUNCTION__, nativeBuffer->format); - return EGL_BAD_PARAMETER; - } -}
diff --git a/src/OpenGL/common/AndroidCommon.hpp b/src/OpenGL/common/AndroidCommon.hpp index e02171d..15c6224 100644 --- a/src/OpenGL/common/AndroidCommon.hpp +++ b/src/OpenGL/common/AndroidCommon.hpp
@@ -1,18 +1,10 @@ #ifndef ANDROID_COMMON #define ANDROID_COMMON -namespace egl -{ -class Image; -} - // Used internally GLenum getColorFormatFromAndroid(int format); // Used internally GLenum getPixelFormatFromAndroid(int format); -// Used in V1 & V2 Context.cpp -GLenum isSupportedAndroidBuffer(GLuint name); - #endif // ANDROID_COMMON
diff --git a/src/OpenGL/common/Image.cpp b/src/OpenGL/common/Image.cpp index 691e77e..78122d2 100644 --- a/src/OpenGL/common/Image.cpp +++ b/src/OpenGL/common/Image.cpp
@@ -470,6 +470,18 @@ { return sw::FORMAT_A8; } + else if(format == SW_YV12_BT601) + { + return sw::FORMAT_YV12_BT601; + } + else if(format == SW_YV12_BT709) + { + return sw::FORMAT_YV12_BT709; + } + else if(format == SW_YV12_JFIF) + { + return sw::FORMAT_YV12_JFIF; + } else UNREACHABLE(format); } else if(type == GL_UNSIGNED_SHORT || type == GL_UNSIGNED_INT) @@ -506,7 +518,7 @@ } // Returns the size, in bytes, of a single texel in an Image - int ComputePixelSize(GLenum format, GLenum type) + static int ComputePixelSize(GLenum format, GLenum type) { switch(type) {
diff --git a/src/OpenGL/common/Image.hpp b/src/OpenGL/common/Image.hpp index 09e3012..830ef2c 100644 --- a/src/OpenGL/common/Image.hpp +++ b/src/OpenGL/common/Image.hpp
@@ -22,6 +22,11 @@ #define LOGLOCK(...) #endif +// Implementation-defined formats +#define SW_YV12_BT601 0x32315659 // YCrCb 4:2:0 Planar, 16-byte aligned, BT.601 color space, studio swing +#define SW_YV12_BT709 0x48315659 // YCrCb 4:2:0 Planar, 16-byte aligned, BT.709 color space, studio swing +#define SW_YV12_JFIF 0x4A315659 // YCrCb 4:2:0 Planar, 16-byte aligned, BT.601 color space, full swing + namespace egl { // Types common between gl.h and gl2.h @@ -31,7 +36,6 @@ typedef int GLsizei; sw::Format SelectInternalFormat(GLenum format, GLenum type); -int ComputePixelSize(GLenum format, GLenum type); GLsizei ComputePitch(GLsizei width, GLenum format, GLenum type, GLint alignment); GLsizei ComputeCompressedPitch(GLsizei width, GLenum format); GLsizei ComputeCompressedSize(GLsizei width, GLsizei height, GLenum format);
diff --git a/src/OpenGL/libGLES_CM/utilities.cpp b/src/OpenGL/libGLES_CM/utilities.cpp index ca3d892..729c9ef 100644 --- a/src/OpenGL/libGLES_CM/utilities.cpp +++ b/src/OpenGL/libGLES_CM/utilities.cpp
@@ -177,6 +177,9 @@ case GL_RGB: case GL_RGB565_OES: // GL_OES_framebuffer_object case GL_RGB8_OES: // GL_OES_rgb8_rgba8 + case SW_YV12_BT601: + case SW_YV12_BT709: + case SW_YV12_JFIF: return true; default: return false;
diff --git a/src/Renderer/Sampler.cpp b/src/Renderer/Sampler.cpp index 9f26e4e..f4d5061 100644 --- a/src/Renderer/Sampler.cpp +++ b/src/Renderer/Sampler.cpp
@@ -34,7 +34,7 @@ Sampler::Sampler() { // FIXME: Mipmap::init - static unsigned int zero = 0x00FF00FF; + static const unsigned int zero = 0x00FF00FF; for(int level = 0; level < MIPMAP_LEVELS; level++) { @@ -210,6 +210,34 @@ mipmap.sliceP[0] = sliceP; mipmap.sliceP[1] = sliceP; + + if(internalTextureFormat == FORMAT_YV12_BT601 || + internalTextureFormat == FORMAT_YV12_BT709 || + internalTextureFormat == FORMAT_YV12_JFIF) + { + unsigned int YStride = align(width, 16); + unsigned int YSize = YStride * height; + unsigned int CStride = align(YStride / 2, 16); + unsigned int CSize = CStride * height / 2; + + mipmap.buffer[1] = (byte*)mipmap.buffer[0] + YSize; + mipmap.buffer[2] = (byte*)mipmap.buffer[1] + CSize; + + texture.mipmap[1].uFrac = texture.mipmap[0].uFrac + 1; + texture.mipmap[1].vFrac = texture.mipmap[0].vFrac + 1; + texture.mipmap[1].width[0] = width / 2; + texture.mipmap[1].width[1] = width / 2; + texture.mipmap[1].width[2] = width / 2; + texture.mipmap[1].width[3] = width / 2; + texture.mipmap[1].height[0] = height / 2; + texture.mipmap[1].height[1] = height / 2; + texture.mipmap[1].height[2] = height / 2; + texture.mipmap[1].height[3] = height / 2; + texture.mipmap[1].onePitchP[0] = 1; + texture.mipmap[1].onePitchP[1] = CStride; + texture.mipmap[1].onePitchP[2] = 1; + texture.mipmap[1].onePitchP[3] = CStride; + } } }
diff --git a/src/Renderer/Sampler.hpp b/src/Renderer/Sampler.hpp index 7cb0ab6..ae8602b 100644 --- a/src/Renderer/Sampler.hpp +++ b/src/Renderer/Sampler.hpp
@@ -19,7 +19,7 @@ { struct Mipmap { - void *buffer[6]; + const void *buffer[6]; union {
diff --git a/src/Renderer/Surface.cpp b/src/Renderer/Surface.cpp index 8ab1319..99dafd3 100644 --- a/src/Renderer/Surface.cpp +++ b/src/Renderer/Surface.cpp
@@ -1148,6 +1148,9 @@ case FORMAT_DF16S8: return 2; case FORMAT_INTZ: return 4; case FORMAT_S8: return 1; + case FORMAT_YV12_BT601: return 1; // Y plane only + case FORMAT_YV12_BT709: return 1; // Y plane only + case FORMAT_YV12_JFIF: return 1; // Y plane only default: ASSERT(false); } @@ -1159,7 +1162,7 @@ { if(target || isDepth(format) || isStencil(format)) { - width = ((width + 1) & ~1); + width = align(width, 2); } switch(format) @@ -1178,6 +1181,10 @@ return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row case FORMAT_ATI2: return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: + return align(width, 16); default: return bytes(format) * width; } @@ -2073,8 +2080,8 @@ unsigned int Surface::size(int width, int height, int depth, Format format) { // Dimensions rounded up to multiples of 4, used for compressed formats - int width4 = (width + 3) & ~3; - int height4 = (height + 3) & ~3; + int width4 = align(width, 4); + int height4 = align(height, 4); switch(format) { @@ -2090,6 +2097,17 @@ #endif case FORMAT_ATI2: return width4 * height4 * depth; + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: + { + unsigned int YStride = align(width, 16); + unsigned int YSize = YStride * height; + unsigned int CStride = align(YStride / 2, 16); + unsigned int CSize = CStride * height / 2; + + return YSize + 2 * CSize; + } default: return bytes(format) * width * height * depth; } @@ -2181,6 +2199,9 @@ case FORMAT_L8: case FORMAT_L16: case FORMAT_A8L8: + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: return false; case FORMAT_R32F: case FORMAT_G32R32F: @@ -2224,6 +2245,9 @@ case FORMAT_L8: case FORMAT_L16: case FORMAT_A8L8: + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: return true; case FORMAT_V8U8: case FORMAT_X8L8V8U8: @@ -2369,6 +2393,9 @@ case FORMAT_L8: return 1; case FORMAT_L16: 
return 1; case FORMAT_A8L8: return 2; + case FORMAT_YV12_BT601: return 3; + case FORMAT_YV12_BT709: return 3; + case FORMAT_YV12_JFIF: return 3; default: ASSERT(false); } @@ -3338,6 +3365,9 @@ case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; + case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; + case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; + case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; default: ASSERT(false); }
diff --git a/src/Renderer/Surface.hpp b/src/Renderer/Surface.hpp index ba2d2fe..2111e3f 100644 --- a/src/Renderer/Surface.hpp +++ b/src/Renderer/Surface.hpp
@@ -125,6 +125,10 @@ // Quad layout framebuffer FORMAT_X8G8R8B8Q, FORMAT_A8G8R8B8Q, + // YUV formats + FORMAT_YV12_BT601, + FORMAT_YV12_BT709, + FORMAT_YV12_JFIF, // Full-swing BT.601 - FORMAT_LAST = FORMAT_A8G8R8B8Q + FORMAT_LAST = FORMAT_YV12_JFIF // Keep equal to the final enumerator above }; @@ -207,7 +211,6 @@ void *lockStencil(int front, Accessor client); void unlockStencil(); inline int getStencilPitchB() const; - inline int getStencilPitchP() const; inline int getStencilSliceB() const; inline int getMultiSampleCount() const; @@ -501,11 +504,6 @@ return stencil.pitchB; } - int Surface::getStencilPitchP() const - { - return stencil.pitchP; - } - int Surface::getStencilSliceB() const { return stencil.sliceB;
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp index 93a6480..b13eb1b 100644 --- a/src/Shader/SamplerCore.cpp +++ b/src/Shader/SamplerCore.cpp
@@ -172,6 +172,9 @@ case FORMAT_G8R8: case FORMAT_G16R16: case FORMAT_A16B16G16R16: + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: if(componentCount < 2) c.y = Short4(0x1000, 0x1000, 0x1000, 0x1000); if(componentCount < 3) c.z = Short4(0x1000, 0x1000, 0x1000, 0x1000); if(componentCount < 4) c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000); @@ -1689,6 +1692,101 @@ ASSERT(false); } } + else if(hasYuvFormat()) + { + // Generic YPbPr to RGB transformation + // R = Y + 2 * (1 - Kr) * Pr + // G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr + // B = Y + 2 * (1 - Kb) * Pb + + float Kb = 0.114f; + float Kr = 0.299f; + int studioSwing = 1; + + switch(state.textureFormat) + { + case FORMAT_YV12_BT601: + Kb = 0.114f; + Kr = 0.299f; + studioSwing = 1; + break; + case FORMAT_YV12_BT709: + Kb = 0.0722f; + Kr = 0.2126f; + studioSwing = 1; + break; + case FORMAT_YV12_JFIF: + Kb = 0.114f; + Kr = 0.299f; + studioSwing = 0; + break; + default: + ASSERT(false); + } + + const float Kg = 1.0f - Kr - Kb; + + const float Rr = 2 * (1 - Kr); + const float Gb = -2 * Kb * (1 - Kb) / Kg; + const float Gr = -2 * Kr * (1 - Kr) / Kg; + const float Bb = 2 * (1 - Kb); + + // Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240] + const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f; + const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f; + const float Vv = studioSwing ? 
255.0f / (240 - 16) : 1.0f; + + const float Rv = Vv * Rr; + const float Gu = Uu * Gb; + const float Gv = Vv * Gr; + const float Bu = Uu * Bb; + + const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255; + const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255; + const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255; + + Int c0 = Int(*Pointer<Byte>(buffer[0] + index[0])); + Int c1 = Int(*Pointer<Byte>(buffer[0] + index[1])); + Int c2 = Int(*Pointer<Byte>(buffer[0] + index[2])); + Int c3 = Int(*Pointer<Byte>(buffer[0] + index[3])); + c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); + UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0))); + + computeIndices(index, uuuu, vvvv, wwww, mipmap + sizeof(Mipmap)); + c0 = Int(*Pointer<Byte>(buffer[1] + index[0])); + c1 = Int(*Pointer<Byte>(buffer[1] + index[1])); + c2 = Int(*Pointer<Byte>(buffer[1] + index[2])); + c3 = Int(*Pointer<Byte>(buffer[1] + index[3])); + c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); + UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0))); + + c0 = Int(*Pointer<Byte>(buffer[2] + index[0])); + c1 = Int(*Pointer<Byte>(buffer[2] + index[1])); + c2 = Int(*Pointer<Byte>(buffer[2] + index[2])); + c3 = Int(*Pointer<Byte>(buffer[2] + index[3])); + c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); + UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0))); + + const UShort4 yY = UShort4(iround(Yy * 0x4000)); + const UShort4 rV = UShort4(iround(Rv * 0x4000)); + const UShort4 gU = UShort4(iround(-Gu * 0x4000)); + const UShort4 gV = UShort4(iround(-Gv * 0x4000)); + const UShort4 bU = UShort4(iround(Bu * 0x4000)); + + const UShort4 r0 = UShort4(iround(-R0 * 0x4000)); + const UShort4 g0 = UShort4(iround(G0 * 0x4000)); + const UShort4 b0 = UShort4(iround(-B0 * 0x4000)); + + UShort4 y = MulHigh(Y, yY); + UShort4 r = SubSat(y + MulHigh(V, rV), r0); + UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV)); + UShort4 b = SubSat(y + MulHigh(U, bU), b0); + + c.x = Min(r, UShort4(0x3FFF)) << 2; + c.y = Min(g, 
UShort4(0x3FFF)) << 2; + c.z = Min(b, UShort4(0x3FFF)) << 2; + } + else ASSERT(false); } void SamplerCore::sampleTexel(Vector4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4]) @@ -1766,6 +1864,12 @@ if(state.textureType != TEXTURE_CUBE) { buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0])); + + if(hasYuvFormat()) + { + buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1])); + buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2])); + } } else { @@ -1927,6 +2031,9 @@ case FORMAT_V16U16: case FORMAT_A16W16V16U16: case FORMAT_Q16W16V16U16: + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: return false; default: ASSERT(false); @@ -1939,8 +2046,6 @@ { switch(state.textureFormat) { - case FORMAT_R5G6B5: - return false; case FORMAT_G8R8: case FORMAT_X8R8G8B8: case FORMAT_X8B8G8R8: @@ -1954,6 +2059,7 @@ case FORMAT_L8: case FORMAT_A8L8: return true; + case FORMAT_R5G6B5: case FORMAT_R32F: case FORMAT_G32R32F: case FORMAT_A32B32G32R32F: @@ -1966,6 +2072,9 @@ case FORMAT_V16U16: case FORMAT_A16W16V16U16: case FORMAT_Q16W16V16U16: + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: return false; default: ASSERT(false); @@ -1997,6 +2106,9 @@ case FORMAT_D32F_LOCKABLE: case FORMAT_D32FS8_TEXTURE: case FORMAT_D32FS8_SHADOW: + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: return false; case FORMAT_L16: case FORMAT_G16R16: @@ -2012,6 +2124,47 @@ return false; } + bool SamplerCore::hasYuvFormat() const + { + switch(state.textureFormat) + { + case FORMAT_YV12_BT601: + case FORMAT_YV12_BT709: + case FORMAT_YV12_JFIF: + return true; + case FORMAT_R5G6B5: + case FORMAT_G8R8: + case FORMAT_X8R8G8B8: + case FORMAT_X8B8G8R8: + case FORMAT_A8R8G8B8: + case FORMAT_A8B8G8R8: + case FORMAT_V8U8: + case FORMAT_Q8W8V8U8: + case FORMAT_X8L8V8U8: + case FORMAT_R32F: + case FORMAT_G32R32F: + case 
FORMAT_A32B32G32R32F: + case FORMAT_A8: + case FORMAT_R8: + case FORMAT_L8: + case FORMAT_A8L8: + case FORMAT_D32F_LOCKABLE: + case FORMAT_D32FS8_TEXTURE: + case FORMAT_D32FS8_SHADOW: + case FORMAT_L16: + case FORMAT_G16R16: + case FORMAT_A16B16G16R16: + case FORMAT_V16U16: + case FORMAT_A16W16V16U16: + case FORMAT_Q16W16V16U16: + return false; + default: + ASSERT(false); + } + + return false; + } + bool SamplerCore::isRGBComponent(int component) const { switch(state.textureFormat) @@ -2041,6 +2194,9 @@ case FORMAT_V16U16: return false; case FORMAT_A16W16V16U16: return false; case FORMAT_Q16W16V16U16: return false; + case FORMAT_YV12_BT601: return component < 3; + case FORMAT_YV12_BT709: return component < 3; + case FORMAT_YV12_JFIF: return component < 3; default: ASSERT(false); }
diff --git a/src/Shader/SamplerCore.hpp b/src/Shader/SamplerCore.hpp index 139220a..12c6cd6 100644 --- a/src/Shader/SamplerCore.hpp +++ b/src/Shader/SamplerCore.hpp
@@ -63,6 +63,7 @@ bool has16bitTextureFormat() const; bool has8bitTextureComponents() const; bool has16bitTextureComponents() const; + bool hasYuvFormat() const; bool isRGBComponent(int component) const; Pointer<Byte> &constants;