blob: 611df313d9719c3bd5d04fb6b8d1d95857d164e3 [file] [log] [blame]
Nicolas Capens68a82382018-10-02 13:16:55 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Blitter.hpp"
16
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050017#include "Pipeline/ShaderCore.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040018#include "Reactor/Reactor.hpp"
Nicolas Capens02cbe8e2019-08-05 15:10:05 -040019#include "System/Half.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050020#include "System/Memory.hpp"
Ben Claytonfccfc562019-12-17 20:37:31 +000021#include "Vulkan/VkBuffer.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080022#include "Vulkan/VkDebug.hpp"
Alexis Hetu33642272019-03-01 11:55:59 -050023#include "Vulkan/VkImage.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040024
Nicolas Capensb8c63932019-03-19 01:52:40 -040025#include <utility>
26
Alexis Hetu3716c202019-12-19 17:09:08 -050027namespace {
28rr::RValue<rr::Int> PackFields(rr::Int4 const &ints, const sw::int4 shifts)
29{
30 return (rr::Int(ints.x) << shifts[0]) |
31 (rr::Int(ints.y) << shifts[1]) |
32 (rr::Int(ints.z) << shifts[2]) |
33 (rr::Int(ints.w) << shifts[3]);
34}
35} // namespace
36
Nicolas Capens157ba262019-12-10 17:49:14 -050037namespace sw {
38
// Constructs a blitter with its two caches and the mutex that accompanies each.
// NOTE(review): the numeric arguments are presumably cache capacities (number
// of cached routines) - confirm against the cache type's constructor.
Blitter::Blitter()
    : blitMutex()
    , blitCache(1024)
    , cornerUpdateMutex()
    , cornerUpdateCache(64)  // We only need one of these per format
{
}
46
47Blitter::~Blitter()
48{
49}
50
Ben Claytonfccfc562019-12-17 20:37:31 +000051void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -050052{
53 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
54 vk::Format dstFormat = viewFormat.getAspectFormat(aspect);
55 if(dstFormat == VK_FORMAT_UNDEFINED)
Nicolas Capens68a82382018-10-02 13:16:55 -040056 {
Nicolas Capens157ba262019-12-10 17:49:14 -050057 return;
Nicolas Capens68a82382018-10-02 13:16:55 -040058 }
59
Nicolas Capens157ba262019-12-10 17:49:14 -050060 float *pPixel = static_cast<float *>(pixel);
Nicolas Capens81bc9d92019-12-16 15:05:57 -050061 if(viewFormat.isUnsignedNormalized())
Nicolas Capens68a82382018-10-02 13:16:55 -040062 {
Nicolas Capens157ba262019-12-10 17:49:14 -050063 pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f);
64 pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f);
65 pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f);
66 pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f);
67 }
Nicolas Capens81bc9d92019-12-16 15:05:57 -050068 else if(viewFormat.isSignedNormalized())
Nicolas Capens157ba262019-12-10 17:49:14 -050069 {
70 pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f);
71 pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f);
72 pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f);
73 pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -040074 }
75
Nicolas Capens157ba262019-12-10 17:49:14 -050076 if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea))
Alexis Hetu33642272019-03-01 11:55:59 -050077 {
Nicolas Capens157ba262019-12-10 17:49:14 -050078 return;
79 }
80
81 State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF });
82 auto blitRoutine = getBlitRoutine(state);
83 if(!blitRoutine)
84 {
85 return;
86 }
87
Ben Claytonfccfc562019-12-17 20:37:31 +000088 VkImageSubresourceLayers subresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -050089 subresourceRange.aspectMask,
90 subresourceRange.baseMipLevel,
91 subresourceRange.baseArrayLayer,
92 1
93 };
94
95 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
96 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
97
98 VkRect2D area = { { 0, 0 }, { 0, 0 } };
99 if(renderArea)
100 {
101 ASSERT(subresourceRange.levelCount == 1);
102 area = *renderArea;
103 }
104
105 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
106 {
107 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
108 if(!renderArea)
Alexis Hetu33642272019-03-01 11:55:59 -0500109 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500110 area.extent.width = extent.width;
111 area.extent.height = extent.height;
Alexis Hetu33642272019-03-01 11:55:59 -0500112 }
113
Ben Claytonfccfc562019-12-17 20:37:31 +0000114 BlitData data = {
115 pixel, nullptr, // source, dest
Chris Forbes88289192019-08-28 16:49:36 -0700116
Ben Claytonfccfc562019-12-17 20:37:31 +0000117 format.bytes(), // sPitchB
118 dest->rowPitchBytes(aspect, subresLayers.mipLevel), // dPitchB
119 0, // sSliceB (unused in clear operations)
120 dest->slicePitchBytes(aspect, subresLayers.mipLevel), // dSliceB
Alexis Hetu33642272019-03-01 11:55:59 -0500121
Ben Claytonfccfc562019-12-17 20:37:31 +0000122 0.5f, 0.5f, 0.0f, 0.0f, // x0, y0, w, h
Alexis Hetu33642272019-03-01 11:55:59 -0500123
Ben Claytonfccfc562019-12-17 20:37:31 +0000124 area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d
125 area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d
Nicolas Capens157ba262019-12-10 17:49:14 -0500126
Ben Claytonfccfc562019-12-17 20:37:31 +0000127 0, 0, // sWidth, sHeight
Alexis Hetu33642272019-03-01 11:55:59 -0500128 };
129
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500130 if(renderArea && dest->is3DSlice())
Alexis Hetu33642272019-03-01 11:55:59 -0500131 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500132 // Reinterpret layers as depth slices
133 subresLayers.baseArrayLayer = 0;
134 subresLayers.layerCount = 1;
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500135 for(uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
Alexis Hetu33642272019-03-01 11:55:59 -0500136 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000137 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -0500138 blitRoutine(&data);
Nicolas Capens68a82382018-10-02 13:16:55 -0400139 }
140 }
Nicolas Capens88ac3672019-08-01 13:22:34 -0400141 else
Nicolas Capens68a82382018-10-02 13:16:55 -0400142 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500143 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400144 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500145 for(uint32_t depth = 0; depth < extent.depth; depth++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400146 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500147 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
148
149 blitRoutine(&data);
150 }
151 }
152 }
153 }
154}
155
Ben Claytonfccfc562019-12-17 20:37:31 +0000156bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -0500157{
158 if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
159 {
160 return false;
161 }
162
Ben Claytonfccfc562019-12-17 20:37:31 +0000163 float *color = (float *)pixel;
Nicolas Capens157ba262019-12-10 17:49:14 -0500164 float r = color[0];
165 float g = color[1];
166 float b = color[2];
167 float a = color[3];
168
169 uint32_t packed;
170
171 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
172 switch(viewFormat)
173 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000174 case VK_FORMAT_R5G6B5_UNORM_PACK16:
175 packed = ((uint16_t)(31 * b + 0.5f) << 0) |
176 ((uint16_t)(63 * g + 0.5f) << 5) |
177 ((uint16_t)(31 * r + 0.5f) << 11);
178 break;
179 case VK_FORMAT_B5G6R5_UNORM_PACK16:
180 packed = ((uint16_t)(31 * r + 0.5f) << 0) |
181 ((uint16_t)(63 * g + 0.5f) << 5) |
182 ((uint16_t)(31 * b + 0.5f) << 11);
183 break;
184 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
185 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
186 case VK_FORMAT_R8G8B8A8_UNORM:
187 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
188 ((uint32_t)(255 * b + 0.5f) << 16) |
189 ((uint32_t)(255 * g + 0.5f) << 8) |
190 ((uint32_t)(255 * r + 0.5f) << 0);
191 break;
192 case VK_FORMAT_B8G8R8A8_UNORM:
193 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
194 ((uint32_t)(255 * r + 0.5f) << 16) |
195 ((uint32_t)(255 * g + 0.5f) << 8) |
196 ((uint32_t)(255 * b + 0.5f) << 0);
197 break;
198 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
199 packed = R11G11B10F(color);
200 break;
201 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
202 packed = RGB9E5(color);
203 break;
204 default:
205 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -0500206 }
207
Ben Claytonfccfc562019-12-17 20:37:31 +0000208 VkImageSubresourceLayers subresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -0500209 subresourceRange.aspectMask,
210 subresourceRange.baseMipLevel,
211 subresourceRange.baseArrayLayer,
212 1
213 };
214 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
215 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
216
217 VkRect2D area = { { 0, 0 }, { 0, 0 } };
218 if(renderArea)
219 {
220 ASSERT(subresourceRange.levelCount == 1);
221 area = *renderArea;
222 }
223
224 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
225 {
226 int rowPitchBytes = dest->rowPitchBytes(aspect, subresLayers.mipLevel);
227 int slicePitchBytes = dest->slicePitchBytes(aspect, subresLayers.mipLevel);
228 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
229 if(!renderArea)
230 {
231 area.extent.width = extent.width;
232 area.extent.height = extent.height;
233 }
234 if(dest->is3DSlice())
235 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000236 extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers
Nicolas Capens157ba262019-12-10 17:49:14 -0500237 }
238
239 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
240 {
241 for(uint32_t depth = 0; depth < extent.depth; depth++)
242 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000243 uint8_t *slice = (uint8_t *)dest->getTexelPointer(
244 { area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -0500245
246 for(int j = 0; j < dest->getSampleCountFlagBits(); j++)
247 {
248 uint8_t *d = slice;
249
250 switch(viewFormat.bytes())
251 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000252 case 2:
253 for(uint32_t i = 0; i < area.extent.height; i++)
254 {
255 ASSERT(d < dest->end());
256 sw::clear((uint16_t *)d, static_cast<uint16_t>(packed), area.extent.width);
257 d += rowPitchBytes;
258 }
259 break;
260 case 4:
261 for(uint32_t i = 0; i < area.extent.height; i++)
262 {
263 ASSERT(d < dest->end());
264 sw::clear((uint32_t *)d, packed, area.extent.width);
265 d += rowPitchBytes;
266 }
267 break;
268 default:
269 assert(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500270 }
271
272 slice += slicePitchBytes;
273 }
274 }
275 }
276 }
277
278 return true;
279}
280
// Builds the Reactor expression that loads one texel of state.sourceFormat from
// |element| and returns it as a Float4.
//
// The result starts out as (0, 0, 0, 1); channels the format does not provide
// keep those defaults unless a case overrides them. Integer and normalized
// channels are returned as their raw stored integer value converted to float;
// no scaling to [0, 1] happens here. For several one- and two-channel integer
// and normalized formats, w is set to the largest value of the channel type
// instead of 1. Formats without a case hit UNSUPPORTED and return the defaults.
Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
{
	Float4 c(0.0f, 0.0f, 0.0f, 1.0f);

	switch(state.sourceFormat)
	{
		case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
			// Low byte holds A (low nibble) and R (high nibble); high byte holds G and B.
			c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
			c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
			c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
			c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
			break;
		case VK_FORMAT_R8_SINT:
		case VK_FORMAT_R8_SNORM:
			c.x = Float(Int(*Pointer<SByte>(element)));
			c.w = float(0x7F);
			break;
		case VK_FORMAT_R8_UNORM:
		case VK_FORMAT_R8_UINT:
		case VK_FORMAT_R8_SRGB:
			c.x = Float(Int(*Pointer<Byte>(element)));
			c.w = float(0xFF);
			break;
		case VK_FORMAT_R16_SINT:
		case VK_FORMAT_R16_SNORM:
			c.x = Float(Int(*Pointer<Short>(element)));
			c.w = float(0x7FFF);
			break;
		case VK_FORMAT_R16_UNORM:
		case VK_FORMAT_R16_UINT:
			c.x = Float(Int(*Pointer<UShort>(element)));
			c.w = float(0xFFFF);
			break;
		case VK_FORMAT_R32_SINT:
			c.x = Float(*Pointer<Int>(element));
			c.w = float(0x7FFFFFFF);
			break;
		case VK_FORMAT_R32_UINT:
			c.x = Float(*Pointer<UInt>(element));
			c.w = float(0xFFFFFFFF);
			break;
		case VK_FORMAT_B8G8R8A8_SRGB:
		case VK_FORMAT_B8G8R8A8_UNORM:
			// Stored as B, G, R, A; swizzle into R, G, B, A order.
			c = Float4(*Pointer<Byte4>(element)).zyxw;
			break;
		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
		case VK_FORMAT_R8G8B8A8_SINT:
		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
		case VK_FORMAT_R8G8B8A8_SNORM:
			c = Float4(*Pointer<SByte4>(element));
			break;
		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
		case VK_FORMAT_R8G8B8A8_UNORM:
		case VK_FORMAT_R8G8B8A8_UINT:
		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
		case VK_FORMAT_R8G8B8A8_SRGB:
			c = Float4(*Pointer<Byte4>(element));
			break;
		case VK_FORMAT_R16G16B16A16_SINT:
			c = Float4(*Pointer<Short4>(element));
			break;
		case VK_FORMAT_R16G16B16A16_UNORM:
		case VK_FORMAT_R16G16B16A16_UINT:
			c = Float4(*Pointer<UShort4>(element));
			break;
		case VK_FORMAT_R32G32B32A32_SINT:
			c = Float4(*Pointer<Int4>(element));
			break;
		case VK_FORMAT_R32G32B32A32_UINT:
			c = Float4(*Pointer<UInt4>(element));
			break;
		case VK_FORMAT_R8G8_SINT:
		case VK_FORMAT_R8G8_SNORM:
			c.x = Float(Int(*Pointer<SByte>(element + 0)));
			c.y = Float(Int(*Pointer<SByte>(element + 1)));
			c.w = float(0x7F);
			break;
		case VK_FORMAT_R8G8_UNORM:
		case VK_FORMAT_R8G8_UINT:
		case VK_FORMAT_R8G8_SRGB:
			c.x = Float(Int(*Pointer<Byte>(element + 0)));
			c.y = Float(Int(*Pointer<Byte>(element + 1)));
			c.w = float(0xFF);
			break;
		case VK_FORMAT_R16G16_SINT:
		case VK_FORMAT_R16G16_SNORM:
			c.x = Float(Int(*Pointer<Short>(element + 0)));
			c.y = Float(Int(*Pointer<Short>(element + 2)));
			c.w = float(0x7FFF);
			break;
		case VK_FORMAT_R16G16_UNORM:
		case VK_FORMAT_R16G16_UINT:
			c.x = Float(Int(*Pointer<UShort>(element + 0)));
			c.y = Float(Int(*Pointer<UShort>(element + 2)));
			c.w = float(0xFFFF);
			break;
		case VK_FORMAT_R32G32_SINT:
			c.x = Float(*Pointer<Int>(element + 0));
			c.y = Float(*Pointer<Int>(element + 4));
			c.w = float(0x7FFFFFFF);
			break;
		case VK_FORMAT_R32G32_UINT:
			c.x = Float(*Pointer<UInt>(element + 0));
			c.y = Float(*Pointer<UInt>(element + 4));
			c.w = float(0xFFFFFFFF);
			break;
		case VK_FORMAT_R32G32B32A32_SFLOAT:
			c = *Pointer<Float4>(element);
			break;
		case VK_FORMAT_R32G32_SFLOAT:
			c.x = *Pointer<Float>(element + 0);
			c.y = *Pointer<Float>(element + 4);
			break;
		case VK_FORMAT_R32_SFLOAT:
			c.x = *Pointer<Float>(element);
			break;
		// The half-float cases cascade: each format reads its highest channel
		// and then falls through to read the channels it shares with the
		// narrower formats below it.
		case VK_FORMAT_R16G16B16A16_SFLOAT:
			c.w = Float(*Pointer<Half>(element + 6));
			// [[fallthrough]]
		case VK_FORMAT_R16G16B16_SFLOAT:
			c.z = Float(*Pointer<Half>(element + 4));
			// [[fallthrough]]
		case VK_FORMAT_R16G16_SFLOAT:
			c.y = Float(*Pointer<Half>(element + 2));
			// [[fallthrough]]
		case VK_FORMAT_R16_SFLOAT:
			c.x = Float(*Pointer<Half>(element));
			break;
		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
			c = r11g11b10Unpack(*Pointer<UInt>(element));
			break;
		case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
			// This type contains a common 5 bit exponent (E) and a 9 bit mantissa for each of R, G and B.
			c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF));          // R's mantissa (bits 0-8)
			c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9);   // G's mantissa (bits 9-17)
			c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18);  // B's mantissa (bits 18-26)
			c *= Float4(
			    // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
			    Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
			    // Since the 9 bit mantissa values currently stored in RGB were converted straight
			    // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
			    // are (1 << 9) times too high.
			    // Also, the exponent has 5 bits and we compute the exponent bias of floating point
			    // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15
			    // Exponent bias (15) + number of mantissa bits per component (9) = 24
			    Float(1.0f / (1 << 24)));
			// The multiply above also scaled w; restore it to 1.
			c.w = 1.0f;
			break;
		case VK_FORMAT_R5G6B5_UNORM_PACK16:
			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
			break;
		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
			c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
			break;
		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
			c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
			c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
			c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
			break;
		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
			// Same bit fields as A2B10G10R10, with red and blue exchanged.
			c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
			c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
			c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
			break;
		case VK_FORMAT_D16_UNORM:
			c.x = Float(Int((*Pointer<UShort>(element))));
			break;
		case VK_FORMAT_X8_D24_UNORM_PACK32:
			// Depth is taken from the upper 24 bits of the 32-bit word.
			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
			break;
		case VK_FORMAT_D32_SFLOAT:
			c.x = *Pointer<Float>(element);
			break;
		case VK_FORMAT_S8_UINT:
			c.x = Float(Int(*Pointer<Byte>(element)));
			break;
		default:
			UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
	}

	return c;
}
470
471void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
472{
473 bool writeR = state.writeRed;
474 bool writeG = state.writeGreen;
475 bool writeB = state.writeBlue;
476 bool writeA = state.writeAlpha;
477 bool writeRGBA = writeR && writeG && writeB && writeA;
478
479 switch(state.destFormat)
480 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000481 case VK_FORMAT_R4G4_UNORM_PACK8:
482 if(writeR | writeG)
Nicolas Capens157ba262019-12-10 17:49:14 -0500483 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000484 if(!writeR)
485 {
486 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
487 (*Pointer<Byte>(element) & Byte(0xF0));
488 }
489 else if(!writeG)
490 {
491 *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
492 (Byte(RoundInt(Float(c.x))) << Byte(4));
493 }
494 else
495 {
496 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
497 (Byte(RoundInt(Float(c.x))) << Byte(4));
498 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500499 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000500 break;
501 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
502 if(writeR || writeG || writeB || writeA)
Nicolas Capens157ba262019-12-10 17:49:14 -0500503 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000504 *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) : (*Pointer<UShort>(element) & UShort(0x000F))) |
505 (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) : (*Pointer<UShort>(element) & UShort(0x00F0))) |
506 (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) : (*Pointer<UShort>(element) & UShort(0x0F00))) |
507 (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) : (*Pointer<UShort>(element) & UShort(0xF000)));
508 }
509 break;
510 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
511 if(writeRGBA)
512 {
513 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
514 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
515 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
516 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
Nicolas Capens157ba262019-12-10 17:49:14 -0500517 }
518 else
519 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000520 unsigned short mask = (writeA ? 0x000F : 0x0000) |
521 (writeR ? 0x00F0 : 0x0000) |
522 (writeG ? 0x0F00 : 0x0000) |
523 (writeB ? 0xF000 : 0x0000);
524 unsigned short unmask = ~mask;
525 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
526 ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
527 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
528 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
529 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) &
530 UShort(mask));
Nicolas Capens157ba262019-12-10 17:49:14 -0500531 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000532 break;
533 case VK_FORMAT_B8G8R8A8_SRGB:
534 case VK_FORMAT_B8G8R8A8_UNORM:
535 if(writeRGBA)
536 {
537 Short4 c0 = RoundShort4(c.zyxw);
538 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
539 }
540 else
541 {
542 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
543 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
544 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
545 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
546 }
547 break;
548 case VK_FORMAT_B8G8R8_SNORM:
549 if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
550 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
551 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
552 break;
553 case VK_FORMAT_B8G8R8_UNORM:
554 case VK_FORMAT_B8G8R8_SRGB:
Nicolas Capens157ba262019-12-10 17:49:14 -0500555 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
556 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
557 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000558 break;
559 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
560 case VK_FORMAT_R8G8B8A8_UNORM:
561 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
562 case VK_FORMAT_R8G8B8A8_SRGB:
563 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
564 case VK_FORMAT_R8G8B8A8_UINT:
565 case VK_FORMAT_R8G8B8A8_USCALED:
566 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
567 if(writeRGBA)
568 {
569 Short4 c0 = RoundShort4(c);
570 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
571 }
572 else
573 {
574 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
575 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
576 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
577 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
578 }
579 break;
580 case VK_FORMAT_R32G32B32A32_SFLOAT:
581 if(writeRGBA)
582 {
583 *Pointer<Float4>(element) = c;
584 }
585 else
586 {
587 if(writeR) { *Pointer<Float>(element) = c.x; }
588 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
589 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
590 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
591 }
592 break;
593 case VK_FORMAT_R32G32B32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500594 if(writeR) { *Pointer<Float>(element) = c.x; }
595 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
596 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000597 break;
598 case VK_FORMAT_R32G32_SFLOAT:
599 if(writeR && writeG)
600 {
601 *Pointer<Float2>(element) = Float2(c);
602 }
603 else
604 {
605 if(writeR) { *Pointer<Float>(element) = c.x; }
606 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
607 }
608 break;
609 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500610 if(writeR) { *Pointer<Float>(element) = c.x; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000611 break;
612 case VK_FORMAT_R16G16B16A16_SFLOAT:
613 if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500614 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000615 case VK_FORMAT_R16G16B16_SFLOAT:
616 if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500617 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000618 case VK_FORMAT_R16G16_SFLOAT:
619 if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500620 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000621 case VK_FORMAT_R16_SFLOAT:
622 if(writeR) { *Pointer<Half>(element) = Half(c.x); }
623 break;
624 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500625 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -0500626 UInt rgb = r11g11b10Pack(c);
Nicolas Capens157ba262019-12-10 17:49:14 -0500627
628 UInt old = *Pointer<UInt>(element);
629
630 unsigned int mask = (writeR ? 0x000007FF : 0) |
631 (writeG ? 0x003FF800 : 0) |
632 (writeB ? 0xFFC00000 : 0);
633
634 *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask);
635 }
636 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000637 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500638 {
639 ASSERT(writeRGBA); // Can't sensibly write just part of this format.
640
641 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
642
643 constexpr int N = 9; // number of mantissa bits per component
644 constexpr int B = 15; // exponent bias
645 constexpr int E_max = 31; // maximum possible biased exponent value
646
647 // Maximum representable value.
648 constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B));
649
650 // Clamp components to valid range. NaN becomes 0.
Ben Claytonfccfc562019-12-17 20:37:31 +0000651 Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500652 Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max);
Ben Claytonfccfc562019-12-17 20:37:31 +0000653 Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500654
655 // We're reducing the mantissa to 9 bits, so we must round up if the next
656 // bit is 1. In other words add 0.5 to the new mantissa's position and
657 // allow overflow into the exponent so we can scale correctly.
658 constexpr int half = 1 << (23 - N);
659 Float red_r = As<Float>(As<Int>(red_c) + half);
660 Float green_r = As<Float>(As<Int>(green_c) + half);
661 Float blue_r = As<Float>(As<Int>(blue_c) + half);
662
663 // The largest component determines the shared exponent. It can't be lower
			// than 0 (after bias subtraction) so also limit to the minimum representable.
665 constexpr float min_s = 0.5f / (1 << B);
666 Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s));
667
668 // Obtain the reciprocal of the shared exponent by inverting the bits,
669 // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
670 // format has an implicit leading 1, but this shared component format does not.
671 Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2));
672
673 UInt R9 = RoundInt(red_c * scale);
674 UInt G9 = UInt(RoundInt(green_c * scale));
675 UInt B9 = UInt(RoundInt(blue_c * scale));
676 UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1;
677
678 UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9;
679
680 *Pointer<UInt>(element) = E5B9G9R9;
681 }
682 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000683 case VK_FORMAT_B8G8R8A8_SNORM:
684 if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
685 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
686 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
687 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
688 break;
689 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
690 case VK_FORMAT_R8G8B8A8_SINT:
691 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
692 case VK_FORMAT_R8G8B8A8_SNORM:
693 case VK_FORMAT_R8G8B8A8_SSCALED:
694 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
695 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500696 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000697 case VK_FORMAT_R8G8B8_SINT:
698 case VK_FORMAT_R8G8B8_SNORM:
699 case VK_FORMAT_R8G8B8_SSCALED:
700 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500701 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000702 case VK_FORMAT_R8G8_SINT:
703 case VK_FORMAT_R8G8_SNORM:
704 case VK_FORMAT_R8G8_SSCALED:
705 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500706 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000707 case VK_FORMAT_R8_SINT:
708 case VK_FORMAT_R8_SNORM:
709 case VK_FORMAT_R8_SSCALED:
710 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
711 break;
712 case VK_FORMAT_R8G8B8_UINT:
713 case VK_FORMAT_R8G8B8_UNORM:
714 case VK_FORMAT_R8G8B8_USCALED:
715 case VK_FORMAT_R8G8B8_SRGB:
716 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500717 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000718 case VK_FORMAT_R8G8_UINT:
719 case VK_FORMAT_R8G8_UNORM:
720 case VK_FORMAT_R8G8_USCALED:
721 case VK_FORMAT_R8G8_SRGB:
722 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500723 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000724 case VK_FORMAT_R8_UINT:
725 case VK_FORMAT_R8_UNORM:
726 case VK_FORMAT_R8_USCALED:
727 case VK_FORMAT_R8_SRGB:
728 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
729 break;
730 case VK_FORMAT_R16G16B16A16_SINT:
731 case VK_FORMAT_R16G16B16A16_SNORM:
732 case VK_FORMAT_R16G16B16A16_SSCALED:
733 if(writeRGBA)
734 {
735 *Pointer<Short4>(element) = Short4(RoundInt(c));
736 }
737 else
738 {
739 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
740 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
741 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
742 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
743 }
744 break;
745 case VK_FORMAT_R16G16B16_SINT:
746 case VK_FORMAT_R16G16B16_SNORM:
747 case VK_FORMAT_R16G16B16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500748 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
749 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
750 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000751 break;
752 case VK_FORMAT_R16G16_SINT:
753 case VK_FORMAT_R16G16_SNORM:
754 case VK_FORMAT_R16G16_SSCALED:
755 if(writeR && writeG)
756 {
757 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
758 }
759 else
760 {
761 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
762 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
763 }
764 break;
765 case VK_FORMAT_R16_SINT:
766 case VK_FORMAT_R16_SNORM:
767 case VK_FORMAT_R16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500768 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000769 break;
770 case VK_FORMAT_R16G16B16A16_UINT:
771 case VK_FORMAT_R16G16B16A16_UNORM:
772 case VK_FORMAT_R16G16B16A16_USCALED:
773 if(writeRGBA)
774 {
775 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
776 }
777 else
778 {
779 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
780 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
781 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
782 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
783 }
784 break;
785 case VK_FORMAT_R16G16B16_UINT:
786 case VK_FORMAT_R16G16B16_UNORM:
787 case VK_FORMAT_R16G16B16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500788 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
789 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
790 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000791 break;
792 case VK_FORMAT_R16G16_UINT:
793 case VK_FORMAT_R16G16_UNORM:
794 case VK_FORMAT_R16G16_USCALED:
795 if(writeR && writeG)
796 {
797 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
798 }
799 else
800 {
801 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
802 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
803 }
804 break;
805 case VK_FORMAT_R16_UINT:
806 case VK_FORMAT_R16_UNORM:
807 case VK_FORMAT_R16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500808 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000809 break;
810 case VK_FORMAT_R32G32B32A32_SINT:
811 if(writeRGBA)
812 {
813 *Pointer<Int4>(element) = RoundInt(c);
814 }
815 else
816 {
817 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
818 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
819 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
820 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
821 }
822 break;
823 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500824 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500825 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000826 case VK_FORMAT_R32G32_SINT:
827 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500828 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000829 case VK_FORMAT_R32_SINT:
830 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
831 break;
832 case VK_FORMAT_R32G32B32A32_UINT:
833 if(writeRGBA)
834 {
835 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
836 }
837 else
838 {
839 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
840 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
841 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
842 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
843 }
844 break;
845 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500846 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500847 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000848 case VK_FORMAT_R32G32_UINT:
849 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500850 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000851 case VK_FORMAT_R32_UINT:
852 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
853 break;
854 case VK_FORMAT_R5G6B5_UNORM_PACK16:
855 if(writeR && writeG && writeB)
856 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500857 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000858 }
859 else
860 {
861 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
862 unsigned short unmask = ~mask;
863 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500864 (UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000865 UShort(mask));
866 }
867 break;
868 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
869 if(writeRGBA)
870 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500871 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000872 }
873 else
874 {
875 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
876 (writeR ? 0x7C00 : 0x0000) |
877 (writeG ? 0x03E0 : 0x0000) |
878 (writeB ? 0x001F : 0x0000);
879 unsigned short unmask = ~mask;
880 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500881 (UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000882 UShort(mask));
883 }
884 break;
885 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
886 if(writeRGBA)
887 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500888 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000889 }
890 else
891 {
892 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
893 (writeR ? 0x7C00 : 0x0000) |
894 (writeG ? 0x03E0 : 0x0000) |
895 (writeB ? 0x001F : 0x0000);
896 unsigned short unmask = ~mask;
897 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500898 (UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000899 UShort(mask));
900 }
901 break;
902 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
903 if(writeRGBA)
904 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500905 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000906 }
907 else
908 {
909 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
910 (writeR ? 0x7C00 : 0x0000) |
911 (writeG ? 0x03E0 : 0x0000) |
912 (writeB ? 0x001F : 0x0000);
913 unsigned short unmask = ~mask;
914 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500915 (UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000916 UShort(mask));
917 }
918 break;
919 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
920 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
921 case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
922 if(writeRGBA)
923 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500924 *Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000925 }
926 else
927 {
928 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
929 (writeB ? 0x3FF00000 : 0x0000) |
930 (writeG ? 0x000FFC00 : 0x0000) |
931 (writeR ? 0x000003FF : 0x0000);
932 unsigned int unmask = ~mask;
933 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500934 (As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000935 UInt(mask));
936 }
937 break;
938 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
939 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
940 case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
941 if(writeRGBA)
942 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500943 *Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000944 }
945 else
946 {
947 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
948 (writeR ? 0x3FF00000 : 0x0000) |
949 (writeG ? 0x000FFC00 : 0x0000) |
950 (writeB ? 0x000003FF : 0x0000);
951 unsigned int unmask = ~mask;
952 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500953 (As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000954 UInt(mask));
955 }
956 break;
957 case VK_FORMAT_D16_UNORM:
958 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
959 break;
960 case VK_FORMAT_X8_D24_UNORM_PACK32:
961 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
962 break;
963 case VK_FORMAT_D32_SFLOAT:
964 *Pointer<Float>(element) = c.x;
965 break;
966 case VK_FORMAT_S8_UINT:
967 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
968 break;
969 default:
970 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
971 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500972 }
973}
974
975Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
976{
977 Int4 c(0, 0, 0, 1);
978
979 switch(state.sourceFormat)
980 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000981 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
982 case VK_FORMAT_R8G8B8A8_SINT:
983 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
984 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -0500985 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000986 case VK_FORMAT_R8G8_SINT:
987 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -0500988 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000989 case VK_FORMAT_R8_SINT:
990 c = Insert(c, Int(*Pointer<SByte>(element)), 0);
991 break;
992 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
993 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
994 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
995 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
996 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
997 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -0500998 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
999 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 2);
1000 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
1001 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 0);
1002 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
1003 break;
Ben Claytonfccfc562019-12-17 20:37:31 +00001004 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1005 case VK_FORMAT_R8G8B8A8_UINT:
1006 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
1007 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001008 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001009 case VK_FORMAT_R8G8_UINT:
1010 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001011 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001012 case VK_FORMAT_R8_UINT:
1013 case VK_FORMAT_S8_UINT:
1014 c = Insert(c, Int(*Pointer<Byte>(element)), 0);
1015 break;
1016 case VK_FORMAT_R16G16B16A16_SINT:
1017 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
1018 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001019 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001020 case VK_FORMAT_R16G16_SINT:
1021 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001022 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001023 case VK_FORMAT_R16_SINT:
1024 c = Insert(c, Int(*Pointer<Short>(element)), 0);
1025 break;
1026 case VK_FORMAT_R16G16B16A16_UINT:
1027 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
1028 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001029 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001030 case VK_FORMAT_R16G16_UINT:
1031 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001032 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001033 case VK_FORMAT_R16_UINT:
1034 c = Insert(c, Int(*Pointer<UShort>(element)), 0);
1035 break;
1036 case VK_FORMAT_R32G32B32A32_SINT:
1037 case VK_FORMAT_R32G32B32A32_UINT:
1038 c = *Pointer<Int4>(element);
1039 break;
1040 case VK_FORMAT_R32G32_SINT:
1041 case VK_FORMAT_R32G32_UINT:
1042 c = Insert(c, *Pointer<Int>(element + 4), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001043 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001044 case VK_FORMAT_R32_SINT:
1045 case VK_FORMAT_R32_UINT:
1046 c = Insert(c, *Pointer<Int>(element), 0);
1047 break;
1048 default:
1049 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001050 }
1051
1052 return c;
1053}
1054
1055void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
1056{
1057 bool writeR = state.writeRed;
1058 bool writeG = state.writeGreen;
1059 bool writeB = state.writeBlue;
1060 bool writeA = state.writeAlpha;
1061 bool writeRGBA = writeR && writeG && writeB && writeA;
1062
1063 switch(state.destFormat)
1064 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001065 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001066 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
Ben Claytonfccfc562019-12-17 20:37:31 +00001067 c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
1068 break;
1069 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1070 case VK_FORMAT_R8G8B8A8_UINT:
1071 case VK_FORMAT_R8G8B8_UINT:
1072 case VK_FORMAT_R8G8_UINT:
1073 case VK_FORMAT_R8_UINT:
1074 case VK_FORMAT_R8G8B8A8_USCALED:
1075 case VK_FORMAT_R8G8B8_USCALED:
1076 case VK_FORMAT_R8G8_USCALED:
1077 case VK_FORMAT_R8_USCALED:
1078 case VK_FORMAT_S8_UINT:
1079 c = Min(As<UInt4>(c), UInt4(0xFF));
1080 break;
1081 case VK_FORMAT_R16G16B16A16_UINT:
1082 case VK_FORMAT_R16G16B16_UINT:
1083 case VK_FORMAT_R16G16_UINT:
1084 case VK_FORMAT_R16_UINT:
1085 case VK_FORMAT_R16G16B16A16_USCALED:
1086 case VK_FORMAT_R16G16B16_USCALED:
1087 case VK_FORMAT_R16G16_USCALED:
1088 case VK_FORMAT_R16_USCALED:
1089 c = Min(As<UInt4>(c), UInt4(0xFFFF));
1090 break;
1091 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1092 case VK_FORMAT_R8G8B8A8_SINT:
1093 case VK_FORMAT_R8G8_SINT:
1094 case VK_FORMAT_R8_SINT:
1095 case VK_FORMAT_R8G8B8A8_SSCALED:
1096 case VK_FORMAT_R8G8B8_SSCALED:
1097 case VK_FORMAT_R8G8_SSCALED:
1098 case VK_FORMAT_R8_SSCALED:
1099 c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
1100 break;
1101 case VK_FORMAT_R16G16B16A16_SINT:
1102 case VK_FORMAT_R16G16B16_SINT:
1103 case VK_FORMAT_R16G16_SINT:
1104 case VK_FORMAT_R16_SINT:
1105 case VK_FORMAT_R16G16B16A16_SSCALED:
1106 case VK_FORMAT_R16G16B16_SSCALED:
1107 case VK_FORMAT_R16G16_SSCALED:
1108 case VK_FORMAT_R16_SSCALED:
1109 c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
1110 break;
1111 default:
1112 break;
Nicolas Capens157ba262019-12-10 17:49:14 -05001113 }
1114
1115 switch(state.destFormat)
1116 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001117 case VK_FORMAT_B8G8R8A8_SINT:
1118 case VK_FORMAT_B8G8R8A8_SSCALED:
1119 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001120 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001121 case VK_FORMAT_B8G8R8_SINT:
1122 case VK_FORMAT_B8G8R8_SSCALED:
1123 if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
1124 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1125 if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
1126 break;
1127 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1128 case VK_FORMAT_R8G8B8A8_SINT:
1129 case VK_FORMAT_R8G8B8A8_SSCALED:
1130 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
1131 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001132 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001133 case VK_FORMAT_R8G8B8_SINT:
1134 case VK_FORMAT_R8G8B8_SSCALED:
1135 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001136 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001137 case VK_FORMAT_R8G8_SINT:
1138 case VK_FORMAT_R8G8_SSCALED:
1139 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001140 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001141 case VK_FORMAT_R8_SINT:
1142 case VK_FORMAT_R8_SSCALED:
1143 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
1144 break;
1145 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1146 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1147 case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1148 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1149 if(writeRGBA)
1150 {
Alexis Hetu3716c202019-12-19 17:09:08 -05001151 *Pointer<UInt>(element) = As<UInt>(PackFields(c, { 0, 10, 20, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +00001152 }
1153 else
1154 {
1155 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1156 (writeB ? 0x3FF00000 : 0x0000) |
1157 (writeG ? 0x000FFC00 : 0x0000) |
1158 (writeR ? 0x000003FF : 0x0000);
1159 unsigned int unmask = ~mask;
1160 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -05001161 (As<UInt>(PackFields(c, { 0, 10, 20, 30 })) & UInt(mask));
Ben Claytonfccfc562019-12-17 20:37:31 +00001162 }
1163 break;
1164 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1165 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1166 case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1167 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1168 if(writeRGBA)
1169 {
Alexis Hetu3716c202019-12-19 17:09:08 -05001170 *Pointer<UInt>(element) = As<UInt>(PackFields(c, { 20, 10, 0, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +00001171 }
1172 else
1173 {
1174 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1175 (writeR ? 0x3FF00000 : 0x0000) |
1176 (writeG ? 0x000FFC00 : 0x0000) |
1177 (writeB ? 0x000003FF : 0x0000);
1178 unsigned int unmask = ~mask;
1179 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -05001180 (As<UInt>(PackFields(c, { 20, 10, 0, 30 })) & UInt(mask));
Ben Claytonfccfc562019-12-17 20:37:31 +00001181 }
1182 break;
1183 case VK_FORMAT_B8G8R8A8_UINT:
1184 case VK_FORMAT_B8G8R8A8_USCALED:
1185 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001186 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001187 case VK_FORMAT_B8G8R8_UINT:
1188 case VK_FORMAT_B8G8R8_USCALED:
1189 case VK_FORMAT_B8G8R8_SRGB:
1190 if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
1191 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1192 if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
1193 break;
1194 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1195 case VK_FORMAT_R8G8B8A8_UINT:
1196 case VK_FORMAT_R8G8B8A8_USCALED:
1197 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
1198 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001199 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001200 case VK_FORMAT_R8G8B8_UINT:
1201 case VK_FORMAT_R8G8B8_USCALED:
1202 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001203 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001204 case VK_FORMAT_R8G8_UINT:
1205 case VK_FORMAT_R8G8_USCALED:
1206 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001207 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001208 case VK_FORMAT_R8_UINT:
1209 case VK_FORMAT_R8_USCALED:
1210 case VK_FORMAT_S8_UINT:
1211 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
1212 break;
1213 case VK_FORMAT_R16G16B16A16_SINT:
1214 case VK_FORMAT_R16G16B16A16_SSCALED:
1215 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001216 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001217 case VK_FORMAT_R16G16B16_SINT:
1218 case VK_FORMAT_R16G16B16_SSCALED:
1219 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001220 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001221 case VK_FORMAT_R16G16_SINT:
1222 case VK_FORMAT_R16G16_SSCALED:
1223 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001224 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001225 case VK_FORMAT_R16_SINT:
1226 case VK_FORMAT_R16_SSCALED:
1227 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
1228 break;
1229 case VK_FORMAT_R16G16B16A16_UINT:
1230 case VK_FORMAT_R16G16B16A16_USCALED:
1231 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001232 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001233 case VK_FORMAT_R16G16B16_UINT:
1234 case VK_FORMAT_R16G16B16_USCALED:
1235 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001236 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001237 case VK_FORMAT_R16G16_UINT:
1238 case VK_FORMAT_R16G16_USCALED:
1239 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001240 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001241 case VK_FORMAT_R16_UINT:
1242 case VK_FORMAT_R16_USCALED:
1243 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
1244 break;
1245 case VK_FORMAT_R32G32B32A32_SINT:
1246 if(writeRGBA)
1247 {
1248 *Pointer<Int4>(element) = c;
1249 }
1250 else
1251 {
1252 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1253 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1254 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1255 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
1256 }
1257 break;
1258 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001259 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1260 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1261 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
Ben Claytonfccfc562019-12-17 20:37:31 +00001262 break;
1263 case VK_FORMAT_R32G32_SINT:
1264 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1265 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1266 break;
1267 case VK_FORMAT_R32_SINT:
1268 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1269 break;
1270 case VK_FORMAT_R32G32B32A32_UINT:
1271 if(writeRGBA)
1272 {
1273 *Pointer<UInt4>(element) = As<UInt4>(c);
1274 }
1275 else
1276 {
1277 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1278 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1279 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1280 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
1281 }
1282 break;
1283 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001284 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001285 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001286 case VK_FORMAT_R32G32_UINT:
1287 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001288 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001289 case VK_FORMAT_R32_UINT:
1290 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1291 break;
1292 default:
1293 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001294 }
1295}
1296
1297void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1298{
1299 float4 scale{}, unscale{};
1300
1301 if(state.clearOperation &&
Nicolas Capens9d9f30d2020-01-12 03:26:18 -05001302 state.sourceFormat.isUnnormalizedInteger() &&
1303 !state.destFormat.isUnnormalizedInteger())
Nicolas Capens157ba262019-12-10 17:49:14 -05001304 {
1305 // If we're clearing a buffer from an int or uint color into a normalized color,
1306 // then the whole range of the int or uint color must be scaled between 0 and 1.
1307 switch(state.sourceFormat)
1308 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001309 case VK_FORMAT_R32G32B32A32_SINT:
1310 unscale = float4(static_cast<float>(0x7FFFFFFF));
1311 break;
1312 case VK_FORMAT_R32G32B32A32_UINT:
1313 unscale = float4(static_cast<float>(0xFFFFFFFF));
1314 break;
1315 default:
1316 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001317 }
1318 }
1319 else
1320 {
1321 unscale = state.sourceFormat.getScale();
1322 }
1323
1324 scale = state.destFormat.getScale();
1325
1326 bool srcSRGB = state.sourceFormat.isSRGBformat();
1327 bool dstSRGB = state.destFormat.isSRGBformat();
1328
Ben Claytonfccfc562019-12-17 20:37:31 +00001329 if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
Nicolas Capens157ba262019-12-10 17:49:14 -05001330 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001331 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1332 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
Nicolas Capens157ba262019-12-10 17:49:14 -05001333 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
Ben Claytonfccfc562019-12-17 20:37:31 +00001334 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
Nicolas Capens157ba262019-12-10 17:49:14 -05001335 }
1336 else if(unscale != scale)
1337 {
1338 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1339 }
1340
1341 if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat())
1342 {
1343 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1344
1345 value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x,
1346 state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y,
1347 state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z,
1348 state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w));
1349 }
1350}
1351
1352Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes)
1353{
1354 return y * pitchB + x * bytes;
1355}
1356
1357Float4 Blitter::LinearToSRGB(Float4 &c)
1358{
1359 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1360 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1361
1362 Float4 s = c;
1363 s.xyz = Max(lc, ec);
1364
1365 return s;
1366}
1367
1368Float4 Blitter::sRGBtoLinear(Float4 &c)
1369{
1370 Float4 lc = c * Float4(1.0f / 12.92f);
1371 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1372
1373 Int4 linear = CmpLT(c, Float4(0.04045f));
1374
1375 Float4 s = c;
Ben Claytonfccfc562019-12-17 20:37:31 +00001376 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()
Nicolas Capens157ba262019-12-10 17:49:14 -05001377
1378 return s;
1379}
1380
1381Blitter::BlitRoutineType Blitter::generate(const State &state)
1382{
1383 BlitFunction function;
1384 {
1385 Pointer<Byte> blit(function.Arg<0>());
1386
Ben Claytonfccfc562019-12-17 20:37:31 +00001387 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, source));
1388 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, dest));
1389 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData, sPitchB));
1390 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData, dPitchB));
Nicolas Capens157ba262019-12-10 17:49:14 -05001391
Ben Claytonfccfc562019-12-17 20:37:31 +00001392 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData, x0));
1393 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData, y0));
1394 Float w = *Pointer<Float>(blit + OFFSET(BlitData, w));
1395 Float h = *Pointer<Float>(blit + OFFSET(BlitData, h));
Nicolas Capens157ba262019-12-10 17:49:14 -05001396
Ben Claytonfccfc562019-12-17 20:37:31 +00001397 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData, x0d));
1398 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData, x1d));
1399 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData, y0d));
1400 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData, y1d));
Nicolas Capens157ba262019-12-10 17:49:14 -05001401
Ben Claytonfccfc562019-12-17 20:37:31 +00001402 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData, sWidth));
1403 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData, sHeight));
Nicolas Capens157ba262019-12-10 17:49:14 -05001404
Nicolas Capens9d9f30d2020-01-12 03:26:18 -05001405 bool intSrc = state.sourceFormat.isUnnormalizedInteger();
1406 bool intDst = state.destFormat.isUnnormalizedInteger();
Nicolas Capens157ba262019-12-10 17:49:14 -05001407 bool intBoth = intSrc && intDst;
1408 int srcBytes = state.sourceFormat.bytes();
1409 int dstBytes = state.destFormat.bytes();
1410
1411 bool hasConstantColorI = false;
1412 Int4 constantColorI;
1413 bool hasConstantColorF = false;
1414 Float4 constantColorF;
1415 if(state.clearOperation)
1416 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001417 if(intBoth) // Integer types
Nicolas Capens157ba262019-12-10 17:49:14 -05001418 {
1419 constantColorI = readInt4(source, state);
1420 hasConstantColorI = true;
1421 }
1422 else
1423 {
1424 constantColorF = readFloat4(source, state);
1425 hasConstantColorF = true;
1426
1427 ApplyScaleAndClamp(constantColorF, state);
1428 }
1429 }
1430
1431 For(Int j = y0d, j < y1d, j++)
1432 {
1433 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1434 Pointer<Byte> destLine = dest + j * dPitchB;
1435
1436 For(Int i = x0d, i < x1d, i++)
1437 {
1438 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1439 Pointer<Byte> d = destLine + i * dstBytes;
1440
1441 if(hasConstantColorI)
1442 {
1443 for(int s = 0; s < state.destSamples; s++)
1444 {
1445 write(constantColorI, d, state);
1446
1447 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1448 }
1449 }
1450 else if(hasConstantColorF)
1451 {
1452 for(int s = 0; s < state.destSamples; s++)
1453 {
1454 write(constantColorF, d, state);
1455
1456 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1457 }
1458 }
Ben Claytonfccfc562019-12-17 20:37:31 +00001459 else if(intBoth) // Integer types do not support filtering
Nicolas Capens157ba262019-12-10 17:49:14 -05001460 {
1461 Int X = Int(x);
1462 Int Y = Int(y);
1463
1464 if(state.clampToEdge)
1465 {
1466 X = Clamp(X, 0, sWidth - 1);
1467 Y = Clamp(Y, 0, sHeight - 1);
1468 }
1469
1470 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);
1471
1472 // When both formats are true integer types, we don't go to float to avoid losing precision
1473 Int4 color = readInt4(s, state);
1474 for(int s = 0; s < state.destSamples; s++)
1475 {
1476 write(color, d, state);
1477
Ben Claytonfccfc562019-12-17 20:37:31 +00001478 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -05001479 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001480 }
1481 else
1482 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001483 Float4 color;
Nicolas Capens68a82382018-10-02 13:16:55 -04001484
Nicolas Capens157ba262019-12-10 17:49:14 -05001485 bool preScaled = false;
1486 if(!state.filter || intSrc)
Nicolas Capens68a82382018-10-02 13:16:55 -04001487 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001488 Int X = Int(x);
1489 Int Y = Int(y);
1490
1491 if(state.clampToEdge)
1492 {
1493 X = Clamp(X, 0, sWidth - 1);
1494 Y = Clamp(Y, 0, sHeight - 1);
1495 }
1496
Alexis Hetud34bb292019-11-13 17:18:02 -05001497 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);
Nicolas Capens68a82382018-10-02 13:16:55 -04001498
Nicolas Capens157ba262019-12-10 17:49:14 -05001499 color = readFloat4(s, state);
1500
Ben Claytonfccfc562019-12-17 20:37:31 +00001501 if(state.srcSamples > 1) // Resolve multisampled source
Alexis Hetuf8df30f2019-10-23 18:03:21 -04001502 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001503 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Nicolas Capens68a82382018-10-02 13:16:55 -04001504 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001505 ApplyScaleAndClamp(color, state);
1506 preScaled = true;
Nicolas Capens68a82382018-10-02 13:16:55 -04001507 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001508 Float4 accum = color;
1509 for(int sample = 1; sample < state.srcSamples; sample++)
Alexis Hetu54ec7592019-03-20 14:37:16 -04001510 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001511 s += *Pointer<Int>(blit + OFFSET(BlitData, sSliceB));
1512 color = readFloat4(s, state);
1513
Ben Claytonfccfc562019-12-17 20:37:31 +00001514 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Alexis Hetua4308132019-06-13 09:55:26 -04001515 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001516 ApplyScaleAndClamp(color, state);
Alexis Hetua4308132019-06-13 09:55:26 -04001517 preScaled = true;
1518 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001519 accum += color;
Alexis Hetu54ec7592019-03-20 14:37:16 -04001520 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001521 color = accum * Float4(1.0f / static_cast<float>(state.srcSamples));
Nicolas Capens68a82382018-10-02 13:16:55 -04001522 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001523 }
Ben Claytonfccfc562019-12-17 20:37:31 +00001524 else // Bilinear filtering
Nicolas Capens157ba262019-12-10 17:49:14 -05001525 {
1526 Float X = x;
1527 Float Y = y;
1528
1529 if(state.clampToEdge)
Nicolas Capens68a82382018-10-02 13:16:55 -04001530 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001531 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1532 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
Nicolas Capens68a82382018-10-02 13:16:55 -04001533 }
1534
Nicolas Capens157ba262019-12-10 17:49:14 -05001535 Float x0 = X - 0.5f;
1536 Float y0 = Y - 0.5f;
Nicolas Capens68a82382018-10-02 13:16:55 -04001537
Nicolas Capens157ba262019-12-10 17:49:14 -05001538 Int X0 = Max(Int(x0), 0);
1539 Int Y0 = Max(Int(y0), 0);
1540
1541 Int X1 = X0 + 1;
1542 Int Y1 = Y0 + 1;
1543 X1 = IfThenElse(X1 >= sWidth, X0, X1);
1544 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1545
1546 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes);
1547 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes);
1548 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes);
1549 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes);
1550
1551 Float4 c00 = readFloat4(s00, state);
1552 Float4 c01 = readFloat4(s01, state);
1553 Float4 c10 = readFloat4(s10, state);
1554 Float4 c11 = readFloat4(s11, state);
1555
Ben Claytonfccfc562019-12-17 20:37:31 +00001556 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Nicolas Capens68a82382018-10-02 13:16:55 -04001557 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001558 ApplyScaleAndClamp(c00, state);
1559 ApplyScaleAndClamp(c01, state);
1560 ApplyScaleAndClamp(c10, state);
1561 ApplyScaleAndClamp(c11, state);
1562 preScaled = true;
Nicolas Capens68a82382018-10-02 13:16:55 -04001563 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001564
1565 Float4 fx = Float4(x0 - Float(X0));
1566 Float4 fy = Float4(y0 - Float(Y0));
1567 Float4 ix = Float4(1.0f) - fx;
1568 Float4 iy = Float4(1.0f) - fy;
1569
1570 color = (c00 * ix + c01 * fx) * iy +
1571 (c10 * ix + c11 * fx) * fy;
1572 }
1573
1574 ApplyScaleAndClamp(color, state, preScaled);
1575
1576 for(int s = 0; s < state.destSamples; s++)
1577 {
1578 write(color, d, state);
1579
Ben Claytonfccfc562019-12-17 20:37:31 +00001580 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
Nicolas Capens68a82382018-10-02 13:16:55 -04001581 }
1582 }
1583 }
1584 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001585 }
1586
Nicolas Capens157ba262019-12-10 17:49:14 -05001587 return function("BlitRoutine");
1588}
1589
1590Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
1591{
1592 std::unique_lock<std::mutex> lock(blitMutex);
1593 auto blitRoutine = blitCache.query(state);
1594
1595 if(!blitRoutine)
Alexis Hetu33642272019-03-01 11:55:59 -05001596 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001597 blitRoutine = generate(state);
1598 blitCache.add(state, blitRoutine);
Alexis Hetu33642272019-03-01 11:55:59 -05001599 }
1600
Nicolas Capens157ba262019-12-10 17:49:14 -05001601 return blitRoutine;
1602}
1603
1604Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state)
1605{
1606 std::unique_lock<std::mutex> lock(cornerUpdateMutex);
1607 auto cornerUpdateRoutine = cornerUpdateCache.query(state);
1608
1609 if(!cornerUpdateRoutine)
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001610 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001611 cornerUpdateRoutine = generateCornerUpdate(state);
1612 cornerUpdateCache.add(state, cornerUpdateRoutine);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001613 }
1614
Nicolas Capens157ba262019-12-10 17:49:14 -05001615 return cornerUpdateRoutine;
1616}
1617
1618void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch)
1619{
1620 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1621 auto format = src->getFormat(aspect);
Ben Claytonfccfc562019-12-17 20:37:31 +00001622 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
Nicolas Capens157ba262019-12-10 17:49:14 -05001623
1624 auto blitRoutine = getBlitRoutine(state);
1625 if(!blitRoutine)
Chris Forbes529eda32019-05-08 10:27:05 -07001626 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001627 return;
Chris Forbes529eda32019-05-08 10:27:05 -07001628 }
1629
Ben Claytonfccfc562019-12-17 20:37:31 +00001630 BlitData data = {
1631 nullptr, // source
1632 dst, // dest
1633 src->rowPitchBytes(aspect, subresource.mipLevel), // sPitchB
1634 bufferRowPitch, // dPitchB
1635 src->slicePitchBytes(aspect, subresource.mipLevel), // sSliceB
1636 bufferSlicePitch, // dSliceB
Chris Forbes529eda32019-05-08 10:27:05 -07001637
Nicolas Capens157ba262019-12-10 17:49:14 -05001638 0, 0, 1, 1,
Chris Forbes529eda32019-05-08 10:27:05 -07001639
Ben Claytonfccfc562019-12-17 20:37:31 +00001640 0, // y0d
1641 static_cast<int>(extent.height), // y1d
1642 0, // x0d
1643 static_cast<int>(extent.width), // x1d
Chris Forbes529eda32019-05-08 10:27:05 -07001644
Ben Claytonfccfc562019-12-17 20:37:31 +00001645 static_cast<int>(extent.width), // sWidth
1646 static_cast<int>(extent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001647 };
Chris Forbes529eda32019-05-08 10:27:05 -07001648
Nicolas Capens157ba262019-12-10 17:49:14 -05001649 VkOffset3D srcOffset = { 0, 0, offset.z };
Chris Forbes529eda32019-05-08 10:27:05 -07001650
Nicolas Capens157ba262019-12-10 17:49:14 -05001651 VkImageSubresourceLayers srcSubresLayers = subresource;
1652 srcSubresLayers.layerCount = 1;
Chris Forbes529eda32019-05-08 10:27:05 -07001653
Ben Claytonfccfc562019-12-17 20:37:31 +00001654 VkImageSubresourceRange srcSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001655 subresource.aspectMask,
1656 subresource.mipLevel,
1657 1,
1658 subresource.baseArrayLayer,
1659 subresource.layerCount
1660 };
Alexis Hetu33642272019-03-01 11:55:59 -05001661
Nicolas Capens157ba262019-12-10 17:49:14 -05001662 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
Alexis Hetu33642272019-03-01 11:55:59 -05001663
Nicolas Capens157ba262019-12-10 17:49:14 -05001664 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++)
Alexis Hetub317d962019-04-29 14:07:31 -04001665 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001666 srcOffset.z = offset.z;
Alexis Hetub317d962019-04-29 14:07:31 -04001667
Nicolas Capens157ba262019-12-10 17:49:14 -05001668 for(auto i = 0u; i < extent.depth; i++)
Alexis Hetub317d962019-04-29 14:07:31 -04001669 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001670 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1671 ASSERT(data.source < src->end());
1672 blitRoutine(&data);
1673 srcOffset.z++;
1674 data.dest = (dst += bufferSlicePitch);
Alexis Hetub317d962019-04-29 14:07:31 -04001675 }
1676 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001677}
Nicolas Capens157ba262019-12-10 17:49:14 -05001678
1679void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch)
1680{
1681 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1682 auto format = dst->getFormat(aspect);
Ben Claytonfccfc562019-12-17 20:37:31 +00001683 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
Nicolas Capens157ba262019-12-10 17:49:14 -05001684
1685 auto blitRoutine = getBlitRoutine(state);
1686 if(!blitRoutine)
1687 {
1688 return;
1689 }
1690
Ben Claytonfccfc562019-12-17 20:37:31 +00001691 BlitData data = {
1692 src, // source
1693 nullptr, // dest
1694 bufferRowPitch, // sPitchB
1695 dst->rowPitchBytes(aspect, subresource.mipLevel), // dPitchB
1696 bufferSlicePitch, // sSliceB
1697 dst->slicePitchBytes(aspect, subresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001698
Ben Claytonfccfc562019-12-17 20:37:31 +00001699 static_cast<float>(-offset.x), // x0
1700 static_cast<float>(-offset.y), // y0
1701 1.0f, // w
1702 1.0f, // h
Nicolas Capens157ba262019-12-10 17:49:14 -05001703
Ben Claytonfccfc562019-12-17 20:37:31 +00001704 offset.y, // y0d
1705 static_cast<int>(offset.y + extent.height), // y1d
1706 offset.x, // x0d
1707 static_cast<int>(offset.x + extent.width), // x1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001708
Ben Claytonfccfc562019-12-17 20:37:31 +00001709 static_cast<int>(extent.width), // sWidth
1710 static_cast<int>(extent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001711 };
1712
1713 VkOffset3D dstOffset = { 0, 0, offset.z };
1714
1715 VkImageSubresourceLayers dstSubresLayers = subresource;
1716 dstSubresLayers.layerCount = 1;
1717
Ben Claytonfccfc562019-12-17 20:37:31 +00001718 VkImageSubresourceRange dstSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001719 subresource.aspectMask,
1720 subresource.mipLevel,
1721 1,
1722 subresource.baseArrayLayer,
1723 subresource.layerCount
1724 };
1725
1726 uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange);
1727
1728 for(; dstSubresLayers.baseArrayLayer <= lastLayer; dstSubresLayers.baseArrayLayer++)
1729 {
1730 dstOffset.z = offset.z;
1731
1732 for(auto i = 0u; i < extent.depth; i++)
1733 {
1734 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1735 ASSERT(data.dest < dst->end());
1736 blitRoutine(&data);
1737 dstOffset.z++;
1738 data.source = (src += bufferSlicePitch);
1739 }
1740 }
1741}
1742
1743void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter)
1744{
1745 if(dst->getFormat() == VK_FORMAT_UNDEFINED)
1746 {
1747 return;
1748 }
1749
Nicolas Capensdd0e6002020-01-24 01:21:47 -05001750 // Vulkan 1.2 section 18.5. Image Copies with Scaling:
1751 // "The layerCount member of srcSubresource and dstSubresource must match"
1752 // "The aspectMask member of srcSubresource and dstSubresource must match"
1753 ASSERT(region.srcSubresource.layerCount == region.dstSubresource.layerCount);
1754 ASSERT(region.srcSubresource.aspectMask == region.dstSubresource.aspectMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05001755
1756 if(region.dstOffsets[0].x > region.dstOffsets[1].x)
1757 {
1758 std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x);
1759 std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x);
1760 }
1761
1762 if(region.dstOffsets[0].y > region.dstOffsets[1].y)
1763 {
1764 std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y);
1765 std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y);
1766 }
1767
1768 VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask);
1769 VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
1770 VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel);
1771
1772 int32_t numSlices = (region.srcOffsets[1].z - region.srcOffsets[0].z);
1773 ASSERT(numSlices == (region.dstOffsets[1].z - region.dstOffsets[0].z));
1774
1775 float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) /
1776 static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x);
1777 float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) /
1778 static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y);
1779 float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio;
1780 float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio;
1781
1782 auto srcFormat = src->getFormat(srcAspect);
1783 auto dstFormat = dst->getFormat(dstAspect);
1784
1785 bool doFilter = (filter != VK_FILTER_NEAREST);
1786 bool allowSRGBConversion =
Ben Claytonfccfc562019-12-17 20:37:31 +00001787 doFilter ||
1788 (src->getSampleCountFlagBits() > 1) ||
1789 (srcFormat.isSRGBformat() != dstFormat.isSRGBformat());
Nicolas Capens157ba262019-12-10 17:49:14 -05001790
1791 State state(src->getFormat(srcAspect), dst->getFormat(dstAspect), src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
1792 Options{ doFilter, allowSRGBConversion });
1793 state.clampToEdge = (region.srcOffsets[0].x < 0) ||
1794 (region.srcOffsets[0].y < 0) ||
1795 (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
1796 (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
1797 (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
1798
1799 auto blitRoutine = getBlitRoutine(state);
1800 if(!blitRoutine)
1801 {
1802 return;
1803 }
1804
Ben Claytonfccfc562019-12-17 20:37:31 +00001805 BlitData data = {
1806 nullptr, // source
1807 nullptr, // dest
1808 src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB
1809 dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB
1810 src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB
1811 dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001812
1813 x0,
1814 y0,
1815 widthRatio,
1816 heightRatio,
1817
Ben Claytonfccfc562019-12-17 20:37:31 +00001818 region.dstOffsets[0].y, // y0d
1819 region.dstOffsets[1].y, // y1d
1820 region.dstOffsets[0].x, // x0d
1821 region.dstOffsets[1].x, // x1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001822
Ben Claytonfccfc562019-12-17 20:37:31 +00001823 static_cast<int>(srcExtent.width), // sWidth
1824 static_cast<int>(srcExtent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001825 };
1826
1827 VkOffset3D srcOffset = { 0, 0, region.srcOffsets[0].z };
1828 VkOffset3D dstOffset = { 0, 0, region.dstOffsets[0].z };
1829
Ben Claytonfccfc562019-12-17 20:37:31 +00001830 VkImageSubresourceLayers srcSubresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001831 region.srcSubresource.aspectMask,
1832 region.srcSubresource.mipLevel,
1833 region.srcSubresource.baseArrayLayer,
1834 1
1835 };
1836
Ben Claytonfccfc562019-12-17 20:37:31 +00001837 VkImageSubresourceLayers dstSubresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001838 region.dstSubresource.aspectMask,
1839 region.dstSubresource.mipLevel,
1840 region.dstSubresource.baseArrayLayer,
1841 1
1842 };
1843
Ben Claytonfccfc562019-12-17 20:37:31 +00001844 VkImageSubresourceRange srcSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001845 region.srcSubresource.aspectMask,
1846 region.srcSubresource.mipLevel,
1847 1,
1848 region.srcSubresource.baseArrayLayer,
1849 region.srcSubresource.layerCount
1850 };
1851
1852 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
1853
1854 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++)
1855 {
1856 srcOffset.z = region.srcOffsets[0].z;
1857 dstOffset.z = region.dstOffsets[0].z;
1858
1859 for(int i = 0; i < numSlices; i++)
1860 {
1861 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1862 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1863
1864 ASSERT(data.source < src->end());
1865 ASSERT(data.dest < dst->end());
1866
1867 blitRoutine(&data);
1868 srcOffset.z++;
1869 dstOffset.z++;
1870 }
1871 }
1872}
1873
Ben Claytonfccfc562019-12-17 20:37:31 +00001874void Blitter::computeCubeCorner(Pointer<Byte> &layer, Int &x0, Int &x1, Int &y0, Int &y1, Int &pitchB, const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05001875{
1876 int bytes = state.sourceFormat.bytes();
1877
1878 Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes), state) +
1879 readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes), state) +
1880 readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes), state);
1881
1882 c *= Float4(1.0f / 3.0f);
1883
1884 write(c, layer + ComputeOffset(x0, y0, pitchB, bytes), state);
1885}
1886
Ben Claytonfccfc562019-12-17 20:37:31 +00001887Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05001888{
1889 // Reading and writing from/to the same image
1890 ASSERT(state.sourceFormat == state.destFormat);
1891 ASSERT(state.srcSamples == state.destSamples);
1892
Nicolas Capensdd0e6002020-01-24 01:21:47 -05001893 // Vulkan 1.2: "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
1894 // VK_IMAGE_TYPE_2D, flags must not contain VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"
1895 ASSERT(state.srcSamples == 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05001896
1897 CornerUpdateFunction function;
1898 {
1899 Pointer<Byte> blit(function.Arg<0>());
1900
1901 Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers));
1902 Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB));
1903 UInt layerSize = *Pointer<Int>(blit + OFFSET(CubeBorderData, layerSize));
1904 UInt dim = *Pointer<Int>(blit + OFFSET(CubeBorderData, dim));
1905
1906 // Low Border, Low Pixel, High Border, High Pixel
Ben Claytonfccfc562019-12-17 20:37:31 +00001907 Int LB(-1), LP(0), HB(dim), HP(dim - 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05001908
1909 for(int face = 0; face < 6; face++)
1910 {
1911 computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
1912 computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
1913 computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state);
1914 computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state);
1915 layers = layers + layerSize;
1916 }
1917 }
1918
1919 return function("BlitRoutine");
1920}
1921
Ben Claytonfccfc562019-12-17 20:37:31 +00001922void Blitter::updateBorders(vk::Image *image, const VkImageSubresourceLayers &subresourceLayers)
Nicolas Capens157ba262019-12-10 17:49:14 -05001923{
Nicolas Capensdd0e6002020-01-24 01:21:47 -05001924 ASSERT(image->getArrayLayers() >= (subresourceLayers.baseArrayLayer + 6));
Nicolas Capens157ba262019-12-10 17:49:14 -05001925
1926 // From Vulkan 1.1 spec, section 11.5. Image Views:
1927 // "For cube and cube array image views, the layers of the image view starting
1928 // at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z."
1929 VkImageSubresourceLayers posX = subresourceLayers;
1930 posX.layerCount = 1;
1931 VkImageSubresourceLayers negX = posX;
1932 negX.baseArrayLayer++;
1933 VkImageSubresourceLayers posY = negX;
1934 posY.baseArrayLayer++;
1935 VkImageSubresourceLayers negY = posY;
1936 negY.baseArrayLayer++;
1937 VkImageSubresourceLayers posZ = negY;
1938 posZ.baseArrayLayer++;
1939 VkImageSubresourceLayers negZ = posZ;
1940 negZ.baseArrayLayer++;
1941
1942 // Copy top / bottom
1943 copyCubeEdge(image, posX, BOTTOM, negY, RIGHT);
1944 copyCubeEdge(image, posY, BOTTOM, posZ, TOP);
1945 copyCubeEdge(image, posZ, BOTTOM, negY, TOP);
1946 copyCubeEdge(image, negX, BOTTOM, negY, LEFT);
1947 copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM);
1948 copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM);
1949
1950 copyCubeEdge(image, posX, TOP, posY, RIGHT);
1951 copyCubeEdge(image, posY, TOP, negZ, TOP);
1952 copyCubeEdge(image, posZ, TOP, posY, BOTTOM);
1953 copyCubeEdge(image, negX, TOP, posY, LEFT);
1954 copyCubeEdge(image, negY, TOP, posZ, BOTTOM);
1955 copyCubeEdge(image, negZ, TOP, posY, TOP);
1956
1957 // Copy left / right
1958 copyCubeEdge(image, posX, RIGHT, negZ, LEFT);
1959 copyCubeEdge(image, posY, RIGHT, posX, TOP);
1960 copyCubeEdge(image, posZ, RIGHT, posX, LEFT);
1961 copyCubeEdge(image, negX, RIGHT, posZ, LEFT);
1962 copyCubeEdge(image, negY, RIGHT, posX, BOTTOM);
1963 copyCubeEdge(image, negZ, RIGHT, negX, LEFT);
1964
1965 copyCubeEdge(image, posX, LEFT, posZ, RIGHT);
1966 copyCubeEdge(image, posY, LEFT, negX, TOP);
1967 copyCubeEdge(image, posZ, LEFT, negX, RIGHT);
1968 copyCubeEdge(image, negX, LEFT, negZ, RIGHT);
1969 copyCubeEdge(image, negY, LEFT, negX, BOTTOM);
1970 copyCubeEdge(image, negZ, LEFT, posX, RIGHT);
1971
1972 // Compute corner colors
1973 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask);
1974 vk::Format format = image->getFormat(aspect);
1975 VkSampleCountFlagBits samples = image->getSampleCountFlagBits();
1976 State state(format, format, samples, samples, Options{ 0xF });
1977
Nicolas Capensdd0e6002020-01-24 01:21:47 -05001978 // Vulkan 1.2: "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
1979 // VK_IMAGE_TYPE_2D, flags must not contain VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"
1980 ASSERT(samples == VK_SAMPLE_COUNT_1_BIT);
Nicolas Capens157ba262019-12-10 17:49:14 -05001981
1982 auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
1983 if(!cornerUpdateRoutine)
1984 {
1985 return;
1986 }
1987
1988 VkExtent3D extent = image->getMipLevelExtent(aspect, subresourceLayers.mipLevel);
Ben Claytonfccfc562019-12-17 20:37:31 +00001989 CubeBorderData data = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001990 image->getTexelPointer({ 0, 0, 0 }, posX),
1991 image->rowPitchBytes(aspect, subresourceLayers.mipLevel),
1992 static_cast<uint32_t>(image->getLayerSize(aspect)),
1993 extent.width
1994 };
1995 cornerUpdateRoutine(&data);
1996}
1997
Ben Claytonfccfc562019-12-17 20:37:31 +00001998void Blitter::copyCubeEdge(vk::Image *image,
1999 const VkImageSubresourceLayers &dstSubresourceLayers, Edge dstEdge,
2000 const VkImageSubresourceLayers &srcSubresourceLayers, Edge srcEdge)
Nicolas Capens157ba262019-12-10 17:49:14 -05002001{
2002 ASSERT(srcSubresourceLayers.aspectMask == dstSubresourceLayers.aspectMask);
2003 ASSERT(srcSubresourceLayers.mipLevel == dstSubresourceLayers.mipLevel);
2004 ASSERT(srcSubresourceLayers.baseArrayLayer != dstSubresourceLayers.baseArrayLayer);
2005 ASSERT(srcSubresourceLayers.layerCount == 1);
2006 ASSERT(dstSubresourceLayers.layerCount == 1);
2007
2008 // Figure out if the edges to be copied in reverse order respectively from one another
2009 // The copy should be reversed whenever the same edges are contiguous or if we're
2010 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
2011 //
2012 // | +y |
2013 // | -x | +z | +x | -z |
2014 // | -y |
2015
2016 bool reverse = (srcEdge == dstEdge) ||
2017 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
2018 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
2019 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
2020 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
2021
2022 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresourceLayers.aspectMask);
2023 int bytes = image->getFormat(aspect).bytes();
2024 int pitchB = image->rowPitchBytes(aspect, srcSubresourceLayers.mipLevel);
2025
2026 VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresourceLayers.mipLevel);
2027 int w = extent.width;
2028 int h = extent.height;
2029 if(w != h)
2030 {
2031 UNSUPPORTED("Cube doesn't have square faces : (%d, %d)", w, h);
2032 }
2033
2034 // Src is expressed in the regular [0, width-1], [0, height-1] space
2035 bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM));
2036 int srcDelta = srcHorizontal ? bytes : pitchB;
2037 VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 };
2038
2039 // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space
2040 bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM));
2041 int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1);
2042 VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 };
2043
2044 // Don't write in the corners
2045 if(dstHorizontal)
2046 {
2047 dstOffset.x += reverse ? w : 1;
2048 }
2049 else
2050 {
2051 dstOffset.y += reverse ? h : 1;
2052 }
2053
Ben Claytonfccfc562019-12-17 20:37:31 +00002054 const uint8_t *src = static_cast<const uint8_t *>(image->getTexelPointer(srcOffset, srcSubresourceLayers));
2055 uint8_t *dst = static_cast<uint8_t *>(image->getTexelPointer(dstOffset, dstSubresourceLayers));
Nicolas Capens157ba262019-12-10 17:49:14 -05002056 ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end()));
2057 ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end()));
2058
2059 for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta)
2060 {
2061 memcpy(dst, src, bytes);
2062 }
2063}
2064
Ben Claytonfccfc562019-12-17 20:37:31 +00002065} // namespace sw