blob: 02ad43b844d190b4dde775894e2e971327b29e1a [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "Surface.hpp"
16
17#include "Color.hpp"
18#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040019#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040020#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040021#include "Common/Half.hpp"
22#include "Common/Memory.hpp"
23#include "Common/CPUID.hpp"
24#include "Common/Resource.hpp"
25#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040026#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040027
28#include <xmmintrin.h>
29#include <emmintrin.h>
30
31#undef min
32#undef max
33
34namespace sw
35{
// Declarations only ('extern'): the definitions of these settings are not in
// this part of the file.
extern bool quadLayoutEnabled;
extern bool complementaryDepthBuffer;
extern TranscendentalPrecision logPrecision;

// Definitions of Surface's static palette members; both start out as zero.
// Buffer::read() asserts that 'palette' is non-null and indexes it with the
// stored byte for FORMAT_P8 / FORMAT_A8P8. Each entry is unpacked with red in
// the low byte, then green, then blue (FORMAT_P8 also takes alpha from the
// top byte).
unsigned int *Surface::palette = 0;
unsigned int Surface::paletteID = 0;
42
John Bauman19bac1e2014-05-06 15:23:49 -040043 void Rect::clip(int minX, int minY, int maxX, int maxY)
44 {
Nicolas Capens22658242014-11-29 00:31:41 -050045 x0 = clamp(x0, minX, maxX);
46 y0 = clamp(y0, minY, maxY);
47 x1 = clamp(x1, minX, maxX);
48 y1 = clamp(y1, minY, maxY);
John Bauman19bac1e2014-05-06 15:23:49 -040049 }
50
John Bauman89401822014-05-06 15:04:28 -040051 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
52 {
53 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
54
55 write(element, color);
56 }
57
58 void Surface::Buffer::write(int x, int y, const Color<float> &color)
59 {
60 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
61
62 write(element, color);
63 }
64
65 inline void Surface::Buffer::write(void *element, const Color<float> &color)
66 {
67 switch(format)
68 {
69 case FORMAT_A8:
70 *(unsigned char*)element = unorm<8>(color.a);
71 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040072 case FORMAT_R8I_SNORM:
73 *(char*)element = snorm<8>(color.r);
74 break;
John Bauman89401822014-05-06 15:04:28 -040075 case FORMAT_R8:
76 *(unsigned char*)element = unorm<8>(color.r);
77 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040078 case FORMAT_R8I:
79 *(char*)element = scast<8>(color.r);
80 break;
81 case FORMAT_R8UI:
82 *(unsigned char*)element = ucast<8>(color.r);
83 break;
84 case FORMAT_R16I:
85 *(short*)element = scast<16>(color.r);
86 break;
87 case FORMAT_R16UI:
88 *(unsigned short*)element = ucast<16>(color.r);
89 break;
90 case FORMAT_R32I:
91 *(int*)element = static_cast<int>(color.r);
92 break;
93 case FORMAT_R32UI:
94 *(unsigned int*)element = static_cast<unsigned int>(color.r);
95 break;
John Bauman89401822014-05-06 15:04:28 -040096 case FORMAT_R3G3B2:
97 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
98 break;
99 case FORMAT_A8R3G3B2:
100 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
101 break;
102 case FORMAT_X4R4G4B4:
103 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
104 break;
105 case FORMAT_A4R4G4B4:
106 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
107 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400108 case FORMAT_R4G4B4A4:
109 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
110 break;
John Bauman89401822014-05-06 15:04:28 -0400111 case FORMAT_R5G6B5:
112 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
113 break;
114 case FORMAT_A1R5G5B5:
115 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
116 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400117 case FORMAT_R5G5B5A1:
118 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
119 break;
John Bauman89401822014-05-06 15:04:28 -0400120 case FORMAT_X1R5G5B5:
121 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
122 break;
123 case FORMAT_A8R8G8B8:
124 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
125 break;
126 case FORMAT_X8R8G8B8:
127 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
128 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400129 case FORMAT_A8B8G8R8I_SNORM:
130 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
131 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
132 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
133 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
134 break;
John Bauman89401822014-05-06 15:04:28 -0400135 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400136 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400137 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
138 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400139 case FORMAT_A8B8G8R8I:
140 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
141 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
142 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
143 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
144 break;
145 case FORMAT_A8B8G8R8UI:
146 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
147 break;
148 case FORMAT_X8B8G8R8I_SNORM:
149 *(unsigned int*)element = 0x7F000000 |
150 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
151 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
152 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
153 break;
John Bauman89401822014-05-06 15:04:28 -0400154 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400155 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400156 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
157 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400158 case FORMAT_X8B8G8R8I:
159 *(unsigned int*)element = 0x7F000000 |
160 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
161 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
162 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
163 case FORMAT_X8B8G8R8UI:
164 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
165 break;
John Bauman89401822014-05-06 15:04:28 -0400166 case FORMAT_A2R10G10B10:
167 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
168 break;
169 case FORMAT_A2B10G10R10:
170 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
171 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400172 case FORMAT_G8R8I_SNORM:
173 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
174 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
175 break;
John Bauman89401822014-05-06 15:04:28 -0400176 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400177 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
178 break;
179 case FORMAT_G8R8I:
180 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
181 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
182 break;
183 case FORMAT_G8R8UI:
184 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400185 break;
186 case FORMAT_G16R16:
187 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
188 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400189 case FORMAT_G16R16I:
190 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
191 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
192 break;
193 case FORMAT_G16R16UI:
194 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
195 break;
196 case FORMAT_G32R32I:
197 case FORMAT_G32R32UI:
198 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
199 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
200 break;
John Bauman89401822014-05-06 15:04:28 -0400201 case FORMAT_A16B16G16R16:
202 ((unsigned short*)element)[0] = unorm<16>(color.r);
203 ((unsigned short*)element)[1] = unorm<16>(color.g);
204 ((unsigned short*)element)[2] = unorm<16>(color.b);
205 ((unsigned short*)element)[3] = unorm<16>(color.a);
206 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400207 case FORMAT_A16B16G16R16I:
208 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
209 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
210 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
211 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
212 break;
213 case FORMAT_A16B16G16R16UI:
214 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
215 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
216 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
217 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
218 break;
219 case FORMAT_X16B16G16R16I:
220 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
221 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
222 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
223 break;
224 case FORMAT_X16B16G16R16UI:
225 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
226 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
227 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
228 break;
229 case FORMAT_A32B32G32R32I:
230 case FORMAT_A32B32G32R32UI:
231 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
232 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
233 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
234 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
235 break;
236 case FORMAT_X32B32G32R32I:
237 case FORMAT_X32B32G32R32UI:
238 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
239 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
240 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
241 break;
John Bauman89401822014-05-06 15:04:28 -0400242 case FORMAT_V8U8:
243 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
244 break;
245 case FORMAT_L6V5U5:
246 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
247 break;
248 case FORMAT_Q8W8V8U8:
249 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
250 break;
251 case FORMAT_X8L8V8U8:
252 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
253 break;
254 case FORMAT_V16U16:
255 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
256 break;
257 case FORMAT_A2W10V10U10:
258 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
259 break;
260 case FORMAT_A16W16V16U16:
261 ((unsigned short*)element)[0] = snorm<16>(color.r);
262 ((unsigned short*)element)[1] = snorm<16>(color.g);
263 ((unsigned short*)element)[2] = snorm<16>(color.b);
264 ((unsigned short*)element)[3] = unorm<16>(color.a);
265 break;
266 case FORMAT_Q16W16V16U16:
267 ((unsigned short*)element)[0] = snorm<16>(color.r);
268 ((unsigned short*)element)[1] = snorm<16>(color.g);
269 ((unsigned short*)element)[2] = snorm<16>(color.b);
270 ((unsigned short*)element)[3] = snorm<16>(color.a);
271 break;
272 case FORMAT_R8G8B8:
273 ((unsigned char*)element)[0] = unorm<8>(color.b);
274 ((unsigned char*)element)[1] = unorm<8>(color.g);
275 ((unsigned char*)element)[2] = unorm<8>(color.r);
276 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400277 case FORMAT_B8G8R8:
278 ((unsigned char*)element)[0] = unorm<8>(color.r);
279 ((unsigned char*)element)[1] = unorm<8>(color.g);
280 ((unsigned char*)element)[2] = unorm<8>(color.b);
281 break;
John Bauman89401822014-05-06 15:04:28 -0400282 case FORMAT_R16F:
283 *(half*)element = (half)color.r;
284 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400285 case FORMAT_A16F:
286 *(half*)element = (half)color.a;
287 break;
John Bauman89401822014-05-06 15:04:28 -0400288 case FORMAT_G16R16F:
289 ((half*)element)[0] = (half)color.r;
290 ((half*)element)[1] = (half)color.g;
291 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400292 case FORMAT_B16G16R16F:
293 ((half*)element)[0] = (half)color.r;
294 ((half*)element)[1] = (half)color.g;
295 ((half*)element)[2] = (half)color.b;
296 break;
John Bauman89401822014-05-06 15:04:28 -0400297 case FORMAT_A16B16G16R16F:
298 ((half*)element)[0] = (half)color.r;
299 ((half*)element)[1] = (half)color.g;
300 ((half*)element)[2] = (half)color.b;
301 ((half*)element)[3] = (half)color.a;
302 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400303 case FORMAT_A32F:
304 *(float*)element = color.a;
305 break;
John Bauman89401822014-05-06 15:04:28 -0400306 case FORMAT_R32F:
307 *(float*)element = color.r;
308 break;
309 case FORMAT_G32R32F:
310 ((float*)element)[0] = color.r;
311 ((float*)element)[1] = color.g;
312 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400313 case FORMAT_X32B32G32R32F:
314 ((float*)element)[3] = 1.0f;
Nicolas Capens80594422015-06-09 16:42:56 -0400315 case FORMAT_B32G32R32F:
316 ((float*)element)[0] = color.r;
317 ((float*)element)[1] = color.g;
318 ((float*)element)[2] = color.b;
319 break;
John Bauman89401822014-05-06 15:04:28 -0400320 case FORMAT_A32B32G32R32F:
321 ((float*)element)[0] = color.r;
322 ((float*)element)[1] = color.g;
323 ((float*)element)[2] = color.b;
324 ((float*)element)[3] = color.a;
325 break;
326 case FORMAT_D32F:
327 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400328 case FORMAT_D32FS8_TEXTURE:
329 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400330 *((float*)element) = color.r;
331 break;
332 case FORMAT_D32F_COMPLEMENTARY:
333 *((float*)element) = 1 - color.r;
334 break;
335 case FORMAT_S8:
336 *((unsigned char*)element) = unorm<8>(color.r);
337 break;
338 case FORMAT_L8:
339 *(unsigned char*)element = unorm<8>(color.r);
340 break;
341 case FORMAT_A4L4:
342 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
343 break;
344 case FORMAT_L16:
345 *(unsigned short*)element = unorm<16>(color.r);
346 break;
347 case FORMAT_A8L8:
348 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
349 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400350 case FORMAT_L16F:
351 *(half*)element = (half)color.r;
352 break;
353 case FORMAT_A16L16F:
354 ((half*)element)[0] = (half)color.r;
355 ((half*)element)[1] = (half)color.a;
356 break;
357 case FORMAT_L32F:
358 *(float*)element = color.r;
359 break;
360 case FORMAT_A32L32F:
361 ((float*)element)[0] = color.r;
362 ((float*)element)[1] = color.a;
363 break;
John Bauman89401822014-05-06 15:04:28 -0400364 default:
365 ASSERT(false);
366 }
367 }
368
369 Color<float> Surface::Buffer::read(int x, int y, int z) const
370 {
371 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
372
373 return read(element);
374 }
375
376 Color<float> Surface::Buffer::read(int x, int y) const
377 {
378 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
379
380 return read(element);
381 }
382
383 inline Color<float> Surface::Buffer::read(void *element) const
384 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400385 float r = 0.0f;
386 float g = 0.0f;
387 float b = 0.0f;
388 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400389
390 switch(format)
391 {
392 case FORMAT_P8:
393 {
394 ASSERT(palette);
395
396 unsigned int abgr = palette[*(unsigned char*)element];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400397
John Bauman89401822014-05-06 15:04:28 -0400398 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
399 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
400 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
401 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
402 }
403 break;
404 case FORMAT_A8P8:
405 {
406 ASSERT(palette);
407
408 unsigned int bgr = palette[((unsigned char*)element)[0]];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400409
John Bauman89401822014-05-06 15:04:28 -0400410 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
411 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
412 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
413 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
414 }
415 break;
416 case FORMAT_A8:
417 r = 0;
418 g = 0;
419 b = 0;
420 a = *(unsigned char*)element * (1.0f / 0xFF);
421 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400422 case FORMAT_R8I_SNORM:
423 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
424 break;
John Bauman89401822014-05-06 15:04:28 -0400425 case FORMAT_R8:
426 r = *(unsigned char*)element * (1.0f / 0xFF);
427 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400428 case FORMAT_R8I:
429 r = *(signed char*)element;
430 break;
431 case FORMAT_R8UI:
432 r = *(unsigned char*)element;
433 break;
John Bauman89401822014-05-06 15:04:28 -0400434 case FORMAT_R3G3B2:
435 {
436 unsigned char rgb = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400437
John Bauman89401822014-05-06 15:04:28 -0400438 r = (rgb & 0xE0) * (1.0f / 0xE0);
439 g = (rgb & 0x1C) * (1.0f / 0x1C);
440 b = (rgb & 0x03) * (1.0f / 0x03);
441 }
442 break;
443 case FORMAT_A8R3G3B2:
444 {
445 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400446
John Bauman89401822014-05-06 15:04:28 -0400447 a = (argb & 0xFF00) * (1.0f / 0xFF00);
448 r = (argb & 0x00E0) * (1.0f / 0x00E0);
449 g = (argb & 0x001C) * (1.0f / 0x001C);
450 b = (argb & 0x0003) * (1.0f / 0x0003);
451 }
452 break;
453 case FORMAT_X4R4G4B4:
454 {
455 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400456
John Bauman89401822014-05-06 15:04:28 -0400457 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
458 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
459 b = (rgb & 0x000F) * (1.0f / 0x000F);
460 }
461 break;
462 case FORMAT_A4R4G4B4:
463 {
464 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400465
John Bauman89401822014-05-06 15:04:28 -0400466 a = (argb & 0xF000) * (1.0f / 0xF000);
467 r = (argb & 0x0F00) * (1.0f / 0x0F00);
468 g = (argb & 0x00F0) * (1.0f / 0x00F0);
469 b = (argb & 0x000F) * (1.0f / 0x000F);
470 }
471 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400472 case FORMAT_R4G4B4A4:
473 {
474 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400475
Nicolas Capens80594422015-06-09 16:42:56 -0400476 r = (rgba & 0xF000) * (1.0f / 0xF000);
477 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
478 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
479 a = (rgba & 0x000F) * (1.0f / 0x000F);
480 }
481 break;
John Bauman89401822014-05-06 15:04:28 -0400482 case FORMAT_R5G6B5:
483 {
484 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400485
John Bauman89401822014-05-06 15:04:28 -0400486 r = (rgb & 0xF800) * (1.0f / 0xF800);
487 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
488 b = (rgb & 0x001F) * (1.0f / 0x001F);
489 }
490 break;
491 case FORMAT_A1R5G5B5:
492 {
493 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400494
John Bauman89401822014-05-06 15:04:28 -0400495 a = (argb & 0x8000) * (1.0f / 0x8000);
496 r = (argb & 0x7C00) * (1.0f / 0x7C00);
497 g = (argb & 0x03E0) * (1.0f / 0x03E0);
498 b = (argb & 0x001F) * (1.0f / 0x001F);
499 }
500 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400501 case FORMAT_R5G5B5A1:
502 {
503 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400504
Nicolas Capens80594422015-06-09 16:42:56 -0400505 r = (rgba & 0xF800) * (1.0f / 0xF800);
506 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
507 b = (rgba & 0x003E) * (1.0f / 0x003E);
508 a = (rgba & 0x0001) * (1.0f / 0x0001);
509 }
510 break;
John Bauman89401822014-05-06 15:04:28 -0400511 case FORMAT_X1R5G5B5:
512 {
513 unsigned short xrgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400514
John Bauman89401822014-05-06 15:04:28 -0400515 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
516 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
517 b = (xrgb & 0x001F) * (1.0f / 0x001F);
518 }
519 break;
520 case FORMAT_A8R8G8B8:
521 {
522 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400523
John Bauman89401822014-05-06 15:04:28 -0400524 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
525 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
526 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
527 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
528 }
529 break;
530 case FORMAT_X8R8G8B8:
531 {
532 unsigned int xrgb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400533
John Bauman89401822014-05-06 15:04:28 -0400534 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
535 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
536 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
537 }
538 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400539 case FORMAT_A8B8G8R8I_SNORM:
540 {
541 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400542
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400543 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
544 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
545 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
546 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
547 }
548 break;
John Bauman89401822014-05-06 15:04:28 -0400549 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400550 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400551 {
552 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400553
John Bauman89401822014-05-06 15:04:28 -0400554 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
555 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
556 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
557 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
558 }
559 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400560 case FORMAT_A8B8G8R8I:
561 {
562 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400563
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400564 r = abgr[0];
565 g = abgr[1];
566 b = abgr[2];
567 a = abgr[3];
568 }
569 break;
570 case FORMAT_A8B8G8R8UI:
571 {
572 unsigned char* abgr = (unsigned char*)element;
573
574 r = abgr[0];
575 g = abgr[1];
576 b = abgr[2];
577 a = abgr[3];
578 }
579 break;
580 case FORMAT_X8B8G8R8I_SNORM:
581 {
582 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400583
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400584 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
585 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
586 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
587 }
588 break;
John Bauman89401822014-05-06 15:04:28 -0400589 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400590 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400591 {
592 unsigned int xbgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400593
John Bauman89401822014-05-06 15:04:28 -0400594 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
595 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
596 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
597 }
598 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400599 case FORMAT_X8B8G8R8I:
600 {
601 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400602
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400603 r = bgr[0];
604 g = bgr[1];
605 b = bgr[2];
606 }
607 break;
608 case FORMAT_X8B8G8R8UI:
609 {
610 unsigned char* bgr = (unsigned char*)element;
611
612 r = bgr[0];
613 g = bgr[1];
614 b = bgr[2];
615 }
616 break;
617 case FORMAT_G8R8I_SNORM:
618 {
619 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400620
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400621 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
622 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
623 }
624 break;
John Bauman89401822014-05-06 15:04:28 -0400625 case FORMAT_G8R8:
626 {
627 unsigned short gr = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400628
John Bauman89401822014-05-06 15:04:28 -0400629 g = (gr & 0xFF00) * (1.0f / 0xFF00);
630 r = (gr & 0x00FF) * (1.0f / 0x00FF);
631 }
632 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400633 case FORMAT_G8R8I:
634 {
635 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400636
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400637 r = gr[0];
638 g = gr[1];
639 }
640 break;
641 case FORMAT_G8R8UI:
642 {
643 unsigned char* gr = (unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400644
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400645 r = gr[0];
646 g = gr[1];
647 }
648 break;
649 case FORMAT_R16I:
650 r = *((short*)element);
651 break;
652 case FORMAT_R16UI:
653 r = *((unsigned short*)element);
654 break;
655 case FORMAT_G16R16I:
656 {
657 short* gr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400658
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400659 r = gr[0];
660 g = gr[1];
661 }
662 break;
John Bauman89401822014-05-06 15:04:28 -0400663 case FORMAT_G16R16:
664 {
665 unsigned int gr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400666
John Bauman89401822014-05-06 15:04:28 -0400667 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
668 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
669 }
670 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400671 case FORMAT_G16R16UI:
672 {
673 unsigned short* gr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400674
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400675 r = gr[0];
676 g = gr[1];
677 }
678 break;
John Bauman89401822014-05-06 15:04:28 -0400679 case FORMAT_A2R10G10B10:
680 {
681 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400682
John Bauman89401822014-05-06 15:04:28 -0400683 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
684 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
685 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
686 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
687 }
688 break;
689 case FORMAT_A2B10G10R10:
690 {
691 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400692
John Bauman89401822014-05-06 15:04:28 -0400693 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
694 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
695 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
696 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
697 }
698 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400699 case FORMAT_A16B16G16R16I:
700 {
701 short* abgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400702
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400703 r = abgr[0];
704 g = abgr[1];
705 b = abgr[2];
706 a = abgr[3];
707 }
708 break;
John Bauman89401822014-05-06 15:04:28 -0400709 case FORMAT_A16B16G16R16:
710 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
711 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
712 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
713 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
714 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400715 case FORMAT_A16B16G16R16UI:
716 {
717 unsigned short* abgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400718
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400719 r = abgr[0];
720 g = abgr[1];
721 b = abgr[2];
722 a = abgr[3];
723 }
724 break;
725 case FORMAT_X16B16G16R16I:
726 {
727 short* bgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400728
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400729 r = bgr[0];
730 g = bgr[1];
731 b = bgr[2];
732 }
733 break;
734 case FORMAT_X16B16G16R16UI:
735 {
736 unsigned short* bgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400737
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400738 r = bgr[0];
739 g = bgr[1];
740 b = bgr[2];
741 }
742 break;
743 case FORMAT_A32B32G32R32I:
744 {
745 int* abgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400746
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400747 r = static_cast<float>(abgr[0]);
748 g = static_cast<float>(abgr[1]);
749 b = static_cast<float>(abgr[2]);
750 a = static_cast<float>(abgr[3]);
751 }
752 break;
753 case FORMAT_A32B32G32R32UI:
754 {
755 unsigned int* abgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400756
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400757 r = static_cast<float>(abgr[0]);
758 g = static_cast<float>(abgr[1]);
759 b = static_cast<float>(abgr[2]);
760 a = static_cast<float>(abgr[3]);
761 }
762 break;
763 case FORMAT_X32B32G32R32I:
764 {
765 int* bgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400766
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400767 r = static_cast<float>(bgr[0]);
768 g = static_cast<float>(bgr[1]);
769 b = static_cast<float>(bgr[2]);
770 }
771 break;
772 case FORMAT_X32B32G32R32UI:
773 {
774 unsigned int* bgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400775
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400776 r = static_cast<float>(bgr[0]);
777 g = static_cast<float>(bgr[1]);
778 b = static_cast<float>(bgr[2]);
779 }
780 break;
781 case FORMAT_G32R32I:
782 {
783 int* gr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400784
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400785 r = static_cast<float>(gr[0]);
786 g = static_cast<float>(gr[1]);
787 }
788 break;
789 case FORMAT_G32R32UI:
790 {
791 unsigned int* gr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400792
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400793 r = static_cast<float>(gr[0]);
794 g = static_cast<float>(gr[1]);
795 }
796 break;
797 case FORMAT_R32I:
798 r = static_cast<float>(*((int*)element));
799 break;
800 case FORMAT_R32UI:
801 r = static_cast<float>(*((unsigned int*)element));
802 break;
John Bauman89401822014-05-06 15:04:28 -0400803 case FORMAT_V8U8:
804 {
805 unsigned short vu = *(unsigned short*)element;
806
807 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
808 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
809 }
810 break;
811 case FORMAT_L6V5U5:
812 {
813 unsigned short lvu = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400814
John Bauman89401822014-05-06 15:04:28 -0400815 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
816 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
817 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
818 }
819 break;
820 case FORMAT_Q8W8V8U8:
821 {
822 unsigned int qwvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400823
John Bauman89401822014-05-06 15:04:28 -0400824 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
825 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
826 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
827 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
828 }
829 break;
830 case FORMAT_X8L8V8U8:
831 {
832 unsigned int xlvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400833
John Bauman89401822014-05-06 15:04:28 -0400834 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
835 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
836 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
837 }
838 break;
839 case FORMAT_R8G8B8:
840 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
841 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
842 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
843 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400844 case FORMAT_B8G8R8:
845 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
846 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
847 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
848 break;
John Bauman89401822014-05-06 15:04:28 -0400849 case FORMAT_V16U16:
850 {
851 unsigned int vu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400852
John Bauman89401822014-05-06 15:04:28 -0400853 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
854 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
855 }
856 break;
857 case FORMAT_A2W10V10U10:
858 {
859 unsigned int awvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400860
John Bauman89401822014-05-06 15:04:28 -0400861 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
862 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
863 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
864 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
865 }
866 break;
867 case FORMAT_A16W16V16U16:
868 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
869 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
870 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
871 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
872 break;
873 case FORMAT_Q16W16V16U16:
874 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
875 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
876 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
877 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
878 break;
879 case FORMAT_L8:
880 r =
881 g =
882 b = *(unsigned char*)element * (1.0f / 0xFF);
883 break;
884 case FORMAT_A4L4:
885 {
886 unsigned char al = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400887
John Bauman89401822014-05-06 15:04:28 -0400888 r =
889 g =
890 b = (al & 0x0F) * (1.0f / 0x0F);
891 a = (al & 0xF0) * (1.0f / 0xF0);
892 }
893 break;
894 case FORMAT_L16:
895 r =
896 g =
897 b = *(unsigned short*)element * (1.0f / 0xFFFF);
898 break;
899 case FORMAT_A8L8:
900 r =
901 g =
902 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
903 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
904 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400905 case FORMAT_L16F:
906 r =
907 g =
908 b = *(half*)element;
909 break;
910 case FORMAT_A16L16F:
911 r =
912 g =
913 b = ((half*)element)[0];
914 a = ((half*)element)[1];
915 break;
916 case FORMAT_L32F:
917 r =
918 g =
919 b = *(float*)element;
920 break;
921 case FORMAT_A32L32F:
922 r =
923 g =
924 b = ((float*)element)[0];
925 a = ((float*)element)[1];
926 break;
927 case FORMAT_A16F:
928 a = *(half*)element;
929 break;
John Bauman89401822014-05-06 15:04:28 -0400930 case FORMAT_R16F:
931 r = *(half*)element;
932 break;
933 case FORMAT_G16R16F:
934 r = ((half*)element)[0];
935 g = ((half*)element)[1];
936 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400937 case FORMAT_B16G16R16F:
938 r = ((half*)element)[0];
939 g = ((half*)element)[1];
940 b = ((half*)element)[2];
941 break;
John Bauman89401822014-05-06 15:04:28 -0400942 case FORMAT_A16B16G16R16F:
943 r = ((half*)element)[0];
944 g = ((half*)element)[1];
945 b = ((half*)element)[2];
946 a = ((half*)element)[3];
947 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400948 case FORMAT_A32F:
949 a = *(float*)element;
950 break;
John Bauman89401822014-05-06 15:04:28 -0400951 case FORMAT_R32F:
952 r = *(float*)element;
953 break;
954 case FORMAT_G32R32F:
955 r = ((float*)element)[0];
956 g = ((float*)element)[1];
957 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400958 case FORMAT_X32B32G32R32F:
Nicolas Capens80594422015-06-09 16:42:56 -0400959 case FORMAT_B32G32R32F:
960 r = ((float*)element)[0];
961 g = ((float*)element)[1];
962 b = ((float*)element)[2];
963 break;
John Bauman89401822014-05-06 15:04:28 -0400964 case FORMAT_A32B32G32R32F:
965 r = ((float*)element)[0];
966 g = ((float*)element)[1];
967 b = ((float*)element)[2];
968 a = ((float*)element)[3];
969 break;
970 case FORMAT_D32F:
971 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400972 case FORMAT_D32FS8_TEXTURE:
973 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400974 r = *(float*)element;
975 g = r;
976 b = r;
977 a = r;
978 break;
979 case FORMAT_D32F_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400980 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400981 g = r;
982 b = r;
983 a = r;
984 break;
985 case FORMAT_S8:
986 r = *(unsigned char*)element * (1.0f / 0xFF);
987 break;
988 default:
989 ASSERT(false);
990 }
991
992 // if(sRGB)
993 // {
994 // r = sRGBtoLinear(r);
995 // g = sRGBtoLinear(g);
996 // b = sRGBtoLinear(b);
997 // }
998
999 return Color<float>(r, g, b, a);
1000 }
1001
1002 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1003 {
1004 x -= 0.5f;
1005 y -= 0.5f;
1006 z -= 0.5f;
1007
1008 int x0 = clamp((int)x, 0, width - 1);
1009 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1010
1011 int y0 = clamp((int)y, 0, height - 1);
1012 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1013
1014 int z0 = clamp((int)z, 0, depth - 1);
1015 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1016
1017 Color<float> c000 = read(x0, y0, z0);
1018 Color<float> c100 = read(x1, y0, z0);
1019 Color<float> c010 = read(x0, y1, z0);
1020 Color<float> c110 = read(x1, y1, z0);
1021 Color<float> c001 = read(x0, y0, z1);
1022 Color<float> c101 = read(x1, y0, z1);
1023 Color<float> c011 = read(x0, y1, z1);
1024 Color<float> c111 = read(x1, y1, z1);
1025
1026 float fx = x - x0;
1027 float fy = y - y0;
1028 float fz = z - z0;
1029
1030 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1031 c100 *= fx * (1 - fy) * (1 - fz);
1032 c010 *= (1 - fx) * fy * (1 - fz);
1033 c110 *= fx * fy * (1 - fz);
1034 c001 *= (1 - fx) * (1 - fy) * fz;
1035 c101 *= fx * (1 - fy) * fz;
1036 c011 *= (1 - fx) * fy * fz;
1037 c111 *= fx * fy * fz;
1038
1039 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1040 }
1041
1042 Color<float> Surface::Buffer::sample(float x, float y) const
1043 {
1044 x -= 0.5f;
1045 y -= 0.5f;
1046
1047 int x0 = clamp((int)x, 0, width - 1);
1048 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1049
1050 int y0 = clamp((int)y, 0, height - 1);
1051 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1052
1053 Color<float> c00 = read(x0, y0);
1054 Color<float> c10 = read(x1, y0);
1055 Color<float> c01 = read(x0, y1);
1056 Color<float> c11 = read(x1, y1);
1057
1058 float fx = x - x0;
1059 float fy = y - y0;
1060
1061 c00 *= (1 - fx) * (1 - fy);
1062 c10 *= fx * (1 - fy);
1063 c01 *= (1 - fx) * fy;
1064 c11 *= fx * fy;
1065
1066 return c00 + c10 + c01 + c11;
1067 }
1068
John Bauman19bac1e2014-05-06 15:23:49 -04001069 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001070 {
1071 this->lock = lock;
1072
1073 switch(lock)
1074 {
1075 case LOCK_UNLOCKED:
1076 case LOCK_READONLY:
1077 break;
1078 case LOCK_WRITEONLY:
1079 case LOCK_READWRITE:
1080 case LOCK_DISCARD:
1081 dirty = true;
1082 break;
1083 default:
1084 ASSERT(false);
1085 }
1086
John Baumand4ae8632014-05-06 16:18:33 -04001087 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001088 {
John Baumand4ae8632014-05-06 16:18:33 -04001089 switch(format)
1090 {
1091 #if S3TC_SUPPORT
1092 case FORMAT_DXT1:
1093 #endif
1094 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001095 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001096 case FORMAT_R11_EAC:
1097 case FORMAT_SIGNED_R11_EAC:
1098 case FORMAT_RGB8_ETC2:
1099 case FORMAT_SRGB8_ETC2:
1100 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1101 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001102 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001103 case FORMAT_RG11_EAC:
1104 case FORMAT_SIGNED_RG11_EAC:
1105 case FORMAT_RGBA8_ETC2_EAC:
1106 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1107 case FORMAT_RGBA_ASTC_4x4_KHR:
1108 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1109 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1110 case FORMAT_RGBA_ASTC_5x4_KHR:
1111 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1112 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1113 case FORMAT_RGBA_ASTC_5x5_KHR:
1114 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1115 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1116 case FORMAT_RGBA_ASTC_6x5_KHR:
1117 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1118 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1119 case FORMAT_RGBA_ASTC_6x6_KHR:
1120 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1121 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1122 case FORMAT_RGBA_ASTC_8x5_KHR:
1123 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1124 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1125 case FORMAT_RGBA_ASTC_8x6_KHR:
1126 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1127 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1128 case FORMAT_RGBA_ASTC_8x8_KHR:
1129 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1130 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1131 case FORMAT_RGBA_ASTC_10x5_KHR:
1132 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1133 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1134 case FORMAT_RGBA_ASTC_10x6_KHR:
1135 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1136 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1137 case FORMAT_RGBA_ASTC_10x8_KHR:
1138 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1139 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1140 case FORMAT_RGBA_ASTC_10x10_KHR:
1141 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1142 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1143 case FORMAT_RGBA_ASTC_12x10_KHR:
1144 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1145 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1146 case FORMAT_RGBA_ASTC_12x12_KHR:
1147 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1148 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001149 #if S3TC_SUPPORT
1150 case FORMAT_DXT3:
1151 case FORMAT_DXT5:
1152 #endif
1153 case FORMAT_ATI2:
1154 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1155 default:
1156 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
1157 }
John Bauman89401822014-05-06 15:04:28 -04001158 }
1159
1160 return 0;
1161 }
1162
1163 void Surface::Buffer::unlockRect()
1164 {
1165 lock = LOCK_UNLOCKED;
1166 }
1167
Nicolas Capens477314b2015-06-09 16:47:29 -04001168 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1169 {
1170 resource = new Resource(0);
1171 hasParent = false;
1172 ownExternal = false;
1173 depth = max(1, depth);
1174
1175 external.buffer = pixels;
1176 external.width = width;
1177 external.height = height;
1178 external.depth = depth;
1179 external.format = format;
1180 external.bytes = bytes(external.format);
1181 external.pitchB = pitch;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001182 external.pitchP = external.bytes ? pitch / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001183 external.sliceB = slice;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001184 external.sliceP = external.bytes ? slice / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001185 external.lock = LOCK_UNLOCKED;
1186 external.dirty = true;
1187
1188 internal.buffer = 0;
1189 internal.width = width;
1190 internal.height = height;
1191 internal.depth = depth;
1192 internal.format = selectInternalFormat(format);
1193 internal.bytes = bytes(internal.format);
1194 internal.pitchB = pitchB(internal.width, internal.format, false);
1195 internal.pitchP = pitchP(internal.width, internal.format, false);
1196 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
1197 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
1198 internal.lock = LOCK_UNLOCKED;
1199 internal.dirty = false;
1200
1201 stencil.buffer = 0;
1202 stencil.width = width;
1203 stencil.height = height;
1204 stencil.depth = depth;
1205 stencil.format = FORMAT_S8;
1206 stencil.bytes = bytes(stencil.format);
1207 stencil.pitchB = pitchB(stencil.width, stencil.format, false);
1208 stencil.pitchP = pitchP(stencil.width, stencil.format, false);
1209 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
1210 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
1211 stencil.lock = LOCK_UNLOCKED;
1212 stencil.dirty = false;
1213
1214 dirtyMipmaps = true;
1215 paletteUsed = 0;
1216 }
1217
Nicolas Capensf3898612015-11-24 15:33:31 -05001218 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001219 {
1220 resource = texture ? texture : new Resource(0);
John Bauman19bac1e2014-05-06 15:23:49 -04001221 hasParent = texture != 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001222 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001223 depth = max(1, depth);
1224
1225 external.buffer = 0;
1226 external.width = width;
1227 external.height = height;
1228 external.depth = depth;
1229 external.format = format;
1230 external.bytes = bytes(external.format);
1231 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
1232 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
1233 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
1234 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
1235 external.lock = LOCK_UNLOCKED;
1236 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001237
1238 internal.buffer = 0;
1239 internal.width = width;
1240 internal.height = height;
1241 internal.depth = depth;
1242 internal.format = selectInternalFormat(format);
1243 internal.bytes = bytes(internal.format);
Nicolas Capensf3898612015-11-24 15:33:31 -05001244 internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1245 internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided;
John Bauman89401822014-05-06 15:04:28 -04001246 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
1247 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
1248 internal.lock = LOCK_UNLOCKED;
1249 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001250
1251 stencil.buffer = 0;
1252 stencil.width = width;
1253 stencil.height = height;
1254 stencil.depth = depth;
1255 stencil.format = FORMAT_S8;
1256 stencil.bytes = bytes(stencil.format);
1257 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
1258 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
1259 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
1260 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
1261 stencil.lock = LOCK_UNLOCKED;
1262 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001263
1264 dirtyMipmaps = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001265 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001266 }
1267
1268 Surface::~Surface()
1269 {
John Bauman8a4f6fc2014-05-06 15:26:18 -04001270 // Synchronize so we can deallocate the buffers below
1271 resource->lock(DESTRUCT);
1272 resource->unlock();
1273
John Bauman89401822014-05-06 15:04:28 -04001274 if(!hasParent)
1275 {
1276 resource->destruct();
1277 }
1278
Nicolas Capens477314b2015-06-09 16:47:29 -04001279 if(ownExternal)
1280 {
1281 deallocate(external.buffer);
1282 }
John Bauman89401822014-05-06 15:04:28 -04001283
1284 if(internal.buffer != external.buffer)
1285 {
1286 deallocate(internal.buffer);
1287 }
1288
1289 deallocate(stencil.buffer);
1290
1291 external.buffer = 0;
1292 internal.buffer = 0;
1293 stencil.buffer = 0;
1294 }
1295
John Bauman19bac1e2014-05-06 15:23:49 -04001296 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001297 {
1298 resource->lock(client);
1299
1300 if(!external.buffer)
1301 {
1302 if(internal.buffer && identicalFormats())
1303 {
1304 external.buffer = internal.buffer;
1305 }
1306 else
1307 {
1308 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
1309 }
1310 }
1311
1312 if(internal.dirty)
1313 {
1314 if(lock != LOCK_DISCARD)
1315 {
1316 update(external, internal);
1317 }
John Bauman66b8ab22014-05-06 15:57:45 -04001318
1319 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001320 }
1321
1322 switch(lock)
1323 {
1324 case LOCK_READONLY:
1325 break;
1326 case LOCK_WRITEONLY:
1327 case LOCK_READWRITE:
1328 case LOCK_DISCARD:
1329 dirtyMipmaps = true;
1330 break;
1331 default:
1332 ASSERT(false);
1333 }
1334
John Bauman19bac1e2014-05-06 15:23:49 -04001335 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001336 }
1337
1338 void Surface::unlockExternal()
1339 {
1340 resource->unlock();
1341
1342 external.unlockRect();
1343 }
1344
John Bauman19bac1e2014-05-06 15:23:49 -04001345 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001346 {
1347 if(lock != LOCK_UNLOCKED)
1348 {
1349 resource->lock(client);
1350 }
1351
1352 if(!internal.buffer)
1353 {
1354 if(external.buffer && identicalFormats())
1355 {
1356 internal.buffer = external.buffer;
1357 }
1358 else
1359 {
1360 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
1361 }
1362 }
1363
1364 // FIXME: WHQL requires conversion to lower external precision and back
1365 if(logPrecision >= WHQL)
1366 {
1367 if(internal.dirty && renderTarget && internal.format != external.format)
1368 {
1369 if(lock != LOCK_DISCARD)
1370 {
1371 switch(external.format)
1372 {
1373 case FORMAT_R3G3B2:
1374 case FORMAT_A8R3G3B2:
1375 case FORMAT_A1R5G5B5:
1376 case FORMAT_A2R10G10B10:
1377 case FORMAT_A2B10G10R10:
1378 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1379 unlockExternal();
1380 break;
1381 default:
1382 // Difference passes WHQL
1383 break;
1384 }
1385 }
1386 }
1387 }
1388
John Bauman66b8ab22014-05-06 15:57:45 -04001389 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001390 {
1391 if(lock != LOCK_DISCARD)
1392 {
1393 update(internal, external);
1394 }
John Bauman89401822014-05-06 15:04:28 -04001395
John Bauman66b8ab22014-05-06 15:57:45 -04001396 external.dirty = false;
1397 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001398 }
1399
1400 switch(lock)
1401 {
1402 case LOCK_UNLOCKED:
1403 case LOCK_READONLY:
1404 break;
1405 case LOCK_WRITEONLY:
1406 case LOCK_READWRITE:
1407 case LOCK_DISCARD:
1408 dirtyMipmaps = true;
1409 break;
1410 default:
1411 ASSERT(false);
1412 }
1413
1414 if(lock == LOCK_READONLY && client == PUBLIC)
1415 {
1416 resolve();
1417 }
1418
John Bauman19bac1e2014-05-06 15:23:49 -04001419 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001420 }
1421
1422 void Surface::unlockInternal()
1423 {
1424 resource->unlock();
1425
1426 internal.unlockRect();
1427 }
1428
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001429 void *Surface::lockStencil(int x, int y, int front, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001430 {
1431 resource->lock(client);
1432
1433 if(!stencil.buffer)
1434 {
1435 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
1436 }
1437
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001438 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
John Bauman89401822014-05-06 15:04:28 -04001439 }
1440
1441 void Surface::unlockStencil()
1442 {
1443 resource->unlock();
1444
1445 stencil.unlockRect();
1446 }
1447
1448 int Surface::bytes(Format format)
1449 {
1450 switch(format)
1451 {
1452 case FORMAT_NULL: return 0;
1453 case FORMAT_P8: return 1;
1454 case FORMAT_A8P8: return 2;
1455 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001456 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001457 case FORMAT_R8: return 1;
1458 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001459 case FORMAT_R16I: return 2;
1460 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001461 case FORMAT_A8R3G3B2: return 2;
1462 case FORMAT_R5G6B5: return 2;
1463 case FORMAT_A1R5G5B5: return 2;
1464 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001465 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001466 case FORMAT_X4R4G4B4: return 2;
1467 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001468 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001469 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001470 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001471 case FORMAT_R32I: return 4;
1472 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001473 case FORMAT_X8R8G8B8: return 4;
1474 // case FORMAT_X8G8R8B8Q: return 4;
1475 case FORMAT_A8R8G8B8: return 4;
1476 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001477 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001478 case FORMAT_X8B8G8R8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04001479 case FORMAT_SRGB8_X8: return 4;
1480 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001481 case FORMAT_A8B8G8R8I: return 4;
1482 case FORMAT_R8UI: return 1;
1483 case FORMAT_G8R8UI: return 2;
1484 case FORMAT_X8B8G8R8UI: return 4;
1485 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001486 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001487 case FORMAT_R8I_SNORM: return 1;
1488 case FORMAT_G8R8I_SNORM: return 2;
1489 case FORMAT_X8B8G8R8I_SNORM: return 4;
1490 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001491 case FORMAT_A2R10G10B10: return 4;
1492 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001493 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001494 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001495 case FORMAT_G16R16I: return 4;
1496 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001497 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001498 case FORMAT_G32R32I: return 8;
1499 case FORMAT_G32R32UI: return 8;
1500 case FORMAT_X16B16G16R16I: return 8;
1501 case FORMAT_X16B16G16R16UI: return 8;
1502 case FORMAT_A16B16G16R16I: return 8;
1503 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001504 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001505 case FORMAT_X32B32G32R32I: return 16;
1506 case FORMAT_X32B32G32R32UI: return 16;
1507 case FORMAT_A32B32G32R32I: return 16;
1508 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001509 // Compressed formats
1510 #if S3TC_SUPPORT
1511 case FORMAT_DXT1: return 2; // Column of four pixels
1512 case FORMAT_DXT3: return 4; // Column of four pixels
1513 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001514 #endif
John Bauman89401822014-05-06 15:04:28 -04001515 case FORMAT_ATI1: return 2; // Column of four pixels
1516 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001517 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001518 case FORMAT_R11_EAC: return 2;
1519 case FORMAT_SIGNED_R11_EAC: return 2;
1520 case FORMAT_RG11_EAC: return 4;
1521 case FORMAT_SIGNED_RG11_EAC: return 4;
1522 case FORMAT_RGB8_ETC2: return 2;
1523 case FORMAT_SRGB8_ETC2: return 2;
1524 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1525 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1526 case FORMAT_RGBA8_ETC2_EAC: return 4;
1527 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1528 case FORMAT_RGBA_ASTC_4x4_KHR:
1529 case FORMAT_RGBA_ASTC_5x4_KHR:
1530 case FORMAT_RGBA_ASTC_5x5_KHR:
1531 case FORMAT_RGBA_ASTC_6x5_KHR:
1532 case FORMAT_RGBA_ASTC_6x6_KHR:
1533 case FORMAT_RGBA_ASTC_8x5_KHR:
1534 case FORMAT_RGBA_ASTC_8x6_KHR:
1535 case FORMAT_RGBA_ASTC_8x8_KHR:
1536 case FORMAT_RGBA_ASTC_10x5_KHR:
1537 case FORMAT_RGBA_ASTC_10x6_KHR:
1538 case FORMAT_RGBA_ASTC_10x8_KHR:
1539 case FORMAT_RGBA_ASTC_10x10_KHR:
1540 case FORMAT_RGBA_ASTC_12x10_KHR:
1541 case FORMAT_RGBA_ASTC_12x12_KHR:
1542 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1543 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1544 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1545 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1546 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1547 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1548 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1549 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1550 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1551 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1552 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1553 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1554 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1555 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001556 // Bumpmap formats
1557 case FORMAT_V8U8: return 2;
1558 case FORMAT_L6V5U5: return 2;
1559 case FORMAT_Q8W8V8U8: return 4;
1560 case FORMAT_X8L8V8U8: return 4;
1561 case FORMAT_A2W10V10U10: return 4;
1562 case FORMAT_V16U16: return 4;
1563 case FORMAT_A16W16V16U16: return 8;
1564 case FORMAT_Q16W16V16U16: return 8;
1565 // Luminance formats
1566 case FORMAT_L8: return 1;
1567 case FORMAT_A4L4: return 1;
1568 case FORMAT_L16: return 2;
1569 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001570 case FORMAT_L16F: return 2;
1571 case FORMAT_A16L16F: return 4;
1572 case FORMAT_L32F: return 4;
1573 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001574 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001575 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001576 case FORMAT_R16F: return 2;
1577 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001578 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001579 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001580 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001581 case FORMAT_R32F: return 4;
1582 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001583 case FORMAT_B32G32R32F: return 12;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001584 case FORMAT_X32B32G32R32F: return 16;
John Bauman89401822014-05-06 15:04:28 -04001585 case FORMAT_A32B32G32R32F: return 16;
1586 // Depth/stencil formats
1587 case FORMAT_D16: return 2;
1588 case FORMAT_D32: return 4;
1589 case FORMAT_D24X8: return 4;
1590 case FORMAT_D24S8: return 4;
1591 case FORMAT_D24FS8: return 4;
1592 case FORMAT_D32F: return 4;
1593 case FORMAT_D32F_COMPLEMENTARY: return 4;
1594 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001595 case FORMAT_D32FS8_TEXTURE: return 4;
1596 case FORMAT_D32FS8_SHADOW: return 4;
1597 case FORMAT_DF24S8: return 4;
1598 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001599 case FORMAT_INTZ: return 4;
1600 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001601 case FORMAT_YV12_BT601: return 1; // Y plane only
1602 case FORMAT_YV12_BT709: return 1; // Y plane only
1603 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001604 default:
1605 ASSERT(false);
1606 }
1607
1608 return 0;
1609 }
1610
1611 int Surface::pitchB(int width, Format format, bool target)
1612 {
1613 if(target || isDepth(format) || isStencil(format))
1614 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001615 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001616 }
1617
1618 switch(format)
1619 {
1620 #if S3TC_SUPPORT
1621 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001622 #endif
1623 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001624 case FORMAT_R11_EAC:
1625 case FORMAT_SIGNED_R11_EAC:
1626 case FORMAT_RGB8_ETC2:
1627 case FORMAT_SRGB8_ETC2:
1628 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1629 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001630 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001631 case FORMAT_RG11_EAC:
1632 case FORMAT_SIGNED_RG11_EAC:
1633 case FORMAT_RGBA8_ETC2_EAC:
1634 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1635 case FORMAT_RGBA_ASTC_4x4_KHR:
1636 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1637 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1638 case FORMAT_RGBA_ASTC_5x4_KHR:
1639 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1640 case FORMAT_RGBA_ASTC_5x5_KHR:
1641 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1642 return 16 * ((width + 4) / 5);
1643 case FORMAT_RGBA_ASTC_6x5_KHR:
1644 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1645 case FORMAT_RGBA_ASTC_6x6_KHR:
1646 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1647 return 16 * ((width + 5) / 6);
1648 case FORMAT_RGBA_ASTC_8x5_KHR:
1649 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1650 case FORMAT_RGBA_ASTC_8x6_KHR:
1651 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1652 case FORMAT_RGBA_ASTC_8x8_KHR:
1653 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1654 return 16 * ((width + 7) / 8);
1655 case FORMAT_RGBA_ASTC_10x5_KHR:
1656 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1657 case FORMAT_RGBA_ASTC_10x6_KHR:
1658 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1659 case FORMAT_RGBA_ASTC_10x8_KHR:
1660 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1661 case FORMAT_RGBA_ASTC_10x10_KHR:
1662 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1663 return 16 * ((width + 9) / 10);
1664 case FORMAT_RGBA_ASTC_12x10_KHR:
1665 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1666 case FORMAT_RGBA_ASTC_12x12_KHR:
1667 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1668 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001669 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001670 case FORMAT_DXT3:
1671 case FORMAT_DXT5:
1672 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001673 #endif
John Bauman89401822014-05-06 15:04:28 -04001674 case FORMAT_ATI1:
1675 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1676 case FORMAT_ATI2:
1677 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001678 case FORMAT_YV12_BT601:
1679 case FORMAT_YV12_BT709:
1680 case FORMAT_YV12_JFIF:
1681 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001682 default:
1683 return bytes(format) * width;
1684 }
1685 }
1686
1687 int Surface::pitchP(int width, Format format, bool target)
1688 {
1689 int B = bytes(format);
1690
1691 return B > 0 ? pitchB(width, format, target) / B : 0;
1692 }
1693
1694 int Surface::sliceB(int width, int height, Format format, bool target)
1695 {
1696 if(target || isDepth(format) || isStencil(format))
1697 {
1698 height = ((height + 1) & ~1);
1699 }
1700
1701 switch(format)
1702 {
1703 #if S3TC_SUPPORT
1704 case FORMAT_DXT1:
1705 case FORMAT_DXT3:
1706 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001707 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001708 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001709 case FORMAT_R11_EAC:
1710 case FORMAT_SIGNED_R11_EAC:
1711 case FORMAT_RG11_EAC:
1712 case FORMAT_SIGNED_RG11_EAC:
1713 case FORMAT_RGB8_ETC2:
1714 case FORMAT_SRGB8_ETC2:
1715 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1716 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1717 case FORMAT_RGBA8_ETC2_EAC:
1718 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1719 case FORMAT_RGBA_ASTC_4x4_KHR:
1720 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1721 case FORMAT_RGBA_ASTC_5x4_KHR:
1722 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Nicolas Capens22658242014-11-29 00:31:41 -05001723 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001724 case FORMAT_RGBA_ASTC_5x5_KHR:
1725 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1726 case FORMAT_RGBA_ASTC_6x5_KHR:
1727 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1728 case FORMAT_RGBA_ASTC_8x5_KHR:
1729 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1730 case FORMAT_RGBA_ASTC_10x5_KHR:
1731 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1732 return pitchB(width, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
1733 case FORMAT_RGBA_ASTC_6x6_KHR:
1734 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1735 case FORMAT_RGBA_ASTC_8x6_KHR:
1736 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1737 case FORMAT_RGBA_ASTC_10x6_KHR:
1738 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1739 return pitchB(width, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
1740 case FORMAT_RGBA_ASTC_8x8_KHR:
1741 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1742 case FORMAT_RGBA_ASTC_10x8_KHR:
1743 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1744 return pitchB(width, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
1745 case FORMAT_RGBA_ASTC_10x10_KHR:
1746 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1747 case FORMAT_RGBA_ASTC_12x10_KHR:
1748 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1749 return pitchB(width, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
1750 case FORMAT_RGBA_ASTC_12x12_KHR:
1751 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1752 return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001753 case FORMAT_ATI1:
1754 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001755 default:
Nicolas Capens22658242014-11-29 00:31:41 -05001756 return pitchB(width, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001757 }
1758 }
1759
1760 int Surface::sliceP(int width, int height, Format format, bool target)
1761 {
1762 int B = bytes(format);
1763
1764 return B > 0 ? sliceB(width, height, format, target) / B : 0;
1765 }
1766
1767 void Surface::update(Buffer &destination, Buffer &source)
1768 {
1769 // ASSERT(source.lock != LOCK_UNLOCKED);
1770 // ASSERT(destination.lock != LOCK_UNLOCKED);
Nicolas Capensc39901e2016-03-21 16:37:44 -04001771
John Bauman89401822014-05-06 15:04:28 -04001772 if(destination.buffer != source.buffer)
1773 {
1774 ASSERT(source.dirty && !destination.dirty);
1775
1776 switch(source.format)
1777 {
1778 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001779 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1780 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1781 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1782 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1783 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1784 #if S3TC_SUPPORT
1785 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1786 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1787 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001788 #endif
John Bauman89401822014-05-06 15:04:28 -04001789 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1790 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001791 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1792 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1793 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1794 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001795 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001796 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1797 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1798 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1799 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1800 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1801 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1802 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1803 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1804 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1805 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1806 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1807 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1808 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1809 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1810 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1811 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1812 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1813 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1814 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1815 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1816 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1817 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1818 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1819 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1820 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1821 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1822 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1823 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1824 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1825 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1826 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1827 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1828 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1829 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001830 default: genericUpdate(destination, source); break;
1831 }
1832 }
John Bauman89401822014-05-06 15:04:28 -04001833 }
1834
1835 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1836 {
1837 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1838 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1839
1840 int depth = min(destination.depth, source.depth);
1841 int height = min(destination.height, source.height);
1842 int width = min(destination.width, source.width);
1843 int rowBytes = width * source.bytes;
1844
1845 for(int z = 0; z < depth; z++)
1846 {
1847 unsigned char *sourceRow = sourceSlice;
1848 unsigned char *destinationRow = destinationSlice;
1849
1850 for(int y = 0; y < height; y++)
1851 {
1852 if(source.format == destination.format)
1853 {
1854 memcpy(destinationRow, sourceRow, rowBytes);
1855 }
1856 else
1857 {
1858 unsigned char *sourceElement = sourceRow;
1859 unsigned char *destinationElement = destinationRow;
1860
1861 for(int x = 0; x < width; x++)
1862 {
1863 Color<float> color = source.read(sourceElement);
1864 destination.write(destinationElement, color);
1865
1866 sourceElement += source.bytes;
1867 destinationElement += destination.bytes;
1868 }
1869 }
1870
1871 sourceRow += source.pitchB;
1872 destinationRow += destination.pitchB;
1873 }
1874
1875 sourceSlice += source.sliceB;
1876 destinationSlice += destination.sliceB;
1877 }
1878 }
1879
1880 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
1881 {
1882 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1883 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1884
1885 for(int z = 0; z < destination.depth && z < source.depth; z++)
1886 {
1887 unsigned char *sourceRow = sourceSlice;
1888 unsigned char *destinationRow = destinationSlice;
1889
1890 for(int y = 0; y < destination.height && y < source.height; y++)
1891 {
1892 unsigned char *sourceElement = sourceRow;
1893 unsigned char *destinationElement = destinationRow;
1894
1895 for(int x = 0; x < destination.width && x < source.width; x++)
1896 {
1897 unsigned int b = sourceElement[0];
1898 unsigned int g = sourceElement[1];
1899 unsigned int r = sourceElement[2];
1900
1901 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1902
1903 sourceElement += source.bytes;
1904 destinationElement += destination.bytes;
1905 }
1906
1907 sourceRow += source.pitchB;
1908 destinationRow += destination.pitchB;
1909 }
1910
1911 sourceSlice += source.sliceB;
1912 destinationSlice += destination.sliceB;
1913 }
1914 }
1915
John Bauman89401822014-05-06 15:04:28 -04001916 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
1917 {
1918 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1919 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1920
1921 for(int z = 0; z < destination.depth && z < source.depth; z++)
1922 {
1923 unsigned char *sourceRow = sourceSlice;
1924 unsigned char *destinationRow = destinationSlice;
1925
1926 for(int y = 0; y < destination.height && y < source.height; y++)
1927 {
1928 unsigned char *sourceElement = sourceRow;
1929 unsigned char *destinationElement = destinationRow;
1930
1931 for(int x = 0; x < destination.width && x < source.width; x++)
1932 {
1933 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001934
John Bauman89401822014-05-06 15:04:28 -04001935 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1936 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1937 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1938
1939 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1940
1941 sourceElement += source.bytes;
1942 destinationElement += destination.bytes;
1943 }
1944
1945 sourceRow += source.pitchB;
1946 destinationRow += destination.pitchB;
1947 }
1948
1949 sourceSlice += source.sliceB;
1950 destinationSlice += destination.sliceB;
1951 }
1952 }
1953
1954 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
1955 {
1956 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1957 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1958
1959 for(int z = 0; z < destination.depth && z < source.depth; z++)
1960 {
1961 unsigned char *sourceRow = sourceSlice;
1962 unsigned char *destinationRow = destinationSlice;
1963
1964 for(int y = 0; y < destination.height && y < source.height; y++)
1965 {
1966 unsigned char *sourceElement = sourceRow;
1967 unsigned char *destinationElement = destinationRow;
1968
1969 for(int x = 0; x < destination.width && x < source.width; x++)
1970 {
1971 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001972
John Bauman89401822014-05-06 15:04:28 -04001973 unsigned int a = (argb & 0x8000) * 130560;
1974 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1975 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1976 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
1977
1978 *(unsigned int*)destinationElement = a | r | g | b;
1979
1980 sourceElement += source.bytes;
1981 destinationElement += destination.bytes;
1982 }
1983
1984 sourceRow += source.pitchB;
1985 destinationRow += destination.pitchB;
1986 }
1987
1988 sourceSlice += source.sliceB;
1989 destinationSlice += destination.sliceB;
1990 }
1991 }
1992
1993 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
1994 {
1995 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1996 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1997
1998 for(int z = 0; z < destination.depth && z < source.depth; z++)
1999 {
2000 unsigned char *sourceRow = sourceSlice;
2001 unsigned char *destinationRow = destinationSlice;
2002
2003 for(int y = 0; y < destination.height && y < source.height; y++)
2004 {
2005 unsigned char *sourceElement = sourceRow;
2006 unsigned char *destinationElement = destinationRow;
2007
2008 for(int x = 0; x < destination.width && x < source.width; x++)
2009 {
2010 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002011
John Bauman89401822014-05-06 15:04:28 -04002012 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2013 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2014 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2015
2016 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2017
2018 sourceElement += source.bytes;
2019 destinationElement += destination.bytes;
2020 }
2021
2022 sourceRow += source.pitchB;
2023 destinationRow += destination.pitchB;
2024 }
2025
2026 sourceSlice += source.sliceB;
2027 destinationSlice += destination.sliceB;
2028 }
2029 }
2030
2031 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
2032 {
2033 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2034 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2035
2036 for(int z = 0; z < destination.depth && z < source.depth; z++)
2037 {
2038 unsigned char *sourceRow = sourceSlice;
2039 unsigned char *destinationRow = destinationSlice;
2040
2041 for(int y = 0; y < destination.height && y < source.height; y++)
2042 {
2043 unsigned char *sourceElement = sourceRow;
2044 unsigned char *destinationElement = destinationRow;
2045
2046 for(int x = 0; x < destination.width && x < source.width; x++)
2047 {
2048 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002049
John Bauman89401822014-05-06 15:04:28 -04002050 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2051 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2052 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2053 unsigned int b = (argb & 0x000F) * 0x00000011;
2054
2055 *(unsigned int*)destinationElement = a | r | g | b;
2056
2057 sourceElement += source.bytes;
2058 destinationElement += destination.bytes;
2059 }
2060
2061 sourceRow += source.pitchB;
2062 destinationRow += destination.pitchB;
2063 }
2064
2065 sourceSlice += source.sliceB;
2066 destinationSlice += destination.sliceB;
2067 }
2068 }
2069
2070 void Surface::decodeP8(Buffer &destination, const Buffer &source)
2071 {
2072 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2073 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2074
2075 for(int z = 0; z < destination.depth && z < source.depth; z++)
2076 {
2077 unsigned char *sourceRow = sourceSlice;
2078 unsigned char *destinationRow = destinationSlice;
2079
2080 for(int y = 0; y < destination.height && y < source.height; y++)
2081 {
2082 unsigned char *sourceElement = sourceRow;
2083 unsigned char *destinationElement = destinationRow;
2084
2085 for(int x = 0; x < destination.width && x < source.width; x++)
2086 {
2087 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2088
2089 unsigned int r = (abgr & 0x000000FF) << 16;
2090 unsigned int g = (abgr & 0x0000FF00) << 0;
2091 unsigned int b = (abgr & 0x00FF0000) >> 16;
2092 unsigned int a = (abgr & 0xFF000000) >> 0;
2093
2094 *(unsigned int*)destinationElement = a | r | g | b;
2095
2096 sourceElement += source.bytes;
2097 destinationElement += destination.bytes;
2098 }
2099
2100 sourceRow += source.pitchB;
2101 destinationRow += destination.pitchB;
2102 }
2103
2104 sourceSlice += source.sliceB;
2105 destinationSlice += destination.sliceB;
2106 }
2107 }
2108
2109#if S3TC_SUPPORT
2110 void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
2111 {
2112 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002113 const DXT1 *source = (const DXT1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002114
2115 for(int z = 0; z < external.depth; z++)
2116 {
2117 unsigned int *dest = destSlice;
2118
2119 for(int y = 0; y < external.height; y += 4)
2120 {
2121 for(int x = 0; x < external.width; x += 4)
2122 {
2123 Color<byte> c[4];
2124
2125 c[0] = source->c0;
2126 c[1] = source->c1;
2127
2128 if(source->c0 > source->c1) // No transparency
2129 {
2130 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2131 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2132 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2133 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2134 c[2].a = 0xFF;
2135
2136 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2137 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2138 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2139 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2140 c[3].a = 0xFF;
2141 }
2142 else // c3 transparent
2143 {
2144 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2145 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2146 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2147 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2148 c[2].a = 0xFF;
2149
2150 c[3].r = 0;
2151 c[3].g = 0;
2152 c[3].b = 0;
2153 c[3].a = 0;
2154 }
2155
2156 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2157 {
2158 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2159 {
2160 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2161 }
2162 }
2163
2164 source++;
2165 }
2166 }
2167
2168 (byte*&)destSlice += internal.sliceB;
2169 }
2170 }
2171
2172 void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
2173 {
2174 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002175 const DXT3 *source = (const DXT3*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002176
2177 for(int z = 0; z < external.depth; z++)
2178 {
2179 unsigned int *dest = destSlice;
2180
2181 for(int y = 0; y < external.height; y += 4)
2182 {
2183 for(int x = 0; x < external.width; x += 4)
2184 {
2185 Color<byte> c[4];
2186
2187 c[0] = source->c0;
2188 c[1] = source->c1;
2189
2190 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2191 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2192 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2193 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2194
2195 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2196 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2197 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2198 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2199
2200 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2201 {
2202 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2203 {
2204 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2205 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2206
2207 dest[(x + i) + (y + j) * internal.width] = color;
2208 }
2209 }
2210
2211 source++;
2212 }
2213 }
2214
2215 (byte*&)destSlice += internal.sliceB;
2216 }
2217 }
2218
2219 void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
2220 {
2221 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002222 const DXT5 *source = (const DXT5*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002223
2224 for(int z = 0; z < external.depth; z++)
2225 {
2226 unsigned int *dest = destSlice;
2227
2228 for(int y = 0; y < external.height; y += 4)
2229 {
2230 for(int x = 0; x < external.width; x += 4)
2231 {
2232 Color<byte> c[4];
2233
2234 c[0] = source->c0;
2235 c[1] = source->c1;
2236
2237 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2238 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2239 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2240 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2241
2242 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2243 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2244 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2245 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2246
2247 byte a[8];
2248
2249 a[0] = source->a0;
2250 a[1] = source->a1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002251
John Bauman89401822014-05-06 15:04:28 -04002252 if(a[0] > a[1])
2253 {
2254 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2255 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2256 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2257 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2258 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2259 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2260 }
2261 else
2262 {
2263 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2264 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2265 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2266 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2267 a[6] = 0;
2268 a[7] = 0xFF;
2269 }
2270
2271 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2272 {
2273 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2274 {
2275 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2276 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002277
John Bauman89401822014-05-06 15:04:28 -04002278 dest[(x + i) + (y + j) * internal.width] = color;
2279 }
2280 }
2281
2282 source++;
2283 }
2284 }
2285
2286 (byte*&)destSlice += internal.sliceB;
2287 }
2288 }
Nicolas Capens22658242014-11-29 00:31:41 -05002289#endif
John Bauman89401822014-05-06 15:04:28 -04002290
2291 void Surface::decodeATI1(Buffer &internal, const Buffer &external)
2292 {
2293 byte *destSlice = (byte*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002294 const ATI1 *source = (const ATI1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002295
2296 for(int z = 0; z < external.depth; z++)
2297 {
2298 byte *dest = destSlice;
2299
2300 for(int y = 0; y < external.height; y += 4)
2301 {
2302 for(int x = 0; x < external.width; x += 4)
2303 {
2304 byte r[8];
2305
2306 r[0] = source->r0;
2307 r[1] = source->r1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002308
John Bauman89401822014-05-06 15:04:28 -04002309 if(r[0] > r[1])
2310 {
2311 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2312 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2313 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2314 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2315 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2316 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2317 }
2318 else
2319 {
2320 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2321 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2322 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2323 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2324 r[6] = 0;
2325 r[7] = 0xFF;
2326 }
2327
2328 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2329 {
2330 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2331 {
2332 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2333 }
2334 }
2335
2336 source++;
2337 }
2338 }
2339
2340 destSlice += internal.sliceB;
2341 }
2342 }
2343
2344 void Surface::decodeATI2(Buffer &internal, const Buffer &external)
2345 {
2346 word *destSlice = (word*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002347 const ATI2 *source = (const ATI2*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002348
2349 for(int z = 0; z < external.depth; z++)
2350 {
2351 word *dest = destSlice;
2352
2353 for(int y = 0; y < external.height; y += 4)
2354 {
2355 for(int x = 0; x < external.width; x += 4)
2356 {
2357 byte X[8];
2358
2359 X[0] = source->x0;
2360 X[1] = source->x1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002361
John Bauman89401822014-05-06 15:04:28 -04002362 if(X[0] > X[1])
2363 {
2364 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2365 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2366 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2367 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2368 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2369 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2370 }
2371 else
2372 {
2373 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2374 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2375 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2376 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2377 X[6] = 0;
2378 X[7] = 0xFF;
2379 }
2380
2381 byte Y[8];
2382
2383 Y[0] = source->y0;
2384 Y[1] = source->y1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002385
John Bauman89401822014-05-06 15:04:28 -04002386 if(Y[0] > Y[1])
2387 {
2388 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2389 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2390 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2391 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2392 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2393 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2394 }
2395 else
2396 {
2397 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2398 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2399 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2400 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2401 Y[6] = 0;
2402 Y[7] = 0xFF;
2403 }
2404
2405 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2406 {
2407 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2408 {
2409 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2410 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2411
2412 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2413 }
2414 }
2415
2416 source++;
2417 }
2418 }
2419
2420 (byte*&)destSlice += internal.sliceB;
2421 }
2422 }
Nicolas Capens22658242014-11-29 00:31:41 -05002423
Alexis Hetu0de50d42015-09-09 13:56:41 -04002424 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002425 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002426 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2427 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Nicolas Capens22658242014-11-29 00:31:41 -05002428
Alexis Hetu0de50d42015-09-09 13:56:41 -04002429 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002430 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002431 static byte sRGBtoLinearTable[256];
2432 static bool sRGBtoLinearTableDirty = true;
2433 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002434 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002435 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002436 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002437 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002438 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002439 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002440 }
2441
Alexis Hetu0de50d42015-09-09 13:56:41 -04002442 // Perform sRGB conversion in place after decoding
2443 byte* src = (byte*)internal.buffer;
2444 for(int y = 0; y < internal.height; y++)
2445 {
2446 byte* srcRow = src + y * internal.pitchB;
2447 for(int x = 0; x < internal.width; x++)
2448 {
2449 byte* srcPix = srcRow + x * internal.bytes;
2450 for(int i = 0; i < 3; i++)
2451 {
2452 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2453 }
2454 }
2455 }
Nicolas Capens22658242014-11-29 00:31:41 -05002456 }
2457 }
John Bauman89401822014-05-06 15:04:28 -04002458
Alexis Hetu460e41f2015-09-01 10:58:37 -04002459 void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
2460 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002461 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002462
Alexis Hetu0de50d42015-09-09 13:56:41 -04002463 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2464 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
2465
2466 // FIXME: We convert signed data to float, until signed integer internal formats are supported
2467 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
2468 if(isSigned)
2469 {
2470 sbyte* src = (sbyte*)internal.buffer;
2471
2472 for(int y = 0; y < internal.height; y++)
2473 {
2474 sbyte* srcRow = src + y * internal.pitchB;
2475 for(int x = internal.width - 1; x >= 0; x--)
2476 {
2477 int dx = x & 0xFFFFFFFC;
2478 int mx = x - dx;
2479 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels;
2480 float* dstPix = (float*)(srcRow + x * internal.bytes);
2481 for(int c = nbChannels - 1; c >= 0; c--)
2482 {
2483 static const float normalization = 1.0f / 127.875f;
2484 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
2485 }
2486 }
2487 }
2488 }
Alexis Hetu460e41f2015-09-01 10:58:37 -04002489 }
2490
2491 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
2492 {
2493 }
2494
John Bauman89401822014-05-06 15:04:28 -04002495 unsigned int Surface::size(int width, int height, int depth, Format format)
2496 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002497 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002498 int width4 = align(width, 4);
2499 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002500
2501 switch(format)
2502 {
2503 #if S3TC_SUPPORT
2504 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002505 #endif
John Bauman89401822014-05-06 15:04:28 -04002506 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002507 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002508 case FORMAT_R11_EAC:
2509 case FORMAT_SIGNED_R11_EAC:
2510 case FORMAT_RGB8_ETC2:
2511 case FORMAT_SRGB8_ETC2:
2512 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2513 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002514 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002515 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002516 case FORMAT_DXT3:
2517 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002518 #endif
John Bauman89401822014-05-06 15:04:28 -04002519 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002520 case FORMAT_RG11_EAC:
2521 case FORMAT_SIGNED_RG11_EAC:
2522 case FORMAT_RGBA8_ETC2_EAC:
2523 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2524 case FORMAT_RGBA_ASTC_4x4_KHR:
2525 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002526 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002527 case FORMAT_RGBA_ASTC_5x4_KHR:
2528 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2529 return align(width, 5) * height4 * depth;
2530 case FORMAT_RGBA_ASTC_5x5_KHR:
2531 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2532 return align(width, 5) * align(height, 5) * depth;
2533 case FORMAT_RGBA_ASTC_6x5_KHR:
2534 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2535 return align(width, 6) * align(height, 5) * depth;
2536 case FORMAT_RGBA_ASTC_6x6_KHR:
2537 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2538 return align(width, 6) * align(height, 6) * depth;
2539 case FORMAT_RGBA_ASTC_8x5_KHR:
2540 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2541 return align(width, 8) * align(height, 5) * depth;
2542 case FORMAT_RGBA_ASTC_8x6_KHR:
2543 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2544 return align(width, 8) * align(height, 6) * depth;
2545 case FORMAT_RGBA_ASTC_8x8_KHR:
2546 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2547 return align(width, 8) * align(height, 8) * depth;
2548 case FORMAT_RGBA_ASTC_10x5_KHR:
2549 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2550 return align(width, 10) * align(height, 5) * depth;
2551 case FORMAT_RGBA_ASTC_10x6_KHR:
2552 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2553 return align(width, 10) * align(height, 6) * depth;
2554 case FORMAT_RGBA_ASTC_10x8_KHR:
2555 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2556 return align(width, 10) * align(height, 8) * depth;
2557 case FORMAT_RGBA_ASTC_10x10_KHR:
2558 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2559 return align(width, 10) * align(height, 10) * depth;
2560 case FORMAT_RGBA_ASTC_12x10_KHR:
2561 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2562 return align(width, 12) * align(height, 10) * depth;
2563 case FORMAT_RGBA_ASTC_12x12_KHR:
2564 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2565 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002566 case FORMAT_YV12_BT601:
2567 case FORMAT_YV12_BT709:
2568 case FORMAT_YV12_JFIF:
2569 {
2570 unsigned int YStride = align(width, 16);
2571 unsigned int YSize = YStride * height;
2572 unsigned int CStride = align(YStride / 2, 16);
Nicolas Capens0bac2852016-05-07 06:09:58 -04002573 unsigned int CSize = CStride * height / 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002574
2575 return YSize + 2 * CSize;
2576 }
John Bauman89401822014-05-06 15:04:28 -04002577 default:
2578 return bytes(format) * width * height * depth;
2579 }
John Bauman89401822014-05-06 15:04:28 -04002580 }
2581
2582 bool Surface::isStencil(Format format)
2583 {
2584 switch(format)
2585 {
2586 case FORMAT_D32:
2587 case FORMAT_D16:
2588 case FORMAT_D24X8:
2589 case FORMAT_D32F:
2590 case FORMAT_D32F_COMPLEMENTARY:
2591 case FORMAT_D32F_LOCKABLE:
2592 return false;
2593 case FORMAT_D24S8:
2594 case FORMAT_D24FS8:
2595 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002596 case FORMAT_DF24S8:
2597 case FORMAT_DF16S8:
2598 case FORMAT_D32FS8_TEXTURE:
2599 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002600 case FORMAT_INTZ:
2601 return true;
2602 default:
2603 return false;
2604 }
2605 }
2606
2607 bool Surface::isDepth(Format format)
2608 {
2609 switch(format)
2610 {
2611 case FORMAT_D32:
2612 case FORMAT_D16:
2613 case FORMAT_D24X8:
2614 case FORMAT_D24S8:
2615 case FORMAT_D24FS8:
2616 case FORMAT_D32F:
2617 case FORMAT_D32F_COMPLEMENTARY:
2618 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002619 case FORMAT_DF24S8:
2620 case FORMAT_DF16S8:
2621 case FORMAT_D32FS8_TEXTURE:
2622 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002623 case FORMAT_INTZ:
2624 return true;
2625 case FORMAT_S8:
2626 return false;
2627 default:
2628 return false;
2629 }
2630 }
2631
Alexis Hetub9dda642016-10-06 11:25:32 -04002632 bool Surface::hasQuadLayout(Format format)
2633 {
2634 switch(format)
2635 {
2636 case FORMAT_D32:
2637 case FORMAT_D16:
2638 case FORMAT_D24X8:
2639 case FORMAT_D24S8:
2640 case FORMAT_D24FS8:
2641 case FORMAT_D32F:
2642 case FORMAT_D32F_COMPLEMENTARY:
2643 case FORMAT_DF24S8:
2644 case FORMAT_DF16S8:
2645 case FORMAT_INTZ:
2646 case FORMAT_S8:
2647 case FORMAT_A8G8R8B8Q:
2648 case FORMAT_X8G8R8B8Q:
2649 return true;
2650 case FORMAT_D32F_LOCKABLE:
2651 case FORMAT_D32FS8_TEXTURE:
2652 case FORMAT_D32FS8_SHADOW:
2653 default:
2654 break;
2655 }
2656
2657 return false;
2658 }
2659
John Bauman89401822014-05-06 15:04:28 -04002660 bool Surface::isPalette(Format format)
2661 {
2662 switch(format)
2663 {
2664 case FORMAT_P8:
2665 case FORMAT_A8P8:
2666 return true;
2667 default:
2668 return false;
2669 }
2670 }
2671
2672 bool Surface::isFloatFormat(Format format)
2673 {
2674 switch(format)
2675 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002676 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002677 case FORMAT_R8G8B8:
2678 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002679 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002680 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002681 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002682 case FORMAT_A8R8G8B8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002683 case FORMAT_SRGB8_X8:
2684 case FORMAT_SRGB8_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002685 case FORMAT_A8B8G8R8I:
2686 case FORMAT_R8UI:
2687 case FORMAT_G8R8UI:
2688 case FORMAT_X8B8G8R8UI:
2689 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002690 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002691 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002692 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002693 case FORMAT_A2B10G10R10:
Alexis Hetu43577b82015-10-21 15:32:16 -04002694 case FORMAT_R8I_SNORM:
2695 case FORMAT_G8R8I_SNORM:
2696 case FORMAT_X8B8G8R8I_SNORM:
2697 case FORMAT_A8B8G8R8I_SNORM:
2698 case FORMAT_R16I:
2699 case FORMAT_R16UI:
2700 case FORMAT_G16R16I:
2701 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002702 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002703 case FORMAT_X16B16G16R16I:
2704 case FORMAT_X16B16G16R16UI:
2705 case FORMAT_A16B16G16R16I:
2706 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002707 case FORMAT_A16B16G16R16:
2708 case FORMAT_V8U8:
2709 case FORMAT_Q8W8V8U8:
2710 case FORMAT_X8L8V8U8:
2711 case FORMAT_V16U16:
2712 case FORMAT_A16W16V16U16:
2713 case FORMAT_Q16W16V16U16:
2714 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002715 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002716 case FORMAT_R8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002717 case FORMAT_S8:
John Bauman89401822014-05-06 15:04:28 -04002718 case FORMAT_L8:
2719 case FORMAT_L16:
2720 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002721 case FORMAT_YV12_BT601:
2722 case FORMAT_YV12_BT709:
2723 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002724 case FORMAT_R32I:
2725 case FORMAT_R32UI:
2726 case FORMAT_G32R32I:
2727 case FORMAT_G32R32UI:
2728 case FORMAT_X32B32G32R32I:
2729 case FORMAT_X32B32G32R32UI:
2730 case FORMAT_A32B32G32R32I:
2731 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002732 return false;
2733 case FORMAT_R32F:
2734 case FORMAT_G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002735 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002736 case FORMAT_A32B32G32R32F:
2737 case FORMAT_D32F:
2738 case FORMAT_D32F_COMPLEMENTARY:
2739 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002740 case FORMAT_D32FS8_TEXTURE:
2741 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002742 case FORMAT_L16F:
2743 case FORMAT_A16L16F:
2744 case FORMAT_L32F:
2745 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002746 return true;
2747 default:
2748 ASSERT(false);
2749 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002750
John Bauman89401822014-05-06 15:04:28 -04002751 return false;
2752 }
2753
2754 bool Surface::isUnsignedComponent(Format format, int component)
2755 {
2756 switch(format)
2757 {
2758 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002759 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002760 case FORMAT_R8G8B8:
2761 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002762 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002763 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002764 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002765 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002766 case FORMAT_SRGB8_X8:
2767 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002768 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002769 case FORMAT_A2B10G10R10:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002770 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002771 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002772 case FORMAT_G16R16UI:
2773 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002774 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002775 case FORMAT_A16B16G16R16UI:
2776 case FORMAT_R32UI:
2777 case FORMAT_G32R32UI:
2778 case FORMAT_X32B32G32R32UI:
2779 case FORMAT_A32B32G32R32UI:
2780 case FORMAT_R8UI:
2781 case FORMAT_G8R8UI:
2782 case FORMAT_X8B8G8R8UI:
2783 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002784 case FORMAT_D32F:
2785 case FORMAT_D32F_COMPLEMENTARY:
2786 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002787 case FORMAT_D32FS8_TEXTURE:
2788 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002789 case FORMAT_A8:
2790 case FORMAT_R8:
2791 case FORMAT_L8:
2792 case FORMAT_L16:
2793 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002794 case FORMAT_YV12_BT601:
2795 case FORMAT_YV12_BT709:
2796 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002797 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002798 case FORMAT_A8B8G8R8I:
2799 case FORMAT_A16B16G16R16I:
2800 case FORMAT_A32B32G32R32I:
2801 case FORMAT_A8B8G8R8I_SNORM:
2802 case FORMAT_Q8W8V8U8:
2803 case FORMAT_Q16W16V16U16:
2804 case FORMAT_A32B32G32R32F:
2805 return false;
2806 case FORMAT_R32F:
2807 case FORMAT_R8I:
2808 case FORMAT_R16I:
2809 case FORMAT_R32I:
2810 case FORMAT_R8I_SNORM:
2811 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002812 case FORMAT_V8U8:
2813 case FORMAT_X8L8V8U8:
2814 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002815 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002816 case FORMAT_G8R8I:
2817 case FORMAT_G16R16I:
2818 case FORMAT_G32R32I:
2819 case FORMAT_G8R8I_SNORM:
2820 return component >= 2;
2821 case FORMAT_A16W16V16U16:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002822 case FORMAT_X32B32G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002823 case FORMAT_X8B8G8R8I:
2824 case FORMAT_X16B16G16R16I:
2825 case FORMAT_X32B32G32R32I:
2826 case FORMAT_X8B8G8R8I_SNORM:
2827 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002828 default:
2829 ASSERT(false);
2830 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002831
John Bauman89401822014-05-06 15:04:28 -04002832 return false;
2833 }
2834
2835 bool Surface::isSRGBreadable(Format format)
2836 {
2837 // Keep in sync with Capabilities::isSRGBreadable
2838 switch(format)
2839 {
2840 case FORMAT_L8:
2841 case FORMAT_A8L8:
2842 case FORMAT_R8G8B8:
2843 case FORMAT_A8R8G8B8:
2844 case FORMAT_X8R8G8B8:
2845 case FORMAT_A8B8G8R8:
2846 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002847 case FORMAT_SRGB8_X8:
2848 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002849 case FORMAT_R5G6B5:
2850 case FORMAT_X1R5G5B5:
2851 case FORMAT_A1R5G5B5:
2852 case FORMAT_A4R4G4B4:
2853 #if S3TC_SUPPORT
2854 case FORMAT_DXT1:
2855 case FORMAT_DXT3:
2856 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002857 #endif
John Bauman89401822014-05-06 15:04:28 -04002858 case FORMAT_ATI1:
2859 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002860 return true;
2861 default:
2862 return false;
2863 }
John Bauman89401822014-05-06 15:04:28 -04002864 }
2865
2866 bool Surface::isSRGBwritable(Format format)
2867 {
2868 // Keep in sync with Capabilities::isSRGBwritable
2869 switch(format)
2870 {
2871 case FORMAT_NULL:
2872 case FORMAT_A8R8G8B8:
2873 case FORMAT_X8R8G8B8:
2874 case FORMAT_A8B8G8R8:
2875 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002876 case FORMAT_SRGB8_X8:
2877 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002878 case FORMAT_R5G6B5:
2879 return true;
2880 default:
2881 return false;
2882 }
2883 }
2884
2885 bool Surface::isCompressed(Format format)
2886 {
2887 switch(format)
2888 {
2889 #if S3TC_SUPPORT
2890 case FORMAT_DXT1:
2891 case FORMAT_DXT3:
2892 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002893 #endif
John Bauman89401822014-05-06 15:04:28 -04002894 case FORMAT_ATI1:
2895 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05002896 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002897 case FORMAT_R11_EAC:
2898 case FORMAT_SIGNED_R11_EAC:
2899 case FORMAT_RG11_EAC:
2900 case FORMAT_SIGNED_RG11_EAC:
2901 case FORMAT_RGB8_ETC2:
2902 case FORMAT_SRGB8_ETC2:
2903 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2904 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2905 case FORMAT_RGBA8_ETC2_EAC:
2906 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2907 case FORMAT_RGBA_ASTC_4x4_KHR:
2908 case FORMAT_RGBA_ASTC_5x4_KHR:
2909 case FORMAT_RGBA_ASTC_5x5_KHR:
2910 case FORMAT_RGBA_ASTC_6x5_KHR:
2911 case FORMAT_RGBA_ASTC_6x6_KHR:
2912 case FORMAT_RGBA_ASTC_8x5_KHR:
2913 case FORMAT_RGBA_ASTC_8x6_KHR:
2914 case FORMAT_RGBA_ASTC_8x8_KHR:
2915 case FORMAT_RGBA_ASTC_10x5_KHR:
2916 case FORMAT_RGBA_ASTC_10x6_KHR:
2917 case FORMAT_RGBA_ASTC_10x8_KHR:
2918 case FORMAT_RGBA_ASTC_10x10_KHR:
2919 case FORMAT_RGBA_ASTC_12x10_KHR:
2920 case FORMAT_RGBA_ASTC_12x12_KHR:
2921 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
2922 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2923 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2924 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2925 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2926 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2927 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2928 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2929 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2930 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2931 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2932 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2933 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2934 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04002935 return true;
John Bauman89401822014-05-06 15:04:28 -04002936 default:
2937 return false;
2938 }
2939 }
2940
Alexis Hetu43577b82015-10-21 15:32:16 -04002941 bool Surface::isNonNormalizedInteger(Format format)
2942 {
2943 switch(format)
2944 {
2945 case FORMAT_A8B8G8R8I:
2946 case FORMAT_X8B8G8R8I:
2947 case FORMAT_G8R8I:
2948 case FORMAT_R8I:
2949 case FORMAT_A8B8G8R8UI:
2950 case FORMAT_X8B8G8R8UI:
2951 case FORMAT_G8R8UI:
2952 case FORMAT_R8UI:
2953 case FORMAT_A16B16G16R16I:
2954 case FORMAT_X16B16G16R16I:
2955 case FORMAT_G16R16I:
2956 case FORMAT_R16I:
2957 case FORMAT_A16B16G16R16UI:
2958 case FORMAT_X16B16G16R16UI:
2959 case FORMAT_G16R16UI:
2960 case FORMAT_R16UI:
2961 case FORMAT_A32B32G32R32I:
2962 case FORMAT_X32B32G32R32I:
2963 case FORMAT_G32R32I:
2964 case FORMAT_R32I:
2965 case FORMAT_A32B32G32R32UI:
2966 case FORMAT_X32B32G32R32UI:
2967 case FORMAT_G32R32UI:
2968 case FORMAT_R32UI:
2969 return true;
2970 default:
2971 return false;
2972 }
2973 }
2974
John Bauman89401822014-05-06 15:04:28 -04002975 int Surface::componentCount(Format format)
2976 {
2977 switch(format)
2978 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002979 case FORMAT_R5G6B5: return 3;
2980 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002981 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002982 case FORMAT_X8B8G8R8: return 3;
2983 case FORMAT_A8R8G8B8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04002984 case FORMAT_SRGB8_X8: return 3;
2985 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002986 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002987 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002988 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002989 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002990 case FORMAT_R8I_SNORM: return 1;
2991 case FORMAT_G8R8I_SNORM: return 2;
2992 case FORMAT_X8B8G8R8I_SNORM:return 3;
2993 case FORMAT_A8B8G8R8I_SNORM:return 4;
2994 case FORMAT_R8UI: return 1;
2995 case FORMAT_G8R8UI: return 2;
2996 case FORMAT_X8B8G8R8UI: return 3;
2997 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05002998 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002999 case FORMAT_G16R16I: return 2;
3000 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003001 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003002 case FORMAT_G32R32I: return 2;
3003 case FORMAT_G32R32UI: return 2;
3004 case FORMAT_X16B16G16R16I: return 3;
3005 case FORMAT_X16B16G16R16UI: return 3;
3006 case FORMAT_A16B16G16R16I: return 4;
3007 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003008 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003009 case FORMAT_X32B32G32R32I: return 3;
3010 case FORMAT_X32B32G32R32UI: return 3;
3011 case FORMAT_A32B32G32R32I: return 4;
3012 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003013 case FORMAT_V8U8: return 2;
3014 case FORMAT_Q8W8V8U8: return 4;
3015 case FORMAT_X8L8V8U8: return 3;
3016 case FORMAT_V16U16: return 2;
3017 case FORMAT_A16W16V16U16: return 4;
3018 case FORMAT_Q16W16V16U16: return 4;
3019 case FORMAT_R32F: return 1;
3020 case FORMAT_G32R32F: return 2;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003021 case FORMAT_X32B32G32R32F: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003022 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003023 case FORMAT_D32F: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003024 case FORMAT_D32F_LOCKABLE: return 1;
3025 case FORMAT_D32FS8_TEXTURE: return 1;
3026 case FORMAT_D32FS8_SHADOW: return 1;
3027 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003028 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003029 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003030 case FORMAT_R16I: return 1;
3031 case FORMAT_R16UI: return 1;
3032 case FORMAT_R32I: return 1;
3033 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003034 case FORMAT_L8: return 1;
3035 case FORMAT_L16: return 1;
3036 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003037 case FORMAT_YV12_BT601: return 3;
3038 case FORMAT_YV12_BT709: return 3;
3039 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04003040 default:
3041 ASSERT(false);
3042 }
3043
3044 return 1;
3045 }
3046
3047 void *Surface::allocateBuffer(int width, int height, int depth, Format format)
3048 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04003049 // Render targets require 2x2 quads
3050 int width2 = (width + 1) & ~1;
3051 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04003052
Nicolas Capens6ea71872015-06-26 13:00:48 -04003053 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
3054 // so we have to allocate 4 extra bytes to avoid buffer overruns.
3055 return allocateZero(size(width2, height2, depth, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04003056 }
3057
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003058 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003059 {
3060 while((size_t)buffer & 0x1 && bytes >= 1)
3061 {
3062 *(char*)buffer = (char)pattern;
3063 (char*&)buffer += 1;
3064 bytes -= 1;
3065 }
3066
3067 while((size_t)buffer & 0x3 && bytes >= 2)
3068 {
3069 *(short*)buffer = (short)pattern;
3070 (short*&)buffer += 1;
3071 bytes -= 2;
3072 }
3073
3074 if(CPUID::supportsSSE())
3075 {
3076 while((size_t)buffer & 0xF && bytes >= 4)
3077 {
3078 *(int*)buffer = pattern;
3079 (int*&)buffer += 1;
3080 bytes -= 4;
3081 }
3082
3083 __m128 quad = _mm_set_ps1((float&)pattern);
Nicolas Capensc39901e2016-03-21 16:37:44 -04003084
John Bauman89401822014-05-06 15:04:28 -04003085 float *pointer = (float*)buffer;
3086 int qxwords = bytes / 64;
3087 bytes -= qxwords * 64;
3088
3089 while(qxwords--)
3090 {
3091 _mm_stream_ps(pointer + 0, quad);
3092 _mm_stream_ps(pointer + 4, quad);
3093 _mm_stream_ps(pointer + 8, quad);
3094 _mm_stream_ps(pointer + 12, quad);
3095
3096 pointer += 16;
3097 }
3098
3099 buffer = pointer;
3100 }
3101
3102 while(bytes >= 4)
3103 {
3104 *(int*)buffer = (int)pattern;
3105 (int*&)buffer += 1;
3106 bytes -= 4;
3107 }
3108
3109 while(bytes >= 2)
3110 {
3111 *(short*)buffer = (short)pattern;
3112 (short*&)buffer += 1;
3113 bytes -= 2;
3114 }
3115
3116 while(bytes >= 1)
3117 {
3118 *(char*)buffer = (char)pattern;
3119 (char*&)buffer += 1;
3120 bytes -= 1;
3121 }
3122 }
3123
Alexis Hetu75b650f2015-11-19 17:40:15 -05003124 bool Surface::isEntire(const SliceRect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003125 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003126 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3127 }
John Bauman89401822014-05-06 15:04:28 -04003128
Nicolas Capensc39901e2016-03-21 16:37:44 -04003129 SliceRect Surface::getRect() const
Alexis Hetu75b650f2015-11-19 17:40:15 -05003130 {
Nicolas Capensc39901e2016-03-21 16:37:44 -04003131 return SliceRect(0, 0, internal.width, internal.height, 0);
John Bauman89401822014-05-06 15:04:28 -04003132 }
3133
Nicolas Capensc39901e2016-03-21 16:37:44 -04003134 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003135 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003136 if(width == 0 || height == 0) return;
3137
John Bauman89401822014-05-06 15:04:28 -04003138 // Not overlapping
3139 if(x0 > internal.width) return;
3140 if(y0 > internal.height) return;
3141 if(x0 + width < 0) return;
3142 if(y0 + height < 0) return;
3143
3144 // Clip against dimensions
3145 if(x0 < 0) {width += x0; x0 = 0;}
3146 if(x0 + width > internal.width) width = internal.width - x0;
3147 if(y0 < 0) {height += y0; y0 = 0;}
3148 if(y0 + height > internal.height) height = internal.height - y0;
3149
3150 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3151 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3152
3153 int width2 = (internal.width + 1) & ~1;
3154
3155 int x1 = x0 + width;
3156 int y1 = y0 + height;
3157
3158 if(internal.format == FORMAT_D32F_LOCKABLE ||
John Bauman66b8ab22014-05-06 15:57:45 -04003159 internal.format == FORMAT_D32FS8_TEXTURE ||
3160 internal.format == FORMAT_D32FS8_SHADOW)
John Bauman89401822014-05-06 15:04:28 -04003161 {
3162 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
3163
3164 for(int z = 0; z < internal.depth; z++)
3165 {
3166 for(int y = y0; y < y1; y++)
3167 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003168 memfill4(target, (int&)depth, 4 * width);
John Bauman89401822014-05-06 15:04:28 -04003169 target += width2;
3170 }
3171 }
3172
3173 unlockInternal();
3174 }
3175 else // Quad layout
3176 {
3177 if(complementaryDepthBuffer)
3178 {
3179 depth = 1 - depth;
3180 }
3181
3182 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3183
Alexis Hetu358a1442015-12-03 14:23:10 -05003184 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3185 int oddX1 = (x1 & ~1) * 2;
3186 int evenX0 = ((x0 + 1) & ~1) * 2;
3187 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3188
John Bauman89401822014-05-06 15:04:28 -04003189 for(int z = 0; z < internal.depth; z++)
3190 {
3191 for(int y = y0; y < y1; y++)
3192 {
3193 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003194
John Bauman89401822014-05-06 15:04:28 -04003195 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3196 {
3197 if((x0 & 1) != 0)
3198 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003199 target[oddX0 + 0] = depth;
3200 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003201 }
3202
Alexis Hetu358a1442015-12-03 14:23:10 -05003203 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003204 // {
3205 // target[x2 + 0] = depth;
3206 // target[x2 + 1] = depth;
3207 // target[x2 + 2] = depth;
3208 // target[x2 + 3] = depth;
3209 // }
3210
3211 // __asm
3212 // {
3213 // movss xmm0, depth
3214 // shufps xmm0, xmm0, 0x00
3215 //
3216 // mov eax, x0
3217 // add eax, 1
3218 // and eax, 0xFFFFFFFE
3219 // cmp eax, x1
3220 // jge qEnd
3221 //
3222 // mov edi, target
3223 //
3224 // qLoop:
3225 // movntps [edi+8*eax], xmm0
3226 //
3227 // add eax, 2
3228 // cmp eax, x1
3229 // jl qLoop
3230 // qEnd:
3231 // }
3232
Alexis Hetu358a1442015-12-03 14:23:10 -05003233 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003234
3235 if((x1 & 1) != 0)
3236 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003237 target[oddX1 + 0] = depth;
3238 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003239 }
3240
3241 y++;
3242 }
3243 else
3244 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003245 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003246 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003247 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003248 }
3249 }
3250 }
3251
3252 buffer += internal.sliceP;
3253 }
3254
3255 unlockInternal();
3256 }
3257 }
3258
Nicolas Capensc39901e2016-03-21 16:37:44 -04003259 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003260 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003261 if(mask == 0 || width == 0 || height == 0) return;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003262
John Bauman89401822014-05-06 15:04:28 -04003263 // Not overlapping
3264 if(x0 > internal.width) return;
3265 if(y0 > internal.height) return;
3266 if(x0 + width < 0) return;
3267 if(y0 + height < 0) return;
3268
3269 // Clip against dimensions
3270 if(x0 < 0) {width += x0; x0 = 0;}
3271 if(x0 + width > internal.width) width = internal.width - x0;
3272 if(y0 < 0) {height += y0; y0 = 0;}
3273 if(y0 + height > internal.height) height = internal.height - y0;
3274
3275 int width2 = (internal.width + 1) & ~1;
3276
3277 int x1 = x0 + width;
3278 int y1 = y0 + height;
3279
Alexis Hetu358a1442015-12-03 14:23:10 -05003280 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3281 int oddX1 = (x1 & ~1) * 2;
3282 int evenX0 = ((x0 + 1) & ~1) * 2;
3283 int evenBytes = oddX1 - evenX0;
3284
John Bauman89401822014-05-06 15:04:28 -04003285 unsigned char maskedS = s & mask;
3286 unsigned char invMask = ~mask;
3287 unsigned int fill = maskedS;
3288 fill = fill | (fill << 8) | (fill << 16) + (fill << 24);
3289
Alexis Hetua52dfbd2016-10-05 17:03:30 -04003290 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003291
3292 // Stencil buffers are assumed to use quad layout
3293 for(int z = 0; z < stencil.depth; z++)
John Bauman89401822014-05-06 15:04:28 -04003294 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003295 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003296 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003297 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3298
3299 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003300 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003301 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003302 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003303 target[oddX0 + 0] = fill;
3304 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003305 }
3306
Alexis Hetu358a1442015-12-03 14:23:10 -05003307 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003308
3309 if((x1 & 1) != 0)
3310 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003311 target[oddX1 + 0] = fill;
3312 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003313 }
3314
3315 y++;
3316 }
3317 else
3318 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003319 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
Alexis Hetu2b052f82015-11-25 13:57:28 -05003320 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003321 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003322 }
John Bauman89401822014-05-06 15:04:28 -04003323 }
3324 }
3325
Alexis Hetu2b052f82015-11-25 13:57:28 -05003326 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003327 }
John Bauman89401822014-05-06 15:04:28 -04003328
Alexis Hetu2b052f82015-11-25 13:57:28 -05003329 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003330 }
3331
3332 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3333 {
3334 unsigned char *row;
3335 Buffer *buffer;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003336
John Bauman89401822014-05-06 15:04:28 -04003337 if(internal.dirty)
3338 {
3339 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3340 buffer = &internal;
3341 }
3342 else
3343 {
3344 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3345 buffer = &external;
3346 }
3347
3348 if(buffer->bytes <= 4)
3349 {
3350 int c;
3351 buffer->write(&c, color);
3352
3353 if(buffer->bytes <= 1) c = (c << 8) | c;
3354 if(buffer->bytes <= 2) c = (c << 16) | c;
3355
3356 for(int y = 0; y < height; y++)
3357 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003358 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003359
3360 row += buffer->pitchB;
3361 }
3362 }
3363 else // Generic
3364 {
3365 for(int y = 0; y < height; y++)
3366 {
3367 unsigned char *element = row;
3368
3369 for(int x = 0; x < width; x++)
3370 {
3371 buffer->write(element, color);
3372
3373 element += buffer->bytes;
3374 }
3375
3376 row += buffer->pitchB;
3377 }
3378 }
3379
3380 if(buffer == &internal)
3381 {
3382 unlockInternal();
3383 }
3384 else
3385 {
3386 unlockExternal();
3387 }
3388 }
3389
Alexis Hetu43577b82015-10-21 15:32:16 -04003390 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003391 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003392 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003393
Alexis Hetu43577b82015-10-21 15:32:16 -04003394 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003395
Alexis Hetu43577b82015-10-21 15:32:16 -04003396 if(!filter)
3397 {
3398 color = source->internal.read((int)srcX, (int)srcY);
3399 }
3400 else // Bilinear filtering
3401 {
3402 color = source->internal.sample(srcX, srcY);
3403 }
John Bauman89401822014-05-06 15:04:28 -04003404
3405 internal.write(x, y, color);
3406 }
3407
Alexis Hetu43577b82015-10-21 15:32:16 -04003408 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
3409 {
3410 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3411
3412 sw::Color<float> color;
3413
3414 if(!filter)
3415 {
3416 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3417 }
3418 else // Bilinear filtering
3419 {
3420 color = source->internal.sample(srcX, srcY, srcZ);
3421 }
3422
3423 internal.write(x, y, z, color);
3424 }
3425
John Bauman89401822014-05-06 15:04:28 -04003426 bool Surface::hasStencil() const
3427 {
3428 return isStencil(external.format);
3429 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04003430
John Bauman89401822014-05-06 15:04:28 -04003431 bool Surface::hasDepth() const
3432 {
3433 return isDepth(external.format);
3434 }
3435
3436 bool Surface::hasPalette() const
3437 {
3438 return isPalette(external.format);
3439 }
3440
3441 bool Surface::isRenderTarget() const
3442 {
3443 return renderTarget;
3444 }
3445
3446 bool Surface::hasDirtyMipmaps() const
3447 {
3448 return dirtyMipmaps;
3449 }
3450
3451 void Surface::cleanMipmaps()
3452 {
3453 dirtyMipmaps = false;
3454 }
3455
3456 Resource *Surface::getResource()
3457 {
3458 return resource;
3459 }
3460
3461 bool Surface::identicalFormats() const
3462 {
John Bauman66b8ab22014-05-06 15:57:45 -04003463 return external.format == internal.format &&
3464 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003465 external.height == internal.height &&
3466 external.depth == internal.depth &&
3467 external.pitchB == internal.pitchB &&
3468 external.sliceB == internal.sliceB;
John Bauman89401822014-05-06 15:04:28 -04003469 }
3470
3471 Format Surface::selectInternalFormat(Format format) const
3472 {
3473 switch(format)
3474 {
3475 case FORMAT_NULL:
3476 return FORMAT_NULL;
3477 case FORMAT_P8:
3478 case FORMAT_A8P8:
3479 case FORMAT_A4R4G4B4:
3480 case FORMAT_A1R5G5B5:
3481 case FORMAT_A8R3G3B2:
3482 return FORMAT_A8R8G8B8;
3483 case FORMAT_A8:
3484 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003485 case FORMAT_R8I:
3486 return FORMAT_R8I;
3487 case FORMAT_R8UI:
3488 return FORMAT_R8UI;
3489 case FORMAT_R8I_SNORM:
3490 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003491 case FORMAT_R8:
3492 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003493 case FORMAT_R16I:
3494 return FORMAT_R16I;
3495 case FORMAT_R16UI:
3496 return FORMAT_R16UI;
3497 case FORMAT_R32I:
3498 return FORMAT_R32I;
3499 case FORMAT_R32UI:
3500 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003501 case FORMAT_X16B16G16R16I:
3502 case FORMAT_A16B16G16R16I:
3503 return FORMAT_A16B16G16R16I;
3504 case FORMAT_X16B16G16R16UI:
3505 case FORMAT_A16B16G16R16UI:
3506 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003507 case FORMAT_A2R10G10B10:
3508 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003509 case FORMAT_A16B16G16R16:
3510 return FORMAT_A16B16G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003511 case FORMAT_X32B32G32R32I:
3512 case FORMAT_A32B32G32R32I:
3513 return FORMAT_A32B32G32R32I;
3514 case FORMAT_X32B32G32R32UI:
3515 case FORMAT_A32B32G32R32UI:
3516 return FORMAT_A32B32G32R32UI;
3517 case FORMAT_G8R8I:
3518 return FORMAT_G8R8I;
3519 case FORMAT_G8R8UI:
3520 return FORMAT_G8R8UI;
3521 case FORMAT_G8R8I_SNORM:
3522 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003523 case FORMAT_G8R8:
3524 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003525 case FORMAT_G16R16I:
3526 return FORMAT_G16R16I;
3527 case FORMAT_G16R16UI:
3528 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003529 case FORMAT_G16R16:
3530 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003531 case FORMAT_G32R32I:
3532 return FORMAT_G32R32I;
3533 case FORMAT_G32R32UI:
3534 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003535 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003536 if(lockable || !quadLayoutEnabled)
3537 {
3538 return FORMAT_A8R8G8B8;
3539 }
3540 else
3541 {
3542 return FORMAT_A8G8R8B8Q;
3543 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003544 case FORMAT_A8B8G8R8I:
3545 return FORMAT_A8B8G8R8I;
3546 case FORMAT_A8B8G8R8UI:
3547 return FORMAT_A8B8G8R8UI;
3548 case FORMAT_A8B8G8R8I_SNORM:
3549 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003550 case FORMAT_R5G5B5A1:
3551 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003552 case FORMAT_A8B8G8R8:
3553 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003554 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003555 return FORMAT_R5G6B5;
3556 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003557 case FORMAT_R8G8B8:
3558 case FORMAT_X4R4G4B4:
3559 case FORMAT_X1R5G5B5:
3560 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003561 if(lockable || !quadLayoutEnabled)
3562 {
3563 return FORMAT_X8R8G8B8;
3564 }
3565 else
3566 {
3567 return FORMAT_X8G8R8B8Q;
3568 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003569 case FORMAT_X8B8G8R8I:
3570 return FORMAT_X8B8G8R8I;
3571 case FORMAT_X8B8G8R8UI:
3572 return FORMAT_X8B8G8R8UI;
3573 case FORMAT_X8B8G8R8I_SNORM:
3574 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003575 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003576 case FORMAT_X8B8G8R8:
3577 return FORMAT_X8B8G8R8;
Alexis Hetu049a1872016-04-25 16:59:58 -04003578 case FORMAT_SRGB8_X8:
3579 return FORMAT_SRGB8_X8;
3580 case FORMAT_SRGB8_A8:
3581 return FORMAT_SRGB8_A8;
John Bauman89401822014-05-06 15:04:28 -04003582 // Compressed formats
3583 #if S3TC_SUPPORT
3584 case FORMAT_DXT1:
3585 case FORMAT_DXT3:
3586 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003587 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003588 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3589 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3590 case FORMAT_RGBA8_ETC2_EAC:
3591 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3592 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3593 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3594 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3595 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3596 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3597 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3598 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3599 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3600 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3601 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3602 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3603 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3604 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3605 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3606 return FORMAT_A8R8G8B8;
3607 case FORMAT_RGBA_ASTC_4x4_KHR:
3608 case FORMAT_RGBA_ASTC_5x4_KHR:
3609 case FORMAT_RGBA_ASTC_5x5_KHR:
3610 case FORMAT_RGBA_ASTC_6x5_KHR:
3611 case FORMAT_RGBA_ASTC_6x6_KHR:
3612 case FORMAT_RGBA_ASTC_8x5_KHR:
3613 case FORMAT_RGBA_ASTC_8x6_KHR:
3614 case FORMAT_RGBA_ASTC_8x8_KHR:
3615 case FORMAT_RGBA_ASTC_10x5_KHR:
3616 case FORMAT_RGBA_ASTC_10x6_KHR:
3617 case FORMAT_RGBA_ASTC_10x8_KHR:
3618 case FORMAT_RGBA_ASTC_10x10_KHR:
3619 case FORMAT_RGBA_ASTC_12x10_KHR:
3620 case FORMAT_RGBA_ASTC_12x12_KHR:
3621 // ASTC supports HDR, so a floating point format is required to represent it properly
3622 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003623 case FORMAT_ATI1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003624 case FORMAT_R11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003625 return FORMAT_R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003626 case FORMAT_SIGNED_R11_EAC:
3627 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003628 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003629 case FORMAT_RG11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003630 return FORMAT_G8R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003631 case FORMAT_SIGNED_RG11_EAC:
3632 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003633 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003634 case FORMAT_RGB8_ETC2:
3635 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003636 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003637 // Bumpmap formats
3638 case FORMAT_V8U8: return FORMAT_V8U8;
3639 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3640 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3641 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3642 case FORMAT_V16U16: return FORMAT_V16U16;
3643 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3644 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3645 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003646 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003647 case FORMAT_R16F: return FORMAT_R32F;
3648 case FORMAT_G16R16F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003649 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003650 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003651 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003652 case FORMAT_R32F: return FORMAT_R32F;
3653 case FORMAT_G32R32F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003654 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3655 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003656 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3657 // Luminance formats
3658 case FORMAT_L8: return FORMAT_L8;
3659 case FORMAT_A4L4: return FORMAT_A8L8;
3660 case FORMAT_L16: return FORMAT_L16;
3661 case FORMAT_A8L8: return FORMAT_A8L8;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003662 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003663 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003664 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003665 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003666 // Depth/stencil formats
3667 case FORMAT_D16:
3668 case FORMAT_D32:
3669 case FORMAT_D24X8:
3670 case FORMAT_D24S8:
3671 case FORMAT_D24FS8:
3672 if(hasParent) // Texture
3673 {
John Bauman66b8ab22014-05-06 15:57:45 -04003674 return FORMAT_D32FS8_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003675 }
3676 else if(complementaryDepthBuffer)
3677 {
3678 return FORMAT_D32F_COMPLEMENTARY;
3679 }
3680 else
3681 {
3682 return FORMAT_D32F;
3683 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003684 case FORMAT_D32F: return FORMAT_D32F;
John Bauman66b8ab22014-05-06 15:57:45 -04003685 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3686 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3687 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3688 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3689 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003690 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3691 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3692 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003693 default:
3694 ASSERT(false);
3695 }
3696
3697 return FORMAT_NULL;
3698 }
3699
3700 void Surface::setTexturePalette(unsigned int *palette)
3701 {
3702 Surface::palette = palette;
3703 Surface::paletteID++;
3704 }
3705
3706 void Surface::resolve()
3707 {
3708 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
3709 {
3710 return;
3711 }
3712
3713 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3714
John Bauman89401822014-05-06 15:04:28 -04003715 int width = internal.width;
3716 int height = internal.height;
3717 int pitch = internal.pitchB;
3718 int slice = internal.sliceB;
3719
3720 unsigned char *source0 = (unsigned char*)source;
3721 unsigned char *source1 = source0 + slice;
3722 unsigned char *source2 = source1 + slice;
3723 unsigned char *source3 = source2 + slice;
3724 unsigned char *source4 = source3 + slice;
3725 unsigned char *source5 = source4 + slice;
3726 unsigned char *source6 = source5 + slice;
3727 unsigned char *source7 = source6 + slice;
3728 unsigned char *source8 = source7 + slice;
3729 unsigned char *source9 = source8 + slice;
3730 unsigned char *sourceA = source9 + slice;
3731 unsigned char *sourceB = sourceA + slice;
3732 unsigned char *sourceC = sourceB + slice;
3733 unsigned char *sourceD = sourceC + slice;
3734 unsigned char *sourceE = sourceD + slice;
3735 unsigned char *sourceF = sourceE + slice;
3736
Alexis Hetu049a1872016-04-25 16:59:58 -04003737 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
3738 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
3739 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
John Bauman89401822014-05-06 15:04:28 -04003740 {
3741 if(CPUID::supportsSSE2() && (width % 4) == 0)
3742 {
3743 if(internal.depth == 2)
3744 {
3745 for(int y = 0; y < height; y++)
3746 {
3747 for(int x = 0; x < width; x += 4)
3748 {
3749 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3750 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003751
John Bauman89401822014-05-06 15:04:28 -04003752 c0 = _mm_avg_epu8(c0, c1);
3753
3754 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3755 }
3756
3757 source0 += pitch;
3758 source1 += pitch;
3759 }
3760 }
3761 else if(internal.depth == 4)
3762 {
3763 for(int y = 0; y < height; y++)
3764 {
3765 for(int x = 0; x < width; x += 4)
3766 {
3767 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3768 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3769 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3770 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003771
John Bauman89401822014-05-06 15:04:28 -04003772 c0 = _mm_avg_epu8(c0, c1);
3773 c2 = _mm_avg_epu8(c2, c3);
3774 c0 = _mm_avg_epu8(c0, c2);
3775
3776 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3777 }
3778
3779 source0 += pitch;
3780 source1 += pitch;
3781 source2 += pitch;
3782 source3 += pitch;
3783 }
3784 }
3785 else if(internal.depth == 8)
3786 {
3787 for(int y = 0; y < height; y++)
3788 {
3789 for(int x = 0; x < width; x += 4)
3790 {
3791 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3792 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3793 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3794 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3795 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3796 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3797 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3798 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003799
John Bauman89401822014-05-06 15:04:28 -04003800 c0 = _mm_avg_epu8(c0, c1);
3801 c2 = _mm_avg_epu8(c2, c3);
3802 c4 = _mm_avg_epu8(c4, c5);
3803 c6 = _mm_avg_epu8(c6, c7);
3804 c0 = _mm_avg_epu8(c0, c2);
3805 c4 = _mm_avg_epu8(c4, c6);
3806 c0 = _mm_avg_epu8(c0, c4);
3807
3808 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3809 }
3810
3811 source0 += pitch;
3812 source1 += pitch;
3813 source2 += pitch;
3814 source3 += pitch;
3815 source4 += pitch;
3816 source5 += pitch;
3817 source6 += pitch;
3818 source7 += pitch;
3819 }
3820 }
3821 else if(internal.depth == 16)
3822 {
3823 for(int y = 0; y < height; y++)
3824 {
3825 for(int x = 0; x < width; x += 4)
3826 {
3827 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3828 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3829 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3830 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3831 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3832 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3833 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3834 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3835 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
3836 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
3837 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
3838 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
3839 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
3840 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
3841 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
3842 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
3843
3844 c0 = _mm_avg_epu8(c0, c1);
3845 c2 = _mm_avg_epu8(c2, c3);
3846 c4 = _mm_avg_epu8(c4, c5);
3847 c6 = _mm_avg_epu8(c6, c7);
3848 c8 = _mm_avg_epu8(c8, c9);
3849 cA = _mm_avg_epu8(cA, cB);
3850 cC = _mm_avg_epu8(cC, cD);
3851 cE = _mm_avg_epu8(cE, cF);
3852 c0 = _mm_avg_epu8(c0, c2);
3853 c4 = _mm_avg_epu8(c4, c6);
3854 c8 = _mm_avg_epu8(c8, cA);
3855 cC = _mm_avg_epu8(cC, cE);
3856 c0 = _mm_avg_epu8(c0, c4);
3857 c8 = _mm_avg_epu8(c8, cC);
3858 c0 = _mm_avg_epu8(c0, c8);
3859
3860 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3861 }
3862
3863 source0 += pitch;
3864 source1 += pitch;
3865 source2 += pitch;
3866 source3 += pitch;
3867 source4 += pitch;
3868 source5 += pitch;
3869 source6 += pitch;
3870 source7 += pitch;
3871 source8 += pitch;
3872 source9 += pitch;
3873 sourceA += pitch;
3874 sourceB += pitch;
3875 sourceC += pitch;
3876 sourceD += pitch;
3877 sourceE += pitch;
3878 sourceF += pitch;
3879 }
3880 }
3881 else ASSERT(false);
3882 }
3883 else
3884 {
3885 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
3886
3887 if(internal.depth == 2)
3888 {
3889 for(int y = 0; y < height; y++)
3890 {
3891 for(int x = 0; x < width; x++)
3892 {
3893 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3894 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3895
3896 c0 = AVERAGE(c0, c1);
3897
3898 *(unsigned int*)(source0 + 4 * x) = c0;
3899 }
3900
3901 source0 += pitch;
3902 source1 += pitch;
3903 }
3904 }
3905 else if(internal.depth == 4)
3906 {
3907 for(int y = 0; y < height; y++)
3908 {
3909 for(int x = 0; x < width; x++)
3910 {
3911 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3912 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3913 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3914 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3915
3916 c0 = AVERAGE(c0, c1);
3917 c2 = AVERAGE(c2, c3);
3918 c0 = AVERAGE(c0, c2);
3919
3920 *(unsigned int*)(source0 + 4 * x) = c0;
3921 }
3922
3923 source0 += pitch;
3924 source1 += pitch;
3925 source2 += pitch;
3926 source3 += pitch;
3927 }
3928 }
3929 else if(internal.depth == 8)
3930 {
3931 for(int y = 0; y < height; y++)
3932 {
3933 for(int x = 0; x < width; x++)
3934 {
3935 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3936 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3937 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3938 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3939 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3940 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3941 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3942 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3943
3944 c0 = AVERAGE(c0, c1);
3945 c2 = AVERAGE(c2, c3);
3946 c4 = AVERAGE(c4, c5);
3947 c6 = AVERAGE(c6, c7);
3948 c0 = AVERAGE(c0, c2);
3949 c4 = AVERAGE(c4, c6);
3950 c0 = AVERAGE(c0, c4);
3951
3952 *(unsigned int*)(source0 + 4 * x) = c0;
3953 }
3954
3955 source0 += pitch;
3956 source1 += pitch;
3957 source2 += pitch;
3958 source3 += pitch;
3959 source4 += pitch;
3960 source5 += pitch;
3961 source6 += pitch;
3962 source7 += pitch;
3963 }
3964 }
3965 else if(internal.depth == 16)
3966 {
3967 for(int y = 0; y < height; y++)
3968 {
3969 for(int x = 0; x < width; x++)
3970 {
3971 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3972 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3973 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3974 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3975 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3976 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3977 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3978 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3979 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
3980 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
3981 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
3982 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
3983 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
3984 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
3985 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
3986 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
3987
3988 c0 = AVERAGE(c0, c1);
3989 c2 = AVERAGE(c2, c3);
3990 c4 = AVERAGE(c4, c5);
3991 c6 = AVERAGE(c6, c7);
3992 c8 = AVERAGE(c8, c9);
3993 cA = AVERAGE(cA, cB);
3994 cC = AVERAGE(cC, cD);
3995 cE = AVERAGE(cE, cF);
3996 c0 = AVERAGE(c0, c2);
3997 c4 = AVERAGE(c4, c6);
3998 c8 = AVERAGE(c8, cA);
3999 cC = AVERAGE(cC, cE);
4000 c0 = AVERAGE(c0, c4);
4001 c8 = AVERAGE(c8, cC);
4002 c0 = AVERAGE(c0, c8);
4003
4004 *(unsigned int*)(source0 + 4 * x) = c0;
4005 }
4006
4007 source0 += pitch;
4008 source1 += pitch;
4009 source2 += pitch;
4010 source3 += pitch;
4011 source4 += pitch;
4012 source5 += pitch;
4013 source6 += pitch;
4014 source7 += pitch;
4015 source8 += pitch;
4016 source9 += pitch;
4017 sourceA += pitch;
4018 sourceB += pitch;
4019 sourceC += pitch;
4020 sourceD += pitch;
4021 sourceE += pitch;
4022 sourceF += pitch;
4023 }
4024 }
4025 else ASSERT(false);
4026
4027 #undef AVERAGE
4028 }
4029 }
4030 else if(internal.format == FORMAT_G16R16)
4031 {
4032 if(CPUID::supportsSSE2() && (width % 4) == 0)
4033 {
4034 if(internal.depth == 2)
4035 {
4036 for(int y = 0; y < height; y++)
4037 {
4038 for(int x = 0; x < width; x += 4)
4039 {
4040 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4041 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004042
John Bauman89401822014-05-06 15:04:28 -04004043 c0 = _mm_avg_epu16(c0, c1);
4044
4045 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4046 }
4047
4048 source0 += pitch;
4049 source1 += pitch;
4050 }
4051 }
4052 else if(internal.depth == 4)
4053 {
4054 for(int y = 0; y < height; y++)
4055 {
4056 for(int x = 0; x < width; x += 4)
4057 {
4058 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4059 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4060 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4061 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004062
John Bauman89401822014-05-06 15:04:28 -04004063 c0 = _mm_avg_epu16(c0, c1);
4064 c2 = _mm_avg_epu16(c2, c3);
4065 c0 = _mm_avg_epu16(c0, c2);
4066
4067 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4068 }
4069
4070 source0 += pitch;
4071 source1 += pitch;
4072 source2 += pitch;
4073 source3 += pitch;
4074 }
4075 }
4076 else if(internal.depth == 8)
4077 {
4078 for(int y = 0; y < height; y++)
4079 {
4080 for(int x = 0; x < width; x += 4)
4081 {
4082 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4083 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4084 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4085 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4086 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4087 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4088 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4089 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004090
John Bauman89401822014-05-06 15:04:28 -04004091 c0 = _mm_avg_epu16(c0, c1);
4092 c2 = _mm_avg_epu16(c2, c3);
4093 c4 = _mm_avg_epu16(c4, c5);
4094 c6 = _mm_avg_epu16(c6, c7);
4095 c0 = _mm_avg_epu16(c0, c2);
4096 c4 = _mm_avg_epu16(c4, c6);
4097 c0 = _mm_avg_epu16(c0, c4);
4098
4099 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4100 }
4101
4102 source0 += pitch;
4103 source1 += pitch;
4104 source2 += pitch;
4105 source3 += pitch;
4106 source4 += pitch;
4107 source5 += pitch;
4108 source6 += pitch;
4109 source7 += pitch;
4110 }
4111 }
4112 else if(internal.depth == 16)
4113 {
4114 for(int y = 0; y < height; y++)
4115 {
4116 for(int x = 0; x < width; x += 4)
4117 {
4118 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4119 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4120 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4121 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4122 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4123 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4124 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4125 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4126 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4127 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4128 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4129 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4130 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4131 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4132 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4133 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
4134
4135 c0 = _mm_avg_epu16(c0, c1);
4136 c2 = _mm_avg_epu16(c2, c3);
4137 c4 = _mm_avg_epu16(c4, c5);
4138 c6 = _mm_avg_epu16(c6, c7);
4139 c8 = _mm_avg_epu16(c8, c9);
4140 cA = _mm_avg_epu16(cA, cB);
4141 cC = _mm_avg_epu16(cC, cD);
4142 cE = _mm_avg_epu16(cE, cF);
4143 c0 = _mm_avg_epu16(c0, c2);
4144 c4 = _mm_avg_epu16(c4, c6);
4145 c8 = _mm_avg_epu16(c8, cA);
4146 cC = _mm_avg_epu16(cC, cE);
4147 c0 = _mm_avg_epu16(c0, c4);
4148 c8 = _mm_avg_epu16(c8, cC);
4149 c0 = _mm_avg_epu16(c0, c8);
4150
4151 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4152 }
4153
4154 source0 += pitch;
4155 source1 += pitch;
4156 source2 += pitch;
4157 source3 += pitch;
4158 source4 += pitch;
4159 source5 += pitch;
4160 source6 += pitch;
4161 source7 += pitch;
4162 source8 += pitch;
4163 source9 += pitch;
4164 sourceA += pitch;
4165 sourceB += pitch;
4166 sourceC += pitch;
4167 sourceD += pitch;
4168 sourceE += pitch;
4169 sourceF += pitch;
4170 }
4171 }
4172 else ASSERT(false);
4173 }
4174 else
4175 {
4176 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4177
4178 if(internal.depth == 2)
4179 {
4180 for(int y = 0; y < height; y++)
4181 {
4182 for(int x = 0; x < width; x++)
4183 {
4184 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4185 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4186
4187 c0 = AVERAGE(c0, c1);
4188
4189 *(unsigned int*)(source0 + 4 * x) = c0;
4190 }
4191
4192 source0 += pitch;
4193 source1 += pitch;
4194 }
4195 }
4196 else if(internal.depth == 4)
4197 {
4198 for(int y = 0; y < height; y++)
4199 {
4200 for(int x = 0; x < width; x++)
4201 {
4202 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4203 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4204 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4205 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4206
4207 c0 = AVERAGE(c0, c1);
4208 c2 = AVERAGE(c2, c3);
4209 c0 = AVERAGE(c0, c2);
4210
4211 *(unsigned int*)(source0 + 4 * x) = c0;
4212 }
4213
4214 source0 += pitch;
4215 source1 += pitch;
4216 source2 += pitch;
4217 source3 += pitch;
4218 }
4219 }
4220 else if(internal.depth == 8)
4221 {
4222 for(int y = 0; y < height; y++)
4223 {
4224 for(int x = 0; x < width; x++)
4225 {
4226 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4227 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4228 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4229 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4230 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4231 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4232 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4233 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4234
4235 c0 = AVERAGE(c0, c1);
4236 c2 = AVERAGE(c2, c3);
4237 c4 = AVERAGE(c4, c5);
4238 c6 = AVERAGE(c6, c7);
4239 c0 = AVERAGE(c0, c2);
4240 c4 = AVERAGE(c4, c6);
4241 c0 = AVERAGE(c0, c4);
4242
4243 *(unsigned int*)(source0 + 4 * x) = c0;
4244 }
4245
4246 source0 += pitch;
4247 source1 += pitch;
4248 source2 += pitch;
4249 source3 += pitch;
4250 source4 += pitch;
4251 source5 += pitch;
4252 source6 += pitch;
4253 source7 += pitch;
4254 }
4255 }
4256 else if(internal.depth == 16)
4257 {
4258 for(int y = 0; y < height; y++)
4259 {
4260 for(int x = 0; x < width; x++)
4261 {
4262 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4263 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4264 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4265 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4266 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4267 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4268 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4269 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4270 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4271 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4272 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4273 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4274 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4275 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4276 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4277 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4278
4279 c0 = AVERAGE(c0, c1);
4280 c2 = AVERAGE(c2, c3);
4281 c4 = AVERAGE(c4, c5);
4282 c6 = AVERAGE(c6, c7);
4283 c8 = AVERAGE(c8, c9);
4284 cA = AVERAGE(cA, cB);
4285 cC = AVERAGE(cC, cD);
4286 cE = AVERAGE(cE, cF);
4287 c0 = AVERAGE(c0, c2);
4288 c4 = AVERAGE(c4, c6);
4289 c8 = AVERAGE(c8, cA);
4290 cC = AVERAGE(cC, cE);
4291 c0 = AVERAGE(c0, c4);
4292 c8 = AVERAGE(c8, cC);
4293 c0 = AVERAGE(c0, c8);
4294
4295 *(unsigned int*)(source0 + 4 * x) = c0;
4296 }
4297
4298 source0 += pitch;
4299 source1 += pitch;
4300 source2 += pitch;
4301 source3 += pitch;
4302 source4 += pitch;
4303 source5 += pitch;
4304 source6 += pitch;
4305 source7 += pitch;
4306 source8 += pitch;
4307 source9 += pitch;
4308 sourceA += pitch;
4309 sourceB += pitch;
4310 sourceC += pitch;
4311 sourceD += pitch;
4312 sourceE += pitch;
4313 sourceF += pitch;
4314 }
4315 }
4316 else ASSERT(false);
4317
4318 #undef AVERAGE
4319 }
4320 }
4321 else if(internal.format == FORMAT_A16B16G16R16)
4322 {
4323 if(CPUID::supportsSSE2() && (width % 2) == 0)
4324 {
4325 if(internal.depth == 2)
4326 {
4327 for(int y = 0; y < height; y++)
4328 {
4329 for(int x = 0; x < width; x += 2)
4330 {
4331 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4332 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004333
John Bauman89401822014-05-06 15:04:28 -04004334 c0 = _mm_avg_epu16(c0, c1);
4335
4336 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4337 }
4338
4339 source0 += pitch;
4340 source1 += pitch;
4341 }
4342 }
4343 else if(internal.depth == 4)
4344 {
4345 for(int y = 0; y < height; y++)
4346 {
4347 for(int x = 0; x < width; x += 2)
4348 {
4349 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4350 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4351 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4352 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004353
John Bauman89401822014-05-06 15:04:28 -04004354 c0 = _mm_avg_epu16(c0, c1);
4355 c2 = _mm_avg_epu16(c2, c3);
4356 c0 = _mm_avg_epu16(c0, c2);
4357
4358 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4359 }
4360
4361 source0 += pitch;
4362 source1 += pitch;
4363 source2 += pitch;
4364 source3 += pitch;
4365 }
4366 }
4367 else if(internal.depth == 8)
4368 {
4369 for(int y = 0; y < height; y++)
4370 {
4371 for(int x = 0; x < width; x += 2)
4372 {
4373 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4374 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4375 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4376 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4377 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4378 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4379 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4380 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004381
John Bauman89401822014-05-06 15:04:28 -04004382 c0 = _mm_avg_epu16(c0, c1);
4383 c2 = _mm_avg_epu16(c2, c3);
4384 c4 = _mm_avg_epu16(c4, c5);
4385 c6 = _mm_avg_epu16(c6, c7);
4386 c0 = _mm_avg_epu16(c0, c2);
4387 c4 = _mm_avg_epu16(c4, c6);
4388 c0 = _mm_avg_epu16(c0, c4);
4389
4390 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4391 }
4392
4393 source0 += pitch;
4394 source1 += pitch;
4395 source2 += pitch;
4396 source3 += pitch;
4397 source4 += pitch;
4398 source5 += pitch;
4399 source6 += pitch;
4400 source7 += pitch;
4401 }
4402 }
4403 else if(internal.depth == 16)
4404 {
4405 for(int y = 0; y < height; y++)
4406 {
4407 for(int x = 0; x < width; x += 2)
4408 {
4409 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4410 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4411 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4412 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4413 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4414 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4415 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4416 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4417 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4418 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4419 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4420 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4421 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4422 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4423 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4424 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
4425
4426 c0 = _mm_avg_epu16(c0, c1);
4427 c2 = _mm_avg_epu16(c2, c3);
4428 c4 = _mm_avg_epu16(c4, c5);
4429 c6 = _mm_avg_epu16(c6, c7);
4430 c8 = _mm_avg_epu16(c8, c9);
4431 cA = _mm_avg_epu16(cA, cB);
4432 cC = _mm_avg_epu16(cC, cD);
4433 cE = _mm_avg_epu16(cE, cF);
4434 c0 = _mm_avg_epu16(c0, c2);
4435 c4 = _mm_avg_epu16(c4, c6);
4436 c8 = _mm_avg_epu16(c8, cA);
4437 cC = _mm_avg_epu16(cC, cE);
4438 c0 = _mm_avg_epu16(c0, c4);
4439 c8 = _mm_avg_epu16(c8, cC);
4440 c0 = _mm_avg_epu16(c0, c8);
4441
4442 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4443 }
4444
4445 source0 += pitch;
4446 source1 += pitch;
4447 source2 += pitch;
4448 source3 += pitch;
4449 source4 += pitch;
4450 source5 += pitch;
4451 source6 += pitch;
4452 source7 += pitch;
4453 source8 += pitch;
4454 source9 += pitch;
4455 sourceA += pitch;
4456 sourceB += pitch;
4457 sourceC += pitch;
4458 sourceD += pitch;
4459 sourceE += pitch;
4460 sourceF += pitch;
4461 }
4462 }
4463 else ASSERT(false);
4464 }
4465 else
4466 {
4467 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4468
4469 if(internal.depth == 2)
4470 {
4471 for(int y = 0; y < height; y++)
4472 {
4473 for(int x = 0; x < 2 * width; x++)
4474 {
4475 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4476 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4477
4478 c0 = AVERAGE(c0, c1);
4479
4480 *(unsigned int*)(source0 + 4 * x) = c0;
4481 }
4482
4483 source0 += pitch;
4484 source1 += pitch;
4485 }
4486 }
4487 else if(internal.depth == 4)
4488 {
4489 for(int y = 0; y < height; y++)
4490 {
4491 for(int x = 0; x < 2 * width; x++)
4492 {
4493 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4494 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4495 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4496 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4497
4498 c0 = AVERAGE(c0, c1);
4499 c2 = AVERAGE(c2, c3);
4500 c0 = AVERAGE(c0, c2);
4501
4502 *(unsigned int*)(source0 + 4 * x) = c0;
4503 }
4504
4505 source0 += pitch;
4506 source1 += pitch;
4507 source2 += pitch;
4508 source3 += pitch;
4509 }
4510 }
4511 else if(internal.depth == 8)
4512 {
4513 for(int y = 0; y < height; y++)
4514 {
4515 for(int x = 0; x < 2 * width; x++)
4516 {
4517 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4518 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4519 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4520 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4521 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4522 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4523 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4524 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4525
4526 c0 = AVERAGE(c0, c1);
4527 c2 = AVERAGE(c2, c3);
4528 c4 = AVERAGE(c4, c5);
4529 c6 = AVERAGE(c6, c7);
4530 c0 = AVERAGE(c0, c2);
4531 c4 = AVERAGE(c4, c6);
4532 c0 = AVERAGE(c0, c4);
4533
4534 *(unsigned int*)(source0 + 4 * x) = c0;
4535 }
4536
4537 source0 += pitch;
4538 source1 += pitch;
4539 source2 += pitch;
4540 source3 += pitch;
4541 source4 += pitch;
4542 source5 += pitch;
4543 source6 += pitch;
4544 source7 += pitch;
4545 }
4546 }
4547 else if(internal.depth == 16)
4548 {
4549 for(int y = 0; y < height; y++)
4550 {
4551 for(int x = 0; x < 2 * width; x++)
4552 {
4553 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4554 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4555 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4556 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4557 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4558 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4559 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4560 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4561 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4562 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4563 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4564 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4565 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4566 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4567 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4568 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4569
4570 c0 = AVERAGE(c0, c1);
4571 c2 = AVERAGE(c2, c3);
4572 c4 = AVERAGE(c4, c5);
4573 c6 = AVERAGE(c6, c7);
4574 c8 = AVERAGE(c8, c9);
4575 cA = AVERAGE(cA, cB);
4576 cC = AVERAGE(cC, cD);
4577 cE = AVERAGE(cE, cF);
4578 c0 = AVERAGE(c0, c2);
4579 c4 = AVERAGE(c4, c6);
4580 c8 = AVERAGE(c8, cA);
4581 cC = AVERAGE(cC, cE);
4582 c0 = AVERAGE(c0, c4);
4583 c8 = AVERAGE(c8, cC);
4584 c0 = AVERAGE(c0, c8);
4585
4586 *(unsigned int*)(source0 + 4 * x) = c0;
4587 }
4588
4589 source0 += pitch;
4590 source1 += pitch;
4591 source2 += pitch;
4592 source3 += pitch;
4593 source4 += pitch;
4594 source5 += pitch;
4595 source6 += pitch;
4596 source7 += pitch;
4597 source8 += pitch;
4598 source9 += pitch;
4599 sourceA += pitch;
4600 sourceB += pitch;
4601 sourceC += pitch;
4602 sourceD += pitch;
4603 sourceE += pitch;
4604 sourceF += pitch;
4605 }
4606 }
4607 else ASSERT(false);
4608
4609 #undef AVERAGE
4610 }
4611 }
4612 else if(internal.format == FORMAT_R32F)
4613 {
4614 if(CPUID::supportsSSE() && (width % 4) == 0)
4615 {
4616 if(internal.depth == 2)
4617 {
4618 for(int y = 0; y < height; y++)
4619 {
4620 for(int x = 0; x < width; x += 4)
4621 {
4622 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4623 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004624
John Bauman89401822014-05-06 15:04:28 -04004625 c0 = _mm_add_ps(c0, c1);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004626 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004627
4628 _mm_store_ps((float*)(source0 + 4 * x), c0);
4629 }
4630
4631 source0 += pitch;
4632 source1 += pitch;
4633 }
4634 }
4635 else if(internal.depth == 4)
4636 {
4637 for(int y = 0; y < height; y++)
4638 {
4639 for(int x = 0; x < width; x += 4)
4640 {
4641 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4642 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4643 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4644 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004645
John Bauman89401822014-05-06 15:04:28 -04004646 c0 = _mm_add_ps(c0, c1);
4647 c2 = _mm_add_ps(c2, c3);
4648 c0 = _mm_add_ps(c0, c2);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004649 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004650
4651 _mm_store_ps((float*)(source0 + 4 * x), c0);
4652 }
4653
4654 source0 += pitch;
4655 source1 += pitch;
4656 source2 += pitch;
4657 source3 += pitch;
4658 }
4659 }
4660 else if(internal.depth == 8)
4661 {
4662 for(int y = 0; y < height; y++)
4663 {
4664 for(int x = 0; x < width; x += 4)
4665 {
4666 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4667 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4668 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4669 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4670 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4671 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4672 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4673 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004674
John Bauman89401822014-05-06 15:04:28 -04004675 c0 = _mm_add_ps(c0, c1);
4676 c2 = _mm_add_ps(c2, c3);
4677 c4 = _mm_add_ps(c4, c5);
4678 c6 = _mm_add_ps(c6, c7);
4679 c0 = _mm_add_ps(c0, c2);
4680 c4 = _mm_add_ps(c4, c6);
4681 c0 = _mm_add_ps(c0, c4);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004682 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004683
4684 _mm_store_ps((float*)(source0 + 4 * x), c0);
4685 }
4686
4687 source0 += pitch;
4688 source1 += pitch;
4689 source2 += pitch;
4690 source3 += pitch;
4691 source4 += pitch;
4692 source5 += pitch;
4693 source6 += pitch;
4694 source7 += pitch;
4695 }
4696 }
4697 else if(internal.depth == 16)
4698 {
4699 for(int y = 0; y < height; y++)
4700 {
4701 for(int x = 0; x < width; x += 4)
4702 {
4703 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4704 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4705 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4706 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4707 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4708 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4709 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4710 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4711 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4712 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4713 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4714 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4715 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
4716 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
4717 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
4718 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
4719
4720 c0 = _mm_add_ps(c0, c1);
4721 c2 = _mm_add_ps(c2, c3);
4722 c4 = _mm_add_ps(c4, c5);
4723 c6 = _mm_add_ps(c6, c7);
4724 c8 = _mm_add_ps(c8, c9);
4725 cA = _mm_add_ps(cA, cB);
4726 cC = _mm_add_ps(cC, cD);
4727 cE = _mm_add_ps(cE, cF);
4728 c0 = _mm_add_ps(c0, c2);
4729 c4 = _mm_add_ps(c4, c6);
4730 c8 = _mm_add_ps(c8, cA);
4731 cC = _mm_add_ps(cC, cE);
4732 c0 = _mm_add_ps(c0, c4);
4733 c8 = _mm_add_ps(c8, cC);
4734 c0 = _mm_add_ps(c0, c8);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004735 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04004736
4737 _mm_store_ps((float*)(source0 + 4 * x), c0);
4738 }
4739
4740 source0 += pitch;
4741 source1 += pitch;
4742 source2 += pitch;
4743 source3 += pitch;
4744 source4 += pitch;
4745 source5 += pitch;
4746 source6 += pitch;
4747 source7 += pitch;
4748 source8 += pitch;
4749 source9 += pitch;
4750 sourceA += pitch;
4751 sourceB += pitch;
4752 sourceC += pitch;
4753 sourceD += pitch;
4754 sourceE += pitch;
4755 sourceF += pitch;
4756 }
4757 }
4758 else ASSERT(false);
4759 }
4760 else
4761 {
4762 if(internal.depth == 2)
4763 {
4764 for(int y = 0; y < height; y++)
4765 {
4766 for(int x = 0; x < width; x++)
4767 {
4768 float c0 = *(float*)(source0 + 4 * x);
4769 float c1 = *(float*)(source1 + 4 * x);
4770
4771 c0 = c0 + c1;
4772 c0 *= 1.0f / 2.0f;
4773
4774 *(float*)(source0 + 4 * x) = c0;
4775 }
4776
4777 source0 += pitch;
4778 source1 += pitch;
4779 }
4780 }
4781 else if(internal.depth == 4)
4782 {
4783 for(int y = 0; y < height; y++)
4784 {
4785 for(int x = 0; x < width; x++)
4786 {
4787 float c0 = *(float*)(source0 + 4 * x);
4788 float c1 = *(float*)(source1 + 4 * x);
4789 float c2 = *(float*)(source2 + 4 * x);
4790 float c3 = *(float*)(source3 + 4 * x);
4791
4792 c0 = c0 + c1;
4793 c2 = c2 + c3;
4794 c0 = c0 + c2;
4795 c0 *= 1.0f / 4.0f;
4796
4797 *(float*)(source0 + 4 * x) = c0;
4798 }
4799
4800 source0 += pitch;
4801 source1 += pitch;
4802 source2 += pitch;
4803 source3 += pitch;
4804 }
4805 }
4806 else if(internal.depth == 8)
4807 {
4808 for(int y = 0; y < height; y++)
4809 {
4810 for(int x = 0; x < width; x++)
4811 {
4812 float c0 = *(float*)(source0 + 4 * x);
4813 float c1 = *(float*)(source1 + 4 * x);
4814 float c2 = *(float*)(source2 + 4 * x);
4815 float c3 = *(float*)(source3 + 4 * x);
4816 float c4 = *(float*)(source4 + 4 * x);
4817 float c5 = *(float*)(source5 + 4 * x);
4818 float c6 = *(float*)(source6 + 4 * x);
4819 float c7 = *(float*)(source7 + 4 * x);
4820
4821 c0 = c0 + c1;
4822 c2 = c2 + c3;
4823 c4 = c4 + c5;
4824 c6 = c6 + c7;
4825 c0 = c0 + c2;
4826 c4 = c4 + c6;
4827 c0 = c0 + c4;
4828 c0 *= 1.0f / 8.0f;
4829
4830 *(float*)(source0 + 4 * x) = c0;
4831 }
4832
4833 source0 += pitch;
4834 source1 += pitch;
4835 source2 += pitch;
4836 source3 += pitch;
4837 source4 += pitch;
4838 source5 += pitch;
4839 source6 += pitch;
4840 source7 += pitch;
4841 }
4842 }
4843 else if(internal.depth == 16)
4844 {
4845 for(int y = 0; y < height; y++)
4846 {
4847 for(int x = 0; x < width; x++)
4848 {
4849 float c0 = *(float*)(source0 + 4 * x);
4850 float c1 = *(float*)(source1 + 4 * x);
4851 float c2 = *(float*)(source2 + 4 * x);
4852 float c3 = *(float*)(source3 + 4 * x);
4853 float c4 = *(float*)(source4 + 4 * x);
4854 float c5 = *(float*)(source5 + 4 * x);
4855 float c6 = *(float*)(source6 + 4 * x);
4856 float c7 = *(float*)(source7 + 4 * x);
4857 float c8 = *(float*)(source8 + 4 * x);
4858 float c9 = *(float*)(source9 + 4 * x);
4859 float cA = *(float*)(sourceA + 4 * x);
4860 float cB = *(float*)(sourceB + 4 * x);
4861 float cC = *(float*)(sourceC + 4 * x);
4862 float cD = *(float*)(sourceD + 4 * x);
4863 float cE = *(float*)(sourceE + 4 * x);
4864 float cF = *(float*)(sourceF + 4 * x);
4865
4866 c0 = c0 + c1;
4867 c2 = c2 + c3;
4868 c4 = c4 + c5;
4869 c6 = c6 + c7;
4870 c8 = c8 + c9;
4871 cA = cA + cB;
4872 cC = cC + cD;
4873 cE = cE + cF;
4874 c0 = c0 + c2;
4875 c4 = c4 + c6;
4876 c8 = c8 + cA;
4877 cC = cC + cE;
4878 c0 = c0 + c4;
4879 c8 = c8 + cC;
4880 c0 = c0 + c8;
4881 c0 *= 1.0f / 16.0f;
4882
4883 *(float*)(source0 + 4 * x) = c0;
4884 }
4885
4886 source0 += pitch;
4887 source1 += pitch;
4888 source2 += pitch;
4889 source3 += pitch;
4890 source4 += pitch;
4891 source5 += pitch;
4892 source6 += pitch;
4893 source7 += pitch;
4894 source8 += pitch;
4895 source9 += pitch;
4896 sourceA += pitch;
4897 sourceB += pitch;
4898 sourceC += pitch;
4899 sourceD += pitch;
4900 sourceE += pitch;
4901 sourceF += pitch;
4902 }
4903 }
4904 else ASSERT(false);
4905 }
4906 }
4907 else if(internal.format == FORMAT_G32R32F)
4908 {
4909 if(CPUID::supportsSSE() && (width % 2) == 0)
4910 {
4911 if(internal.depth == 2)
4912 {
4913 for(int y = 0; y < height; y++)
4914 {
4915 for(int x = 0; x < width; x += 2)
4916 {
4917 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4918 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004919
John Bauman89401822014-05-06 15:04:28 -04004920 c0 = _mm_add_ps(c0, c1);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004921 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004922
4923 _mm_store_ps((float*)(source0 + 8 * x), c0);
4924 }
4925
4926 source0 += pitch;
4927 source1 += pitch;
4928 }
4929 }
4930 else if(internal.depth == 4)
4931 {
4932 for(int y = 0; y < height; y++)
4933 {
4934 for(int x = 0; x < width; x += 2)
4935 {
4936 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4937 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4938 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4939 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004940
John Bauman89401822014-05-06 15:04:28 -04004941 c0 = _mm_add_ps(c0, c1);
4942 c2 = _mm_add_ps(c2, c3);
4943 c0 = _mm_add_ps(c0, c2);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004944 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004945
4946 _mm_store_ps((float*)(source0 + 8 * x), c0);
4947 }
4948
4949 source0 += pitch;
4950 source1 += pitch;
4951 source2 += pitch;
4952 source3 += pitch;
4953 }
4954 }
4955 else if(internal.depth == 8)
4956 {
4957 for(int y = 0; y < height; y++)
4958 {
4959 for(int x = 0; x < width; x += 2)
4960 {
4961 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4962 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4963 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4964 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4965 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
4966 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
4967 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
4968 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004969
John Bauman89401822014-05-06 15:04:28 -04004970 c0 = _mm_add_ps(c0, c1);
4971 c2 = _mm_add_ps(c2, c3);
4972 c4 = _mm_add_ps(c4, c5);
4973 c6 = _mm_add_ps(c6, c7);
4974 c0 = _mm_add_ps(c0, c2);
4975 c4 = _mm_add_ps(c4, c6);
4976 c0 = _mm_add_ps(c0, c4);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004977 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004978
4979 _mm_store_ps((float*)(source0 + 8 * x), c0);
4980 }
4981
4982 source0 += pitch;
4983 source1 += pitch;
4984 source2 += pitch;
4985 source3 += pitch;
4986 source4 += pitch;
4987 source5 += pitch;
4988 source6 += pitch;
4989 source7 += pitch;
4990 }
4991 }
4992 else if(internal.depth == 16)
4993 {
4994 for(int y = 0; y < height; y++)
4995 {
4996 for(int x = 0; x < width; x += 2)
4997 {
4998 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4999 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5000 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5001 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5002 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5003 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5004 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5005 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5006 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5007 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5008 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5009 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5010 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5011 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5012 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5013 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
5014
5015 c0 = _mm_add_ps(c0, c1);
5016 c2 = _mm_add_ps(c2, c3);
5017 c4 = _mm_add_ps(c4, c5);
5018 c6 = _mm_add_ps(c6, c7);
5019 c8 = _mm_add_ps(c8, c9);
5020 cA = _mm_add_ps(cA, cB);
5021 cC = _mm_add_ps(cC, cD);
5022 cE = _mm_add_ps(cE, cF);
5023 c0 = _mm_add_ps(c0, c2);
5024 c4 = _mm_add_ps(c4, c6);
5025 c8 = _mm_add_ps(c8, cA);
5026 cC = _mm_add_ps(cC, cE);
5027 c0 = _mm_add_ps(c0, c4);
5028 c8 = _mm_add_ps(c8, cC);
5029 c0 = _mm_add_ps(c0, c8);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005030 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005031
5032 _mm_store_ps((float*)(source0 + 8 * x), c0);
5033 }
5034
5035 source0 += pitch;
5036 source1 += pitch;
5037 source2 += pitch;
5038 source3 += pitch;
5039 source4 += pitch;
5040 source5 += pitch;
5041 source6 += pitch;
5042 source7 += pitch;
5043 source8 += pitch;
5044 source9 += pitch;
5045 sourceA += pitch;
5046 sourceB += pitch;
5047 sourceC += pitch;
5048 sourceD += pitch;
5049 sourceE += pitch;
5050 sourceF += pitch;
5051 }
5052 }
5053 else ASSERT(false);
5054 }
5055 else
5056 {
5057 if(internal.depth == 2)
5058 {
5059 for(int y = 0; y < height; y++)
5060 {
5061 for(int x = 0; x < 2 * width; x++)
5062 {
5063 float c0 = *(float*)(source0 + 4 * x);
5064 float c1 = *(float*)(source1 + 4 * x);
5065
5066 c0 = c0 + c1;
5067 c0 *= 1.0f / 2.0f;
5068
5069 *(float*)(source0 + 4 * x) = c0;
5070 }
5071
5072 source0 += pitch;
5073 source1 += pitch;
5074 }
5075 }
5076 else if(internal.depth == 4)
5077 {
5078 for(int y = 0; y < height; y++)
5079 {
5080 for(int x = 0; x < 2 * width; x++)
5081 {
5082 float c0 = *(float*)(source0 + 4 * x);
5083 float c1 = *(float*)(source1 + 4 * x);
5084 float c2 = *(float*)(source2 + 4 * x);
5085 float c3 = *(float*)(source3 + 4 * x);
5086
5087 c0 = c0 + c1;
5088 c2 = c2 + c3;
5089 c0 = c0 + c2;
5090 c0 *= 1.0f / 4.0f;
5091
5092 *(float*)(source0 + 4 * x) = c0;
5093 }
5094
5095 source0 += pitch;
5096 source1 += pitch;
5097 source2 += pitch;
5098 source3 += pitch;
5099 }
5100 }
5101 else if(internal.depth == 8)
5102 {
5103 for(int y = 0; y < height; y++)
5104 {
5105 for(int x = 0; x < 2 * width; x++)
5106 {
5107 float c0 = *(float*)(source0 + 4 * x);
5108 float c1 = *(float*)(source1 + 4 * x);
5109 float c2 = *(float*)(source2 + 4 * x);
5110 float c3 = *(float*)(source3 + 4 * x);
5111 float c4 = *(float*)(source4 + 4 * x);
5112 float c5 = *(float*)(source5 + 4 * x);
5113 float c6 = *(float*)(source6 + 4 * x);
5114 float c7 = *(float*)(source7 + 4 * x);
5115
5116 c0 = c0 + c1;
5117 c2 = c2 + c3;
5118 c4 = c4 + c5;
5119 c6 = c6 + c7;
5120 c0 = c0 + c2;
5121 c4 = c4 + c6;
5122 c0 = c0 + c4;
5123 c0 *= 1.0f / 8.0f;
5124
5125 *(float*)(source0 + 4 * x) = c0;
5126 }
5127
5128 source0 += pitch;
5129 source1 += pitch;
5130 source2 += pitch;
5131 source3 += pitch;
5132 source4 += pitch;
5133 source5 += pitch;
5134 source6 += pitch;
5135 source7 += pitch;
5136 }
5137 }
5138 else if(internal.depth == 16)
5139 {
5140 for(int y = 0; y < height; y++)
5141 {
5142 for(int x = 0; x < 2 * width; x++)
5143 {
5144 float c0 = *(float*)(source0 + 4 * x);
5145 float c1 = *(float*)(source1 + 4 * x);
5146 float c2 = *(float*)(source2 + 4 * x);
5147 float c3 = *(float*)(source3 + 4 * x);
5148 float c4 = *(float*)(source4 + 4 * x);
5149 float c5 = *(float*)(source5 + 4 * x);
5150 float c6 = *(float*)(source6 + 4 * x);
5151 float c7 = *(float*)(source7 + 4 * x);
5152 float c8 = *(float*)(source8 + 4 * x);
5153 float c9 = *(float*)(source9 + 4 * x);
5154 float cA = *(float*)(sourceA + 4 * x);
5155 float cB = *(float*)(sourceB + 4 * x);
5156 float cC = *(float*)(sourceC + 4 * x);
5157 float cD = *(float*)(sourceD + 4 * x);
5158 float cE = *(float*)(sourceE + 4 * x);
5159 float cF = *(float*)(sourceF + 4 * x);
5160
5161 c0 = c0 + c1;
5162 c2 = c2 + c3;
5163 c4 = c4 + c5;
5164 c6 = c6 + c7;
5165 c8 = c8 + c9;
5166 cA = cA + cB;
5167 cC = cC + cD;
5168 cE = cE + cF;
5169 c0 = c0 + c2;
5170 c4 = c4 + c6;
5171 c8 = c8 + cA;
5172 cC = cC + cE;
5173 c0 = c0 + c4;
5174 c8 = c8 + cC;
5175 c0 = c0 + c8;
5176 c0 *= 1.0f / 16.0f;
5177
5178 *(float*)(source0 + 4 * x) = c0;
5179 }
5180
5181 source0 += pitch;
5182 source1 += pitch;
5183 source2 += pitch;
5184 source3 += pitch;
5185 source4 += pitch;
5186 source5 += pitch;
5187 source6 += pitch;
5188 source7 += pitch;
5189 source8 += pitch;
5190 source9 += pitch;
5191 sourceA += pitch;
5192 sourceB += pitch;
5193 sourceC += pitch;
5194 sourceD += pitch;
5195 sourceE += pitch;
5196 sourceF += pitch;
5197 }
5198 }
5199 else ASSERT(false);
5200 }
5201 }
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04005202 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F)
John Bauman89401822014-05-06 15:04:28 -04005203 {
5204 if(CPUID::supportsSSE())
5205 {
5206 if(internal.depth == 2)
5207 {
5208 for(int y = 0; y < height; y++)
5209 {
5210 for(int x = 0; x < width; x++)
5211 {
5212 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5213 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005214
John Bauman89401822014-05-06 15:04:28 -04005215 c0 = _mm_add_ps(c0, c1);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005216 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005217
5218 _mm_store_ps((float*)(source0 + 16 * x), c0);
5219 }
5220
5221 source0 += pitch;
5222 source1 += pitch;
5223 }
5224 }
5225 else if(internal.depth == 4)
5226 {
5227 for(int y = 0; y < height; y++)
5228 {
5229 for(int x = 0; x < width; x++)
5230 {
5231 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5232 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5233 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5234 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005235
John Bauman89401822014-05-06 15:04:28 -04005236 c0 = _mm_add_ps(c0, c1);
5237 c2 = _mm_add_ps(c2, c3);
5238 c0 = _mm_add_ps(c0, c2);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005239 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005240
5241 _mm_store_ps((float*)(source0 + 16 * x), c0);
5242 }
5243
5244 source0 += pitch;
5245 source1 += pitch;
5246 source2 += pitch;
5247 source3 += pitch;
5248 }
5249 }
5250 else if(internal.depth == 8)
5251 {
5252 for(int y = 0; y < height; y++)
5253 {
5254 for(int x = 0; x < width; x++)
5255 {
5256 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5257 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5258 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5259 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5260 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5261 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5262 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5263 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005264
John Bauman89401822014-05-06 15:04:28 -04005265 c0 = _mm_add_ps(c0, c1);
5266 c2 = _mm_add_ps(c2, c3);
5267 c4 = _mm_add_ps(c4, c5);
5268 c6 = _mm_add_ps(c6, c7);
5269 c0 = _mm_add_ps(c0, c2);
5270 c4 = _mm_add_ps(c4, c6);
5271 c0 = _mm_add_ps(c0, c4);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005272 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005273
5274 _mm_store_ps((float*)(source0 + 16 * x), c0);
5275 }
5276
5277 source0 += pitch;
5278 source1 += pitch;
5279 source2 += pitch;
5280 source3 += pitch;
5281 source4 += pitch;
5282 source5 += pitch;
5283 source6 += pitch;
5284 source7 += pitch;
5285 }
5286 }
5287 else if(internal.depth == 16)
5288 {
5289 for(int y = 0; y < height; y++)
5290 {
5291 for(int x = 0; x < width; x++)
5292 {
5293 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5294 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5295 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5296 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5297 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5298 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5299 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5300 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5301 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5302 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5303 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5304 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5305 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5306 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5307 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5308 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
5309
5310 c0 = _mm_add_ps(c0, c1);
5311 c2 = _mm_add_ps(c2, c3);
5312 c4 = _mm_add_ps(c4, c5);
5313 c6 = _mm_add_ps(c6, c7);
5314 c8 = _mm_add_ps(c8, c9);
5315 cA = _mm_add_ps(cA, cB);
5316 cC = _mm_add_ps(cC, cD);
5317 cE = _mm_add_ps(cE, cF);
5318 c0 = _mm_add_ps(c0, c2);
5319 c4 = _mm_add_ps(c4, c6);
5320 c8 = _mm_add_ps(c8, cA);
5321 cC = _mm_add_ps(cC, cE);
5322 c0 = _mm_add_ps(c0, c4);
5323 c8 = _mm_add_ps(c8, cC);
5324 c0 = _mm_add_ps(c0, c8);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005325 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005326
5327 _mm_store_ps((float*)(source0 + 16 * x), c0);
5328 }
5329
5330 source0 += pitch;
5331 source1 += pitch;
5332 source2 += pitch;
5333 source3 += pitch;
5334 source4 += pitch;
5335 source5 += pitch;
5336 source6 += pitch;
5337 source7 += pitch;
5338 source8 += pitch;
5339 source9 += pitch;
5340 sourceA += pitch;
5341 sourceB += pitch;
5342 sourceC += pitch;
5343 sourceD += pitch;
5344 sourceE += pitch;
5345 sourceF += pitch;
5346 }
5347 }
5348 else ASSERT(false);
5349 }
5350 else
5351 {
5352 if(internal.depth == 2)
5353 {
5354 for(int y = 0; y < height; y++)
5355 {
5356 for(int x = 0; x < 4 * width; x++)
5357 {
5358 float c0 = *(float*)(source0 + 4 * x);
5359 float c1 = *(float*)(source1 + 4 * x);
5360
5361 c0 = c0 + c1;
5362 c0 *= 1.0f / 2.0f;
5363
5364 *(float*)(source0 + 4 * x) = c0;
5365 }
5366
5367 source0 += pitch;
5368 source1 += pitch;
5369 }
5370 }
5371 else if(internal.depth == 4)
5372 {
5373 for(int y = 0; y < height; y++)
5374 {
5375 for(int x = 0; x < 4 * width; x++)
5376 {
5377 float c0 = *(float*)(source0 + 4 * x);
5378 float c1 = *(float*)(source1 + 4 * x);
5379 float c2 = *(float*)(source2 + 4 * x);
5380 float c3 = *(float*)(source3 + 4 * x);
5381
5382 c0 = c0 + c1;
5383 c2 = c2 + c3;
5384 c0 = c0 + c2;
5385 c0 *= 1.0f / 4.0f;
5386
5387 *(float*)(source0 + 4 * x) = c0;
5388 }
5389
5390 source0 += pitch;
5391 source1 += pitch;
5392 source2 += pitch;
5393 source3 += pitch;
5394 }
5395 }
5396 else if(internal.depth == 8)
5397 {
5398 for(int y = 0; y < height; y++)
5399 {
5400 for(int x = 0; x < 4 * width; x++)
5401 {
5402 float c0 = *(float*)(source0 + 4 * x);
5403 float c1 = *(float*)(source1 + 4 * x);
5404 float c2 = *(float*)(source2 + 4 * x);
5405 float c3 = *(float*)(source3 + 4 * x);
5406 float c4 = *(float*)(source4 + 4 * x);
5407 float c5 = *(float*)(source5 + 4 * x);
5408 float c6 = *(float*)(source6 + 4 * x);
5409 float c7 = *(float*)(source7 + 4 * x);
5410
5411 c0 = c0 + c1;
5412 c2 = c2 + c3;
5413 c4 = c4 + c5;
5414 c6 = c6 + c7;
5415 c0 = c0 + c2;
5416 c4 = c4 + c6;
5417 c0 = c0 + c4;
5418 c0 *= 1.0f / 8.0f;
5419
5420 *(float*)(source0 + 4 * x) = c0;
5421 }
5422
5423 source0 += pitch;
5424 source1 += pitch;
5425 source2 += pitch;
5426 source3 += pitch;
5427 source4 += pitch;
5428 source5 += pitch;
5429 source6 += pitch;
5430 source7 += pitch;
5431 }
5432 }
5433 else if(internal.depth == 16)
5434 {
5435 for(int y = 0; y < height; y++)
5436 {
5437 for(int x = 0; x < 4 * width; x++)
5438 {
5439 float c0 = *(float*)(source0 + 4 * x);
5440 float c1 = *(float*)(source1 + 4 * x);
5441 float c2 = *(float*)(source2 + 4 * x);
5442 float c3 = *(float*)(source3 + 4 * x);
5443 float c4 = *(float*)(source4 + 4 * x);
5444 float c5 = *(float*)(source5 + 4 * x);
5445 float c6 = *(float*)(source6 + 4 * x);
5446 float c7 = *(float*)(source7 + 4 * x);
5447 float c8 = *(float*)(source8 + 4 * x);
5448 float c9 = *(float*)(source9 + 4 * x);
5449 float cA = *(float*)(sourceA + 4 * x);
5450 float cB = *(float*)(sourceB + 4 * x);
5451 float cC = *(float*)(sourceC + 4 * x);
5452 float cD = *(float*)(sourceD + 4 * x);
5453 float cE = *(float*)(sourceE + 4 * x);
5454 float cF = *(float*)(sourceF + 4 * x);
5455
5456 c0 = c0 + c1;
5457 c2 = c2 + c3;
5458 c4 = c4 + c5;
5459 c6 = c6 + c7;
5460 c8 = c8 + c9;
5461 cA = cA + cB;
5462 cC = cC + cD;
5463 cE = cE + cF;
5464 c0 = c0 + c2;
5465 c4 = c4 + c6;
5466 c8 = c8 + cA;
5467 cC = cC + cE;
5468 c0 = c0 + c4;
5469 c8 = c8 + cC;
5470 c0 = c0 + c8;
5471 c0 *= 1.0f / 16.0f;
5472
5473 *(float*)(source0 + 4 * x) = c0;
5474 }
5475
5476 source0 += pitch;
5477 source1 += pitch;
5478 source2 += pitch;
5479 source3 += pitch;
5480 source4 += pitch;
5481 source5 += pitch;
5482 source6 += pitch;
5483 source7 += pitch;
5484 source8 += pitch;
5485 source9 += pitch;
5486 sourceA += pitch;
5487 sourceB += pitch;
5488 sourceC += pitch;
5489 sourceD += pitch;
5490 sourceE += pitch;
5491 sourceF += pitch;
5492 }
5493 }
5494 else ASSERT(false);
5495 }
5496 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005497 else if(internal.format == FORMAT_R5G6B5)
5498 {
5499 if(CPUID::supportsSSE2() && (width % 8) == 0)
5500 {
5501 if(internal.depth == 2)
5502 {
5503 for(int y = 0; y < height; y++)
5504 {
5505 for(int x = 0; x < width; x += 8)
5506 {
5507 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5508 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005509
Nicolas Capens0e12a922015-09-04 09:18:15 -04005510 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5511 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5512 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5513 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5514 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5515 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5516
5517 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5518 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5519 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5520 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5521 c0 = _mm_or_si128(c0, c1);
5522
5523 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5524 }
5525
5526 source0 += pitch;
5527 source1 += pitch;
5528 }
5529 }
5530 else if(internal.depth == 4)
5531 {
5532 for(int y = 0; y < height; y++)
5533 {
5534 for(int x = 0; x < width; x += 8)
5535 {
5536 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5537 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5538 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5539 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005540
Nicolas Capens0e12a922015-09-04 09:18:15 -04005541 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5542 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5543 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5544 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5545 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5546 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5547 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5548 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5549 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5550 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5551
5552 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5553 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5554 c0 = _mm_avg_epu8(c0, c2);
5555 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5556 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5557 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5558 c1 = _mm_avg_epu16(c1, c3);
5559 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5560 c0 = _mm_or_si128(c0, c1);
5561
5562 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5563 }
5564
5565 source0 += pitch;
5566 source1 += pitch;
5567 source2 += pitch;
5568 source3 += pitch;
5569 }
5570 }
5571 else if(internal.depth == 8)
5572 {
5573 for(int y = 0; y < height; y++)
5574 {
5575 for(int x = 0; x < width; x += 8)
5576 {
5577 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5578 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5579 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5580 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5581 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5582 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5583 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5584 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005585
Nicolas Capens0e12a922015-09-04 09:18:15 -04005586 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5587 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5588 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5589 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5590 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5591 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5592 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5593 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5594 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5595 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5596 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5597 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5598 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5599 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5600 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5601 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5602 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5603 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5604
5605 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5606 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5607 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5608 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5609 c0 = _mm_avg_epu8(c0, c2);
5610 c4 = _mm_avg_epu8(c4, c6);
5611 c0 = _mm_avg_epu8(c0, c4);
5612 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5613 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5614 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5615 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5616 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5617 c1 = _mm_avg_epu16(c1, c3);
5618 c5 = _mm_avg_epu16(c5, c7);
5619 c1 = _mm_avg_epu16(c1, c5);
5620 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5621 c0 = _mm_or_si128(c0, c1);
5622
5623 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5624 }
5625
5626 source0 += pitch;
5627 source1 += pitch;
5628 source2 += pitch;
5629 source3 += pitch;
5630 source4 += pitch;
5631 source5 += pitch;
5632 source6 += pitch;
5633 source7 += pitch;
5634 }
5635 }
5636 else if(internal.depth == 16)
5637 {
5638 for(int y = 0; y < height; y++)
5639 {
5640 for(int x = 0; x < width; x += 8)
5641 {
5642 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5643 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5644 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5645 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5646 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5647 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5648 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5649 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5650 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5651 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5652 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5653 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5654 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5655 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5656 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5657 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
5658
5659 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5660 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5661 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5662 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5663 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5664 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5665 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5666 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5667 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5668 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5669 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5670 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5671 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5672 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5673 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5674 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5675 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5676 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5677 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5678 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5679 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5680 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5681 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5682 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5683 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5684 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5685 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5686 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
5687 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
5688 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
5689 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
5690 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
5691 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
5692 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
5693
5694 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5695 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5696 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5697 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5698 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
5699 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
5700 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
5701 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
5702 c0 = _mm_avg_epu8(c0, c2);
5703 c4 = _mm_avg_epu8(c4, c6);
5704 c8 = _mm_avg_epu8(c8, cA);
5705 cC = _mm_avg_epu8(cC, cE);
5706 c0 = _mm_avg_epu8(c0, c4);
5707 c8 = _mm_avg_epu8(c8, cC);
5708 c0 = _mm_avg_epu8(c0, c8);
5709 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5710 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5711 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5712 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5713 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5714 c9 = _mm_avg_epu16(c8__g_, c9__g_);
5715 cB = _mm_avg_epu16(cA__g_, cB__g_);
5716 cD = _mm_avg_epu16(cC__g_, cD__g_);
5717 cF = _mm_avg_epu16(cE__g_, cF__g_);
5718 c1 = _mm_avg_epu8(c1, c3);
5719 c5 = _mm_avg_epu8(c5, c7);
5720 c9 = _mm_avg_epu8(c9, cB);
5721 cD = _mm_avg_epu8(cD, cF);
5722 c1 = _mm_avg_epu8(c1, c5);
5723 c9 = _mm_avg_epu8(c9, cD);
5724 c1 = _mm_avg_epu8(c1, c9);
5725 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5726 c0 = _mm_or_si128(c0, c1);
5727
5728 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5729 }
5730
5731 source0 += pitch;
5732 source1 += pitch;
5733 source2 += pitch;
5734 source3 += pitch;
5735 source4 += pitch;
5736 source5 += pitch;
5737 source6 += pitch;
5738 source7 += pitch;
5739 source8 += pitch;
5740 source9 += pitch;
5741 sourceA += pitch;
5742 sourceB += pitch;
5743 sourceC += pitch;
5744 sourceD += pitch;
5745 sourceE += pitch;
5746 sourceF += pitch;
5747 }
5748 }
5749 else ASSERT(false);
5750 }
5751 else
5752 {
5753 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
5754
5755 if(internal.depth == 2)
5756 {
5757 for(int y = 0; y < height; y++)
5758 {
5759 for(int x = 0; x < width; x++)
5760 {
5761 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5762 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5763
5764 c0 = AVERAGE(c0, c1);
5765
5766 *(unsigned short*)(source0 + 2 * x) = c0;
5767 }
5768
5769 source0 += pitch;
5770 source1 += pitch;
5771 }
5772 }
5773 else if(internal.depth == 4)
5774 {
5775 for(int y = 0; y < height; y++)
5776 {
5777 for(int x = 0; x < width; x++)
5778 {
5779 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5780 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5781 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5782 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5783
5784 c0 = AVERAGE(c0, c1);
5785 c2 = AVERAGE(c2, c3);
5786 c0 = AVERAGE(c0, c2);
5787
5788 *(unsigned short*)(source0 + 2 * x) = c0;
5789 }
5790
5791 source0 += pitch;
5792 source1 += pitch;
5793 source2 += pitch;
5794 source3 += pitch;
5795 }
5796 }
5797 else if(internal.depth == 8)
5798 {
5799 for(int y = 0; y < height; y++)
5800 {
5801 for(int x = 0; x < width; x++)
5802 {
5803 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5804 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5805 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5806 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5807 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5808 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5809 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5810 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5811
5812 c0 = AVERAGE(c0, c1);
5813 c2 = AVERAGE(c2, c3);
5814 c4 = AVERAGE(c4, c5);
5815 c6 = AVERAGE(c6, c7);
5816 c0 = AVERAGE(c0, c2);
5817 c4 = AVERAGE(c4, c6);
5818 c0 = AVERAGE(c0, c4);
5819
5820 *(unsigned short*)(source0 + 2 * x) = c0;
5821 }
5822
5823 source0 += pitch;
5824 source1 += pitch;
5825 source2 += pitch;
5826 source3 += pitch;
5827 source4 += pitch;
5828 source5 += pitch;
5829 source6 += pitch;
5830 source7 += pitch;
5831 }
5832 }
5833 else if(internal.depth == 16)
5834 {
5835 for(int y = 0; y < height; y++)
5836 {
5837 for(int x = 0; x < width; x++)
5838 {
5839 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5840 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5841 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5842 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5843 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5844 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5845 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5846 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5847 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
5848 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
5849 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
5850 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
5851 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
5852 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
5853 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
5854 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
5855
5856 c0 = AVERAGE(c0, c1);
5857 c2 = AVERAGE(c2, c3);
5858 c4 = AVERAGE(c4, c5);
5859 c6 = AVERAGE(c6, c7);
5860 c8 = AVERAGE(c8, c9);
5861 cA = AVERAGE(cA, cB);
5862 cC = AVERAGE(cC, cD);
5863 cE = AVERAGE(cE, cF);
5864 c0 = AVERAGE(c0, c2);
5865 c4 = AVERAGE(c4, c6);
5866 c8 = AVERAGE(c8, cA);
5867 cC = AVERAGE(cC, cE);
5868 c0 = AVERAGE(c0, c4);
5869 c8 = AVERAGE(c8, cC);
5870 c0 = AVERAGE(c0, c8);
5871
5872 *(unsigned short*)(source0 + 2 * x) = c0;
5873 }
5874
5875 source0 += pitch;
5876 source1 += pitch;
5877 source2 += pitch;
5878 source3 += pitch;
5879 source4 += pitch;
5880 source5 += pitch;
5881 source6 += pitch;
5882 source7 += pitch;
5883 source8 += pitch;
5884 source9 += pitch;
5885 sourceA += pitch;
5886 sourceB += pitch;
5887 sourceC += pitch;
5888 sourceD += pitch;
5889 sourceE += pitch;
5890 sourceF += pitch;
5891 }
5892 }
5893 else ASSERT(false);
5894
5895 #undef AVERAGE
5896 }
5897 }
John Bauman89401822014-05-06 15:04:28 -04005898 else
5899 {
5900 // UNIMPLEMENTED();
5901 }
5902 }
5903}