blob: c7ca71aefee568a1b6cc835e882277fbc716195f [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "Surface.hpp"
16
17#include "Color.hpp"
18#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040019#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040020#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040021#include "Common/Half.hpp"
22#include "Common/Memory.hpp"
23#include "Common/CPUID.hpp"
24#include "Common/Resource.hpp"
25#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040026#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040027
Nicolas Capens47dc8672017-04-25 12:54:39 -040028#if defined(__i386__) || defined(__x86_64__)
29 #include <xmmintrin.h>
30 #include <emmintrin.h>
31#endif
John Bauman89401822014-05-06 15:04:28 -040032
33#undef min
34#undef max
35
36namespace sw
37{
38 extern bool quadLayoutEnabled;
39 extern bool complementaryDepthBuffer;
40 extern TranscendentalPrecision logPrecision;
41
42 unsigned int *Surface::palette = 0;
43 unsigned int Surface::paletteID = 0;
Alexis Hetu147f6682017-02-09 17:14:34 -050044 void Surface::typeinfo() {}
John Bauman89401822014-05-06 15:04:28 -040045
John Bauman19bac1e2014-05-06 15:23:49 -040046 void Rect::clip(int minX, int minY, int maxX, int maxY)
47 {
Nicolas Capens22658242014-11-29 00:31:41 -050048 x0 = clamp(x0, minX, maxX);
49 y0 = clamp(y0, minY, maxY);
50 x1 = clamp(x1, minX, maxX);
51 y1 = clamp(y1, minY, maxY);
John Bauman19bac1e2014-05-06 15:23:49 -040052 }
53
John Bauman89401822014-05-06 15:04:28 -040054 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
55 {
56 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
57
58 write(element, color);
59 }
60
61 void Surface::Buffer::write(int x, int y, const Color<float> &color)
62 {
63 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
64
65 write(element, color);
66 }
67
68 inline void Surface::Buffer::write(void *element, const Color<float> &color)
69 {
70 switch(format)
71 {
72 case FORMAT_A8:
73 *(unsigned char*)element = unorm<8>(color.a);
74 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040075 case FORMAT_R8I_SNORM:
76 *(char*)element = snorm<8>(color.r);
77 break;
John Bauman89401822014-05-06 15:04:28 -040078 case FORMAT_R8:
79 *(unsigned char*)element = unorm<8>(color.r);
80 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040081 case FORMAT_R8I:
82 *(char*)element = scast<8>(color.r);
83 break;
84 case FORMAT_R8UI:
85 *(unsigned char*)element = ucast<8>(color.r);
86 break;
87 case FORMAT_R16I:
88 *(short*)element = scast<16>(color.r);
89 break;
90 case FORMAT_R16UI:
91 *(unsigned short*)element = ucast<16>(color.r);
92 break;
93 case FORMAT_R32I:
94 *(int*)element = static_cast<int>(color.r);
95 break;
96 case FORMAT_R32UI:
97 *(unsigned int*)element = static_cast<unsigned int>(color.r);
98 break;
John Bauman89401822014-05-06 15:04:28 -040099 case FORMAT_R3G3B2:
100 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
101 break;
102 case FORMAT_A8R3G3B2:
103 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
104 break;
105 case FORMAT_X4R4G4B4:
106 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
107 break;
108 case FORMAT_A4R4G4B4:
109 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
110 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400111 case FORMAT_R4G4B4A4:
112 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
113 break;
John Bauman89401822014-05-06 15:04:28 -0400114 case FORMAT_R5G6B5:
115 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
116 break;
117 case FORMAT_A1R5G5B5:
118 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
119 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400120 case FORMAT_R5G5B5A1:
121 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
122 break;
John Bauman89401822014-05-06 15:04:28 -0400123 case FORMAT_X1R5G5B5:
124 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
125 break;
126 case FORMAT_A8R8G8B8:
127 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
128 break;
129 case FORMAT_X8R8G8B8:
130 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
131 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400132 case FORMAT_A8B8G8R8I_SNORM:
133 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
134 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
135 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
136 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
137 break;
John Bauman89401822014-05-06 15:04:28 -0400138 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400139 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400140 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
141 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400142 case FORMAT_A8B8G8R8I:
143 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
144 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
145 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
146 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
147 break;
148 case FORMAT_A8B8G8R8UI:
149 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
150 break;
151 case FORMAT_X8B8G8R8I_SNORM:
152 *(unsigned int*)element = 0x7F000000 |
153 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
154 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
155 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
156 break;
John Bauman89401822014-05-06 15:04:28 -0400157 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400158 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400159 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
160 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400161 case FORMAT_X8B8G8R8I:
162 *(unsigned int*)element = 0x7F000000 |
163 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
164 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
165 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
166 case FORMAT_X8B8G8R8UI:
167 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
168 break;
John Bauman89401822014-05-06 15:04:28 -0400169 case FORMAT_A2R10G10B10:
170 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
171 break;
172 case FORMAT_A2B10G10R10:
173 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
174 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400175 case FORMAT_G8R8I_SNORM:
176 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
177 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
178 break;
John Bauman89401822014-05-06 15:04:28 -0400179 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400180 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
181 break;
182 case FORMAT_G8R8I:
183 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
184 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
185 break;
186 case FORMAT_G8R8UI:
187 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400188 break;
189 case FORMAT_G16R16:
190 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
191 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400192 case FORMAT_G16R16I:
193 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
194 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
195 break;
196 case FORMAT_G16R16UI:
197 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
198 break;
199 case FORMAT_G32R32I:
200 case FORMAT_G32R32UI:
201 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
202 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
203 break;
John Bauman89401822014-05-06 15:04:28 -0400204 case FORMAT_A16B16G16R16:
205 ((unsigned short*)element)[0] = unorm<16>(color.r);
206 ((unsigned short*)element)[1] = unorm<16>(color.g);
207 ((unsigned short*)element)[2] = unorm<16>(color.b);
208 ((unsigned short*)element)[3] = unorm<16>(color.a);
209 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400210 case FORMAT_A16B16G16R16I:
211 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
212 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
213 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
214 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
215 break;
216 case FORMAT_A16B16G16R16UI:
217 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
218 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
219 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
220 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
221 break;
222 case FORMAT_X16B16G16R16I:
223 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
224 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
225 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
226 break;
227 case FORMAT_X16B16G16R16UI:
228 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
229 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
230 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
231 break;
232 case FORMAT_A32B32G32R32I:
233 case FORMAT_A32B32G32R32UI:
234 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
235 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
236 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
237 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
238 break;
239 case FORMAT_X32B32G32R32I:
240 case FORMAT_X32B32G32R32UI:
241 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
242 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
243 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
244 break;
John Bauman89401822014-05-06 15:04:28 -0400245 case FORMAT_V8U8:
246 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
247 break;
248 case FORMAT_L6V5U5:
249 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
250 break;
251 case FORMAT_Q8W8V8U8:
252 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
253 break;
254 case FORMAT_X8L8V8U8:
255 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
256 break;
257 case FORMAT_V16U16:
258 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
259 break;
260 case FORMAT_A2W10V10U10:
261 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
262 break;
263 case FORMAT_A16W16V16U16:
264 ((unsigned short*)element)[0] = snorm<16>(color.r);
265 ((unsigned short*)element)[1] = snorm<16>(color.g);
266 ((unsigned short*)element)[2] = snorm<16>(color.b);
267 ((unsigned short*)element)[3] = unorm<16>(color.a);
268 break;
269 case FORMAT_Q16W16V16U16:
270 ((unsigned short*)element)[0] = snorm<16>(color.r);
271 ((unsigned short*)element)[1] = snorm<16>(color.g);
272 ((unsigned short*)element)[2] = snorm<16>(color.b);
273 ((unsigned short*)element)[3] = snorm<16>(color.a);
274 break;
275 case FORMAT_R8G8B8:
276 ((unsigned char*)element)[0] = unorm<8>(color.b);
277 ((unsigned char*)element)[1] = unorm<8>(color.g);
278 ((unsigned char*)element)[2] = unorm<8>(color.r);
279 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400280 case FORMAT_B8G8R8:
281 ((unsigned char*)element)[0] = unorm<8>(color.r);
282 ((unsigned char*)element)[1] = unorm<8>(color.g);
283 ((unsigned char*)element)[2] = unorm<8>(color.b);
284 break;
John Bauman89401822014-05-06 15:04:28 -0400285 case FORMAT_R16F:
286 *(half*)element = (half)color.r;
287 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400288 case FORMAT_A16F:
289 *(half*)element = (half)color.a;
290 break;
John Bauman89401822014-05-06 15:04:28 -0400291 case FORMAT_G16R16F:
292 ((half*)element)[0] = (half)color.r;
293 ((half*)element)[1] = (half)color.g;
294 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400295 case FORMAT_B16G16R16F:
296 ((half*)element)[0] = (half)color.r;
297 ((half*)element)[1] = (half)color.g;
298 ((half*)element)[2] = (half)color.b;
299 break;
John Bauman89401822014-05-06 15:04:28 -0400300 case FORMAT_A16B16G16R16F:
301 ((half*)element)[0] = (half)color.r;
302 ((half*)element)[1] = (half)color.g;
303 ((half*)element)[2] = (half)color.b;
304 ((half*)element)[3] = (half)color.a;
305 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400306 case FORMAT_A32F:
307 *(float*)element = color.a;
308 break;
John Bauman89401822014-05-06 15:04:28 -0400309 case FORMAT_R32F:
310 *(float*)element = color.r;
311 break;
312 case FORMAT_G32R32F:
313 ((float*)element)[0] = color.r;
314 ((float*)element)[1] = color.g;
315 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400316 case FORMAT_X32B32G32R32F:
317 ((float*)element)[3] = 1.0f;
Nicolas Capens80594422015-06-09 16:42:56 -0400318 case FORMAT_B32G32R32F:
319 ((float*)element)[0] = color.r;
320 ((float*)element)[1] = color.g;
321 ((float*)element)[2] = color.b;
322 break;
John Bauman89401822014-05-06 15:04:28 -0400323 case FORMAT_A32B32G32R32F:
324 ((float*)element)[0] = color.r;
325 ((float*)element)[1] = color.g;
326 ((float*)element)[2] = color.b;
327 ((float*)element)[3] = color.a;
328 break;
329 case FORMAT_D32F:
330 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400331 case FORMAT_D32FS8_TEXTURE:
332 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400333 *((float*)element) = color.r;
334 break;
335 case FORMAT_D32F_COMPLEMENTARY:
336 *((float*)element) = 1 - color.r;
337 break;
338 case FORMAT_S8:
339 *((unsigned char*)element) = unorm<8>(color.r);
340 break;
341 case FORMAT_L8:
342 *(unsigned char*)element = unorm<8>(color.r);
343 break;
344 case FORMAT_A4L4:
345 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
346 break;
347 case FORMAT_L16:
348 *(unsigned short*)element = unorm<16>(color.r);
349 break;
350 case FORMAT_A8L8:
351 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
352 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400353 case FORMAT_L16F:
354 *(half*)element = (half)color.r;
355 break;
356 case FORMAT_A16L16F:
357 ((half*)element)[0] = (half)color.r;
358 ((half*)element)[1] = (half)color.a;
359 break;
360 case FORMAT_L32F:
361 *(float*)element = color.r;
362 break;
363 case FORMAT_A32L32F:
364 ((float*)element)[0] = color.r;
365 ((float*)element)[1] = color.a;
366 break;
John Bauman89401822014-05-06 15:04:28 -0400367 default:
368 ASSERT(false);
369 }
370 }
371
372 Color<float> Surface::Buffer::read(int x, int y, int z) const
373 {
374 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
375
376 return read(element);
377 }
378
379 Color<float> Surface::Buffer::read(int x, int y) const
380 {
381 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
382
383 return read(element);
384 }
385
386 inline Color<float> Surface::Buffer::read(void *element) const
387 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400388 float r = 0.0f;
389 float g = 0.0f;
390 float b = 0.0f;
391 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400392
393 switch(format)
394 {
395 case FORMAT_P8:
396 {
397 ASSERT(palette);
398
399 unsigned int abgr = palette[*(unsigned char*)element];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400400
John Bauman89401822014-05-06 15:04:28 -0400401 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
402 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
403 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
404 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
405 }
406 break;
407 case FORMAT_A8P8:
408 {
409 ASSERT(palette);
410
411 unsigned int bgr = palette[((unsigned char*)element)[0]];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400412
John Bauman89401822014-05-06 15:04:28 -0400413 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
414 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
415 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
416 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
417 }
418 break;
419 case FORMAT_A8:
420 r = 0;
421 g = 0;
422 b = 0;
423 a = *(unsigned char*)element * (1.0f / 0xFF);
424 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400425 case FORMAT_R8I_SNORM:
426 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
427 break;
John Bauman89401822014-05-06 15:04:28 -0400428 case FORMAT_R8:
429 r = *(unsigned char*)element * (1.0f / 0xFF);
430 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400431 case FORMAT_R8I:
432 r = *(signed char*)element;
433 break;
434 case FORMAT_R8UI:
435 r = *(unsigned char*)element;
436 break;
John Bauman89401822014-05-06 15:04:28 -0400437 case FORMAT_R3G3B2:
438 {
439 unsigned char rgb = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400440
John Bauman89401822014-05-06 15:04:28 -0400441 r = (rgb & 0xE0) * (1.0f / 0xE0);
442 g = (rgb & 0x1C) * (1.0f / 0x1C);
443 b = (rgb & 0x03) * (1.0f / 0x03);
444 }
445 break;
446 case FORMAT_A8R3G3B2:
447 {
448 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400449
John Bauman89401822014-05-06 15:04:28 -0400450 a = (argb & 0xFF00) * (1.0f / 0xFF00);
451 r = (argb & 0x00E0) * (1.0f / 0x00E0);
452 g = (argb & 0x001C) * (1.0f / 0x001C);
453 b = (argb & 0x0003) * (1.0f / 0x0003);
454 }
455 break;
456 case FORMAT_X4R4G4B4:
457 {
458 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400459
John Bauman89401822014-05-06 15:04:28 -0400460 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
461 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
462 b = (rgb & 0x000F) * (1.0f / 0x000F);
463 }
464 break;
465 case FORMAT_A4R4G4B4:
466 {
467 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400468
John Bauman89401822014-05-06 15:04:28 -0400469 a = (argb & 0xF000) * (1.0f / 0xF000);
470 r = (argb & 0x0F00) * (1.0f / 0x0F00);
471 g = (argb & 0x00F0) * (1.0f / 0x00F0);
472 b = (argb & 0x000F) * (1.0f / 0x000F);
473 }
474 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400475 case FORMAT_R4G4B4A4:
476 {
477 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400478
Nicolas Capens80594422015-06-09 16:42:56 -0400479 r = (rgba & 0xF000) * (1.0f / 0xF000);
480 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
481 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
482 a = (rgba & 0x000F) * (1.0f / 0x000F);
483 }
484 break;
John Bauman89401822014-05-06 15:04:28 -0400485 case FORMAT_R5G6B5:
486 {
487 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400488
John Bauman89401822014-05-06 15:04:28 -0400489 r = (rgb & 0xF800) * (1.0f / 0xF800);
490 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
491 b = (rgb & 0x001F) * (1.0f / 0x001F);
492 }
493 break;
494 case FORMAT_A1R5G5B5:
495 {
496 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400497
John Bauman89401822014-05-06 15:04:28 -0400498 a = (argb & 0x8000) * (1.0f / 0x8000);
499 r = (argb & 0x7C00) * (1.0f / 0x7C00);
500 g = (argb & 0x03E0) * (1.0f / 0x03E0);
501 b = (argb & 0x001F) * (1.0f / 0x001F);
502 }
503 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400504 case FORMAT_R5G5B5A1:
505 {
506 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400507
Nicolas Capens80594422015-06-09 16:42:56 -0400508 r = (rgba & 0xF800) * (1.0f / 0xF800);
509 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
510 b = (rgba & 0x003E) * (1.0f / 0x003E);
511 a = (rgba & 0x0001) * (1.0f / 0x0001);
512 }
513 break;
John Bauman89401822014-05-06 15:04:28 -0400514 case FORMAT_X1R5G5B5:
515 {
516 unsigned short xrgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400517
John Bauman89401822014-05-06 15:04:28 -0400518 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
519 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
520 b = (xrgb & 0x001F) * (1.0f / 0x001F);
521 }
522 break;
523 case FORMAT_A8R8G8B8:
524 {
525 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400526
John Bauman89401822014-05-06 15:04:28 -0400527 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
528 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
529 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
530 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
531 }
532 break;
533 case FORMAT_X8R8G8B8:
534 {
535 unsigned int xrgb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400536
John Bauman89401822014-05-06 15:04:28 -0400537 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
538 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
539 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
540 }
541 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400542 case FORMAT_A8B8G8R8I_SNORM:
543 {
544 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400545
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400546 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
547 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
548 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
549 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
550 }
551 break;
John Bauman89401822014-05-06 15:04:28 -0400552 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400553 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400554 {
555 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400556
John Bauman89401822014-05-06 15:04:28 -0400557 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
558 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
559 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
560 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
561 }
562 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400563 case FORMAT_A8B8G8R8I:
564 {
565 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400566
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400567 r = abgr[0];
568 g = abgr[1];
569 b = abgr[2];
570 a = abgr[3];
571 }
572 break;
573 case FORMAT_A8B8G8R8UI:
574 {
575 unsigned char* abgr = (unsigned char*)element;
576
577 r = abgr[0];
578 g = abgr[1];
579 b = abgr[2];
580 a = abgr[3];
581 }
582 break;
583 case FORMAT_X8B8G8R8I_SNORM:
584 {
585 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400586
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400587 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
588 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
589 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
590 }
591 break;
John Bauman89401822014-05-06 15:04:28 -0400592 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400593 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400594 {
595 unsigned int xbgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400596
John Bauman89401822014-05-06 15:04:28 -0400597 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
598 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
599 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
600 }
601 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400602 case FORMAT_X8B8G8R8I:
603 {
604 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400605
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400606 r = bgr[0];
607 g = bgr[1];
608 b = bgr[2];
609 }
610 break;
611 case FORMAT_X8B8G8R8UI:
612 {
613 unsigned char* bgr = (unsigned char*)element;
614
615 r = bgr[0];
616 g = bgr[1];
617 b = bgr[2];
618 }
619 break;
620 case FORMAT_G8R8I_SNORM:
621 {
622 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400623
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400624 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
625 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
626 }
627 break;
John Bauman89401822014-05-06 15:04:28 -0400628 case FORMAT_G8R8:
629 {
630 unsigned short gr = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400631
John Bauman89401822014-05-06 15:04:28 -0400632 g = (gr & 0xFF00) * (1.0f / 0xFF00);
633 r = (gr & 0x00FF) * (1.0f / 0x00FF);
634 }
635 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400636 case FORMAT_G8R8I:
637 {
638 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400639
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400640 r = gr[0];
641 g = gr[1];
642 }
643 break;
644 case FORMAT_G8R8UI:
645 {
646 unsigned char* gr = (unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400647
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400648 r = gr[0];
649 g = gr[1];
650 }
651 break;
652 case FORMAT_R16I:
653 r = *((short*)element);
654 break;
655 case FORMAT_R16UI:
656 r = *((unsigned short*)element);
657 break;
658 case FORMAT_G16R16I:
659 {
660 short* gr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400661
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400662 r = gr[0];
663 g = gr[1];
664 }
665 break;
John Bauman89401822014-05-06 15:04:28 -0400666 case FORMAT_G16R16:
667 {
668 unsigned int gr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400669
John Bauman89401822014-05-06 15:04:28 -0400670 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
671 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
672 }
673 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400674 case FORMAT_G16R16UI:
675 {
676 unsigned short* gr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400677
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400678 r = gr[0];
679 g = gr[1];
680 }
681 break;
John Bauman89401822014-05-06 15:04:28 -0400682 case FORMAT_A2R10G10B10:
683 {
684 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400685
John Bauman89401822014-05-06 15:04:28 -0400686 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
687 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
688 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
689 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
690 }
691 break;
692 case FORMAT_A2B10G10R10:
693 {
694 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400695
John Bauman89401822014-05-06 15:04:28 -0400696 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
697 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
698 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
699 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
700 }
701 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400702 case FORMAT_A16B16G16R16I:
703 {
704 short* abgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400705
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400706 r = abgr[0];
707 g = abgr[1];
708 b = abgr[2];
709 a = abgr[3];
710 }
711 break;
John Bauman89401822014-05-06 15:04:28 -0400712 case FORMAT_A16B16G16R16:
713 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
714 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
715 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
716 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
717 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400718 case FORMAT_A16B16G16R16UI:
719 {
720 unsigned short* abgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400721
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400722 r = abgr[0];
723 g = abgr[1];
724 b = abgr[2];
725 a = abgr[3];
726 }
727 break;
728 case FORMAT_X16B16G16R16I:
729 {
730 short* bgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400731
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400732 r = bgr[0];
733 g = bgr[1];
734 b = bgr[2];
735 }
736 break;
737 case FORMAT_X16B16G16R16UI:
738 {
739 unsigned short* bgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400740
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400741 r = bgr[0];
742 g = bgr[1];
743 b = bgr[2];
744 }
745 break;
746 case FORMAT_A32B32G32R32I:
747 {
748 int* abgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400749
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400750 r = static_cast<float>(abgr[0]);
751 g = static_cast<float>(abgr[1]);
752 b = static_cast<float>(abgr[2]);
753 a = static_cast<float>(abgr[3]);
754 }
755 break;
756 case FORMAT_A32B32G32R32UI:
757 {
758 unsigned int* abgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400759
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400760 r = static_cast<float>(abgr[0]);
761 g = static_cast<float>(abgr[1]);
762 b = static_cast<float>(abgr[2]);
763 a = static_cast<float>(abgr[3]);
764 }
765 break;
766 case FORMAT_X32B32G32R32I:
767 {
768 int* bgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400769
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400770 r = static_cast<float>(bgr[0]);
771 g = static_cast<float>(bgr[1]);
772 b = static_cast<float>(bgr[2]);
773 }
774 break;
775 case FORMAT_X32B32G32R32UI:
776 {
777 unsigned int* bgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400778
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400779 r = static_cast<float>(bgr[0]);
780 g = static_cast<float>(bgr[1]);
781 b = static_cast<float>(bgr[2]);
782 }
783 break;
784 case FORMAT_G32R32I:
785 {
786 int* gr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400787
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400788 r = static_cast<float>(gr[0]);
789 g = static_cast<float>(gr[1]);
790 }
791 break;
792 case FORMAT_G32R32UI:
793 {
794 unsigned int* gr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400795
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400796 r = static_cast<float>(gr[0]);
797 g = static_cast<float>(gr[1]);
798 }
799 break;
800 case FORMAT_R32I:
801 r = static_cast<float>(*((int*)element));
802 break;
803 case FORMAT_R32UI:
804 r = static_cast<float>(*((unsigned int*)element));
805 break;
John Bauman89401822014-05-06 15:04:28 -0400806 case FORMAT_V8U8:
807 {
808 unsigned short vu = *(unsigned short*)element;
809
810 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
811 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
812 }
813 break;
814 case FORMAT_L6V5U5:
815 {
816 unsigned short lvu = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400817
John Bauman89401822014-05-06 15:04:28 -0400818 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
819 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
820 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
821 }
822 break;
823 case FORMAT_Q8W8V8U8:
824 {
825 unsigned int qwvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400826
John Bauman89401822014-05-06 15:04:28 -0400827 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
828 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
829 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
830 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
831 }
832 break;
833 case FORMAT_X8L8V8U8:
834 {
835 unsigned int xlvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400836
John Bauman89401822014-05-06 15:04:28 -0400837 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
838 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
839 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
840 }
841 break;
842 case FORMAT_R8G8B8:
843 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
844 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
845 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
846 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400847 case FORMAT_B8G8R8:
848 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
849 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
850 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
851 break;
John Bauman89401822014-05-06 15:04:28 -0400852 case FORMAT_V16U16:
853 {
854 unsigned int vu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400855
John Bauman89401822014-05-06 15:04:28 -0400856 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
857 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
858 }
859 break;
860 case FORMAT_A2W10V10U10:
861 {
862 unsigned int awvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400863
John Bauman89401822014-05-06 15:04:28 -0400864 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
865 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
866 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
867 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
868 }
869 break;
870 case FORMAT_A16W16V16U16:
871 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
872 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
873 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
874 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
875 break;
876 case FORMAT_Q16W16V16U16:
877 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
878 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
879 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
880 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
881 break;
882 case FORMAT_L8:
883 r =
884 g =
885 b = *(unsigned char*)element * (1.0f / 0xFF);
886 break;
887 case FORMAT_A4L4:
888 {
889 unsigned char al = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400890
John Bauman89401822014-05-06 15:04:28 -0400891 r =
892 g =
893 b = (al & 0x0F) * (1.0f / 0x0F);
894 a = (al & 0xF0) * (1.0f / 0xF0);
895 }
896 break;
897 case FORMAT_L16:
898 r =
899 g =
900 b = *(unsigned short*)element * (1.0f / 0xFFFF);
901 break;
902 case FORMAT_A8L8:
903 r =
904 g =
905 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
906 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
907 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400908 case FORMAT_L16F:
909 r =
910 g =
911 b = *(half*)element;
912 break;
913 case FORMAT_A16L16F:
914 r =
915 g =
916 b = ((half*)element)[0];
917 a = ((half*)element)[1];
918 break;
919 case FORMAT_L32F:
920 r =
921 g =
922 b = *(float*)element;
923 break;
924 case FORMAT_A32L32F:
925 r =
926 g =
927 b = ((float*)element)[0];
928 a = ((float*)element)[1];
929 break;
930 case FORMAT_A16F:
931 a = *(half*)element;
932 break;
John Bauman89401822014-05-06 15:04:28 -0400933 case FORMAT_R16F:
934 r = *(half*)element;
935 break;
936 case FORMAT_G16R16F:
937 r = ((half*)element)[0];
938 g = ((half*)element)[1];
939 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400940 case FORMAT_B16G16R16F:
941 r = ((half*)element)[0];
942 g = ((half*)element)[1];
943 b = ((half*)element)[2];
944 break;
John Bauman89401822014-05-06 15:04:28 -0400945 case FORMAT_A16B16G16R16F:
946 r = ((half*)element)[0];
947 g = ((half*)element)[1];
948 b = ((half*)element)[2];
949 a = ((half*)element)[3];
950 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400951 case FORMAT_A32F:
952 a = *(float*)element;
953 break;
John Bauman89401822014-05-06 15:04:28 -0400954 case FORMAT_R32F:
955 r = *(float*)element;
956 break;
957 case FORMAT_G32R32F:
958 r = ((float*)element)[0];
959 g = ((float*)element)[1];
960 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400961 case FORMAT_X32B32G32R32F:
Nicolas Capens80594422015-06-09 16:42:56 -0400962 case FORMAT_B32G32R32F:
963 r = ((float*)element)[0];
964 g = ((float*)element)[1];
965 b = ((float*)element)[2];
966 break;
John Bauman89401822014-05-06 15:04:28 -0400967 case FORMAT_A32B32G32R32F:
968 r = ((float*)element)[0];
969 g = ((float*)element)[1];
970 b = ((float*)element)[2];
971 a = ((float*)element)[3];
972 break;
973 case FORMAT_D32F:
974 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400975 case FORMAT_D32FS8_TEXTURE:
976 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400977 r = *(float*)element;
978 g = r;
979 b = r;
980 a = r;
981 break;
982 case FORMAT_D32F_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400983 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400984 g = r;
985 b = r;
986 a = r;
987 break;
988 case FORMAT_S8:
989 r = *(unsigned char*)element * (1.0f / 0xFF);
990 break;
991 default:
992 ASSERT(false);
993 }
994
995 // if(sRGB)
996 // {
997 // r = sRGBtoLinear(r);
998 // g = sRGBtoLinear(g);
999 // b = sRGBtoLinear(b);
1000 // }
1001
1002 return Color<float>(r, g, b, a);
1003 }
1004
1005 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1006 {
1007 x -= 0.5f;
1008 y -= 0.5f;
1009 z -= 0.5f;
1010
1011 int x0 = clamp((int)x, 0, width - 1);
1012 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1013
1014 int y0 = clamp((int)y, 0, height - 1);
1015 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1016
1017 int z0 = clamp((int)z, 0, depth - 1);
1018 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1019
1020 Color<float> c000 = read(x0, y0, z0);
1021 Color<float> c100 = read(x1, y0, z0);
1022 Color<float> c010 = read(x0, y1, z0);
1023 Color<float> c110 = read(x1, y1, z0);
1024 Color<float> c001 = read(x0, y0, z1);
1025 Color<float> c101 = read(x1, y0, z1);
1026 Color<float> c011 = read(x0, y1, z1);
1027 Color<float> c111 = read(x1, y1, z1);
1028
1029 float fx = x - x0;
1030 float fy = y - y0;
1031 float fz = z - z0;
1032
1033 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1034 c100 *= fx * (1 - fy) * (1 - fz);
1035 c010 *= (1 - fx) * fy * (1 - fz);
1036 c110 *= fx * fy * (1 - fz);
1037 c001 *= (1 - fx) * (1 - fy) * fz;
1038 c101 *= fx * (1 - fy) * fz;
1039 c011 *= (1 - fx) * fy * fz;
1040 c111 *= fx * fy * fz;
1041
1042 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1043 }
1044
1045 Color<float> Surface::Buffer::sample(float x, float y) const
1046 {
1047 x -= 0.5f;
1048 y -= 0.5f;
1049
1050 int x0 = clamp((int)x, 0, width - 1);
1051 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1052
1053 int y0 = clamp((int)y, 0, height - 1);
1054 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1055
1056 Color<float> c00 = read(x0, y0);
1057 Color<float> c10 = read(x1, y0);
1058 Color<float> c01 = read(x0, y1);
1059 Color<float> c11 = read(x1, y1);
1060
1061 float fx = x - x0;
1062 float fy = y - y0;
1063
1064 c00 *= (1 - fx) * (1 - fy);
1065 c10 *= fx * (1 - fy);
1066 c01 *= (1 - fx) * fy;
1067 c11 *= fx * fy;
1068
1069 return c00 + c10 + c01 + c11;
1070 }
1071
John Bauman19bac1e2014-05-06 15:23:49 -04001072 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001073 {
1074 this->lock = lock;
1075
1076 switch(lock)
1077 {
1078 case LOCK_UNLOCKED:
1079 case LOCK_READONLY:
1080 break;
1081 case LOCK_WRITEONLY:
1082 case LOCK_READWRITE:
1083 case LOCK_DISCARD:
1084 dirty = true;
1085 break;
1086 default:
1087 ASSERT(false);
1088 }
1089
John Baumand4ae8632014-05-06 16:18:33 -04001090 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001091 {
John Baumand4ae8632014-05-06 16:18:33 -04001092 switch(format)
1093 {
1094 #if S3TC_SUPPORT
1095 case FORMAT_DXT1:
1096 #endif
1097 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001098 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001099 case FORMAT_R11_EAC:
1100 case FORMAT_SIGNED_R11_EAC:
1101 case FORMAT_RGB8_ETC2:
1102 case FORMAT_SRGB8_ETC2:
1103 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1104 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001105 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001106 case FORMAT_RG11_EAC:
1107 case FORMAT_SIGNED_RG11_EAC:
1108 case FORMAT_RGBA8_ETC2_EAC:
1109 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1110 case FORMAT_RGBA_ASTC_4x4_KHR:
1111 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1112 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1113 case FORMAT_RGBA_ASTC_5x4_KHR:
1114 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1115 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1116 case FORMAT_RGBA_ASTC_5x5_KHR:
1117 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1118 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1119 case FORMAT_RGBA_ASTC_6x5_KHR:
1120 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1121 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1122 case FORMAT_RGBA_ASTC_6x6_KHR:
1123 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1124 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1125 case FORMAT_RGBA_ASTC_8x5_KHR:
1126 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1127 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1128 case FORMAT_RGBA_ASTC_8x6_KHR:
1129 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1130 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1131 case FORMAT_RGBA_ASTC_8x8_KHR:
1132 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1133 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1134 case FORMAT_RGBA_ASTC_10x5_KHR:
1135 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1136 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1137 case FORMAT_RGBA_ASTC_10x6_KHR:
1138 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1139 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1140 case FORMAT_RGBA_ASTC_10x8_KHR:
1141 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1142 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1143 case FORMAT_RGBA_ASTC_10x10_KHR:
1144 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1145 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1146 case FORMAT_RGBA_ASTC_12x10_KHR:
1147 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1148 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1149 case FORMAT_RGBA_ASTC_12x12_KHR:
1150 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1151 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001152 #if S3TC_SUPPORT
1153 case FORMAT_DXT3:
1154 case FORMAT_DXT5:
1155 #endif
1156 case FORMAT_ATI2:
1157 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1158 default:
1159 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
1160 }
John Bauman89401822014-05-06 15:04:28 -04001161 }
1162
1163 return 0;
1164 }
1165
1166 void Surface::Buffer::unlockRect()
1167 {
1168 lock = LOCK_UNLOCKED;
1169 }
1170
Nicolas Capens477314b2015-06-09 16:47:29 -04001171 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1172 {
1173 resource = new Resource(0);
1174 hasParent = false;
1175 ownExternal = false;
1176 depth = max(1, depth);
1177
1178 external.buffer = pixels;
1179 external.width = width;
1180 external.height = height;
1181 external.depth = depth;
1182 external.format = format;
1183 external.bytes = bytes(external.format);
1184 external.pitchB = pitch;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001185 external.pitchP = external.bytes ? pitch / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001186 external.sliceB = slice;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001187 external.sliceP = external.bytes ? slice / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001188 external.lock = LOCK_UNLOCKED;
1189 external.dirty = true;
1190
1191 internal.buffer = 0;
1192 internal.width = width;
1193 internal.height = height;
1194 internal.depth = depth;
1195 internal.format = selectInternalFormat(format);
1196 internal.bytes = bytes(internal.format);
1197 internal.pitchB = pitchB(internal.width, internal.format, false);
1198 internal.pitchP = pitchP(internal.width, internal.format, false);
1199 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
1200 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
1201 internal.lock = LOCK_UNLOCKED;
1202 internal.dirty = false;
1203
1204 stencil.buffer = 0;
1205 stencil.width = width;
1206 stencil.height = height;
1207 stencil.depth = depth;
1208 stencil.format = FORMAT_S8;
1209 stencil.bytes = bytes(stencil.format);
1210 stencil.pitchB = pitchB(stencil.width, stencil.format, false);
1211 stencil.pitchP = pitchP(stencil.width, stencil.format, false);
1212 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
1213 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
1214 stencil.lock = LOCK_UNLOCKED;
1215 stencil.dirty = false;
1216
1217 dirtyMipmaps = true;
1218 paletteUsed = 0;
1219 }
1220
Nicolas Capensf3898612015-11-24 15:33:31 -05001221 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001222 {
1223 resource = texture ? texture : new Resource(0);
John Bauman19bac1e2014-05-06 15:23:49 -04001224 hasParent = texture != 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001225 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001226 depth = max(1, depth);
1227
1228 external.buffer = 0;
1229 external.width = width;
1230 external.height = height;
1231 external.depth = depth;
1232 external.format = format;
1233 external.bytes = bytes(external.format);
1234 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
1235 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
1236 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
1237 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
1238 external.lock = LOCK_UNLOCKED;
1239 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001240
1241 internal.buffer = 0;
1242 internal.width = width;
1243 internal.height = height;
1244 internal.depth = depth;
1245 internal.format = selectInternalFormat(format);
1246 internal.bytes = bytes(internal.format);
Nicolas Capensf3898612015-11-24 15:33:31 -05001247 internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1248 internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided;
John Bauman89401822014-05-06 15:04:28 -04001249 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
1250 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
1251 internal.lock = LOCK_UNLOCKED;
1252 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001253
1254 stencil.buffer = 0;
1255 stencil.width = width;
1256 stencil.height = height;
1257 stencil.depth = depth;
1258 stencil.format = FORMAT_S8;
1259 stencil.bytes = bytes(stencil.format);
1260 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
1261 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
1262 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
1263 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
1264 stencil.lock = LOCK_UNLOCKED;
1265 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001266
1267 dirtyMipmaps = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001268 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001269 }
1270
1271 Surface::~Surface()
1272 {
Nicolas Capensbf7a8142017-05-19 10:57:28 -04001273 // sync() must be called before this destructor to ensure all locks have been released.
1274 // We can't call it here because the parent resource may already have been destroyed.
1275 ASSERT(external.lock == LOCK_UNLOCKED);
1276 ASSERT(internal.lock == LOCK_UNLOCKED);
1277 ASSERT(stencil.lock == LOCK_UNLOCKED);
John Bauman8a4f6fc2014-05-06 15:26:18 -04001278
John Bauman89401822014-05-06 15:04:28 -04001279 if(!hasParent)
1280 {
1281 resource->destruct();
1282 }
1283
Nicolas Capens477314b2015-06-09 16:47:29 -04001284 if(ownExternal)
1285 {
1286 deallocate(external.buffer);
1287 }
John Bauman89401822014-05-06 15:04:28 -04001288
1289 if(internal.buffer != external.buffer)
1290 {
1291 deallocate(internal.buffer);
1292 }
1293
1294 deallocate(stencil.buffer);
1295
1296 external.buffer = 0;
1297 internal.buffer = 0;
1298 stencil.buffer = 0;
1299 }
1300
John Bauman19bac1e2014-05-06 15:23:49 -04001301 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001302 {
1303 resource->lock(client);
1304
1305 if(!external.buffer)
1306 {
1307 if(internal.buffer && identicalFormats())
1308 {
1309 external.buffer = internal.buffer;
1310 }
1311 else
1312 {
1313 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
1314 }
1315 }
1316
1317 if(internal.dirty)
1318 {
1319 if(lock != LOCK_DISCARD)
1320 {
1321 update(external, internal);
1322 }
John Bauman66b8ab22014-05-06 15:57:45 -04001323
1324 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001325 }
1326
1327 switch(lock)
1328 {
1329 case LOCK_READONLY:
1330 break;
1331 case LOCK_WRITEONLY:
1332 case LOCK_READWRITE:
1333 case LOCK_DISCARD:
1334 dirtyMipmaps = true;
1335 break;
1336 default:
1337 ASSERT(false);
1338 }
1339
John Bauman19bac1e2014-05-06 15:23:49 -04001340 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001341 }
1342
1343 void Surface::unlockExternal()
1344 {
1345 resource->unlock();
1346
1347 external.unlockRect();
1348 }
1349
John Bauman19bac1e2014-05-06 15:23:49 -04001350 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001351 {
1352 if(lock != LOCK_UNLOCKED)
1353 {
1354 resource->lock(client);
1355 }
1356
1357 if(!internal.buffer)
1358 {
1359 if(external.buffer && identicalFormats())
1360 {
1361 internal.buffer = external.buffer;
1362 }
1363 else
1364 {
1365 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
1366 }
1367 }
1368
1369 // FIXME: WHQL requires conversion to lower external precision and back
1370 if(logPrecision >= WHQL)
1371 {
1372 if(internal.dirty && renderTarget && internal.format != external.format)
1373 {
1374 if(lock != LOCK_DISCARD)
1375 {
1376 switch(external.format)
1377 {
1378 case FORMAT_R3G3B2:
1379 case FORMAT_A8R3G3B2:
1380 case FORMAT_A1R5G5B5:
1381 case FORMAT_A2R10G10B10:
1382 case FORMAT_A2B10G10R10:
1383 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1384 unlockExternal();
1385 break;
1386 default:
1387 // Difference passes WHQL
1388 break;
1389 }
1390 }
1391 }
1392 }
1393
John Bauman66b8ab22014-05-06 15:57:45 -04001394 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001395 {
1396 if(lock != LOCK_DISCARD)
1397 {
1398 update(internal, external);
1399 }
John Bauman89401822014-05-06 15:04:28 -04001400
John Bauman66b8ab22014-05-06 15:57:45 -04001401 external.dirty = false;
1402 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001403 }
1404
1405 switch(lock)
1406 {
1407 case LOCK_UNLOCKED:
1408 case LOCK_READONLY:
1409 break;
1410 case LOCK_WRITEONLY:
1411 case LOCK_READWRITE:
1412 case LOCK_DISCARD:
1413 dirtyMipmaps = true;
1414 break;
1415 default:
1416 ASSERT(false);
1417 }
1418
1419 if(lock == LOCK_READONLY && client == PUBLIC)
1420 {
1421 resolve();
1422 }
1423
John Bauman19bac1e2014-05-06 15:23:49 -04001424 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001425 }
1426
1427 void Surface::unlockInternal()
1428 {
1429 resource->unlock();
1430
1431 internal.unlockRect();
1432 }
1433
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001434 void *Surface::lockStencil(int x, int y, int front, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001435 {
1436 resource->lock(client);
1437
1438 if(!stencil.buffer)
1439 {
1440 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
1441 }
1442
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001443 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
John Bauman89401822014-05-06 15:04:28 -04001444 }
1445
1446 void Surface::unlockStencil()
1447 {
1448 resource->unlock();
1449
1450 stencil.unlockRect();
1451 }
1452
1453 int Surface::bytes(Format format)
1454 {
1455 switch(format)
1456 {
1457 case FORMAT_NULL: return 0;
1458 case FORMAT_P8: return 1;
1459 case FORMAT_A8P8: return 2;
1460 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001461 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001462 case FORMAT_R8: return 1;
1463 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001464 case FORMAT_R16I: return 2;
1465 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001466 case FORMAT_A8R3G3B2: return 2;
1467 case FORMAT_R5G6B5: return 2;
1468 case FORMAT_A1R5G5B5: return 2;
1469 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001470 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001471 case FORMAT_X4R4G4B4: return 2;
1472 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001473 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001474 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001475 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001476 case FORMAT_R32I: return 4;
1477 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001478 case FORMAT_X8R8G8B8: return 4;
1479 // case FORMAT_X8G8R8B8Q: return 4;
1480 case FORMAT_A8R8G8B8: return 4;
1481 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001482 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001483 case FORMAT_X8B8G8R8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04001484 case FORMAT_SRGB8_X8: return 4;
1485 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001486 case FORMAT_A8B8G8R8I: return 4;
1487 case FORMAT_R8UI: return 1;
1488 case FORMAT_G8R8UI: return 2;
1489 case FORMAT_X8B8G8R8UI: return 4;
1490 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001491 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001492 case FORMAT_R8I_SNORM: return 1;
1493 case FORMAT_G8R8I_SNORM: return 2;
1494 case FORMAT_X8B8G8R8I_SNORM: return 4;
1495 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001496 case FORMAT_A2R10G10B10: return 4;
1497 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001498 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001499 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001500 case FORMAT_G16R16I: return 4;
1501 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001502 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001503 case FORMAT_G32R32I: return 8;
1504 case FORMAT_G32R32UI: return 8;
1505 case FORMAT_X16B16G16R16I: return 8;
1506 case FORMAT_X16B16G16R16UI: return 8;
1507 case FORMAT_A16B16G16R16I: return 8;
1508 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001509 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001510 case FORMAT_X32B32G32R32I: return 16;
1511 case FORMAT_X32B32G32R32UI: return 16;
1512 case FORMAT_A32B32G32R32I: return 16;
1513 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001514 // Compressed formats
1515 #if S3TC_SUPPORT
1516 case FORMAT_DXT1: return 2; // Column of four pixels
1517 case FORMAT_DXT3: return 4; // Column of four pixels
1518 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001519 #endif
John Bauman89401822014-05-06 15:04:28 -04001520 case FORMAT_ATI1: return 2; // Column of four pixels
1521 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001522 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001523 case FORMAT_R11_EAC: return 2;
1524 case FORMAT_SIGNED_R11_EAC: return 2;
1525 case FORMAT_RG11_EAC: return 4;
1526 case FORMAT_SIGNED_RG11_EAC: return 4;
1527 case FORMAT_RGB8_ETC2: return 2;
1528 case FORMAT_SRGB8_ETC2: return 2;
1529 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1530 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1531 case FORMAT_RGBA8_ETC2_EAC: return 4;
1532 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1533 case FORMAT_RGBA_ASTC_4x4_KHR:
1534 case FORMAT_RGBA_ASTC_5x4_KHR:
1535 case FORMAT_RGBA_ASTC_5x5_KHR:
1536 case FORMAT_RGBA_ASTC_6x5_KHR:
1537 case FORMAT_RGBA_ASTC_6x6_KHR:
1538 case FORMAT_RGBA_ASTC_8x5_KHR:
1539 case FORMAT_RGBA_ASTC_8x6_KHR:
1540 case FORMAT_RGBA_ASTC_8x8_KHR:
1541 case FORMAT_RGBA_ASTC_10x5_KHR:
1542 case FORMAT_RGBA_ASTC_10x6_KHR:
1543 case FORMAT_RGBA_ASTC_10x8_KHR:
1544 case FORMAT_RGBA_ASTC_10x10_KHR:
1545 case FORMAT_RGBA_ASTC_12x10_KHR:
1546 case FORMAT_RGBA_ASTC_12x12_KHR:
1547 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1548 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1549 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1550 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1551 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1552 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1553 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1554 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1555 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1556 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1557 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1558 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1559 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1560 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001561 // Bumpmap formats
1562 case FORMAT_V8U8: return 2;
1563 case FORMAT_L6V5U5: return 2;
1564 case FORMAT_Q8W8V8U8: return 4;
1565 case FORMAT_X8L8V8U8: return 4;
1566 case FORMAT_A2W10V10U10: return 4;
1567 case FORMAT_V16U16: return 4;
1568 case FORMAT_A16W16V16U16: return 8;
1569 case FORMAT_Q16W16V16U16: return 8;
1570 // Luminance formats
1571 case FORMAT_L8: return 1;
1572 case FORMAT_A4L4: return 1;
1573 case FORMAT_L16: return 2;
1574 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001575 case FORMAT_L16F: return 2;
1576 case FORMAT_A16L16F: return 4;
1577 case FORMAT_L32F: return 4;
1578 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001579 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001580 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001581 case FORMAT_R16F: return 2;
1582 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001583 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001584 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001585 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001586 case FORMAT_R32F: return 4;
1587 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001588 case FORMAT_B32G32R32F: return 12;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001589 case FORMAT_X32B32G32R32F: return 16;
John Bauman89401822014-05-06 15:04:28 -04001590 case FORMAT_A32B32G32R32F: return 16;
1591 // Depth/stencil formats
1592 case FORMAT_D16: return 2;
1593 case FORMAT_D32: return 4;
1594 case FORMAT_D24X8: return 4;
1595 case FORMAT_D24S8: return 4;
1596 case FORMAT_D24FS8: return 4;
1597 case FORMAT_D32F: return 4;
1598 case FORMAT_D32F_COMPLEMENTARY: return 4;
1599 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001600 case FORMAT_D32FS8_TEXTURE: return 4;
1601 case FORMAT_D32FS8_SHADOW: return 4;
1602 case FORMAT_DF24S8: return 4;
1603 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001604 case FORMAT_INTZ: return 4;
1605 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001606 case FORMAT_YV12_BT601: return 1; // Y plane only
1607 case FORMAT_YV12_BT709: return 1; // Y plane only
1608 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001609 default:
1610 ASSERT(false);
1611 }
1612
1613 return 0;
1614 }
1615
1616 int Surface::pitchB(int width, Format format, bool target)
1617 {
1618 if(target || isDepth(format) || isStencil(format))
1619 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001620 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001621 }
1622
1623 switch(format)
1624 {
1625 #if S3TC_SUPPORT
1626 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001627 #endif
1628 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001629 case FORMAT_R11_EAC:
1630 case FORMAT_SIGNED_R11_EAC:
1631 case FORMAT_RGB8_ETC2:
1632 case FORMAT_SRGB8_ETC2:
1633 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1634 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001635 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001636 case FORMAT_RG11_EAC:
1637 case FORMAT_SIGNED_RG11_EAC:
1638 case FORMAT_RGBA8_ETC2_EAC:
1639 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1640 case FORMAT_RGBA_ASTC_4x4_KHR:
1641 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1642 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1643 case FORMAT_RGBA_ASTC_5x4_KHR:
1644 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1645 case FORMAT_RGBA_ASTC_5x5_KHR:
1646 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1647 return 16 * ((width + 4) / 5);
1648 case FORMAT_RGBA_ASTC_6x5_KHR:
1649 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1650 case FORMAT_RGBA_ASTC_6x6_KHR:
1651 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1652 return 16 * ((width + 5) / 6);
1653 case FORMAT_RGBA_ASTC_8x5_KHR:
1654 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1655 case FORMAT_RGBA_ASTC_8x6_KHR:
1656 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1657 case FORMAT_RGBA_ASTC_8x8_KHR:
1658 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1659 return 16 * ((width + 7) / 8);
1660 case FORMAT_RGBA_ASTC_10x5_KHR:
1661 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1662 case FORMAT_RGBA_ASTC_10x6_KHR:
1663 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1664 case FORMAT_RGBA_ASTC_10x8_KHR:
1665 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1666 case FORMAT_RGBA_ASTC_10x10_KHR:
1667 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1668 return 16 * ((width + 9) / 10);
1669 case FORMAT_RGBA_ASTC_12x10_KHR:
1670 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1671 case FORMAT_RGBA_ASTC_12x12_KHR:
1672 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1673 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001674 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001675 case FORMAT_DXT3:
1676 case FORMAT_DXT5:
1677 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001678 #endif
John Bauman89401822014-05-06 15:04:28 -04001679 case FORMAT_ATI1:
1680 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1681 case FORMAT_ATI2:
1682 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001683 case FORMAT_YV12_BT601:
1684 case FORMAT_YV12_BT709:
1685 case FORMAT_YV12_JFIF:
1686 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001687 default:
1688 return bytes(format) * width;
1689 }
1690 }
1691
1692 int Surface::pitchP(int width, Format format, bool target)
1693 {
1694 int B = bytes(format);
1695
1696 return B > 0 ? pitchB(width, format, target) / B : 0;
1697 }
1698
1699 int Surface::sliceB(int width, int height, Format format, bool target)
1700 {
1701 if(target || isDepth(format) || isStencil(format))
1702 {
1703 height = ((height + 1) & ~1);
1704 }
1705
1706 switch(format)
1707 {
1708 #if S3TC_SUPPORT
1709 case FORMAT_DXT1:
1710 case FORMAT_DXT3:
1711 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001712 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001713 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001714 case FORMAT_R11_EAC:
1715 case FORMAT_SIGNED_R11_EAC:
1716 case FORMAT_RG11_EAC:
1717 case FORMAT_SIGNED_RG11_EAC:
1718 case FORMAT_RGB8_ETC2:
1719 case FORMAT_SRGB8_ETC2:
1720 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1721 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1722 case FORMAT_RGBA8_ETC2_EAC:
1723 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1724 case FORMAT_RGBA_ASTC_4x4_KHR:
1725 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1726 case FORMAT_RGBA_ASTC_5x4_KHR:
1727 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Nicolas Capens22658242014-11-29 00:31:41 -05001728 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001729 case FORMAT_RGBA_ASTC_5x5_KHR:
1730 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1731 case FORMAT_RGBA_ASTC_6x5_KHR:
1732 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1733 case FORMAT_RGBA_ASTC_8x5_KHR:
1734 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1735 case FORMAT_RGBA_ASTC_10x5_KHR:
1736 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1737 return pitchB(width, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
1738 case FORMAT_RGBA_ASTC_6x6_KHR:
1739 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1740 case FORMAT_RGBA_ASTC_8x6_KHR:
1741 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1742 case FORMAT_RGBA_ASTC_10x6_KHR:
1743 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1744 return pitchB(width, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
1745 case FORMAT_RGBA_ASTC_8x8_KHR:
1746 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1747 case FORMAT_RGBA_ASTC_10x8_KHR:
1748 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1749 return pitchB(width, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
1750 case FORMAT_RGBA_ASTC_10x10_KHR:
1751 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1752 case FORMAT_RGBA_ASTC_12x10_KHR:
1753 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1754 return pitchB(width, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
1755 case FORMAT_RGBA_ASTC_12x12_KHR:
1756 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1757 return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001758 case FORMAT_ATI1:
1759 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001760 default:
Nicolas Capens22658242014-11-29 00:31:41 -05001761 return pitchB(width, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001762 }
1763 }
1764
1765 int Surface::sliceP(int width, int height, Format format, bool target)
1766 {
1767 int B = bytes(format);
1768
1769 return B > 0 ? sliceB(width, height, format, target) / B : 0;
1770 }
1771
1772 void Surface::update(Buffer &destination, Buffer &source)
1773 {
1774 // ASSERT(source.lock != LOCK_UNLOCKED);
1775 // ASSERT(destination.lock != LOCK_UNLOCKED);
Nicolas Capensc39901e2016-03-21 16:37:44 -04001776
John Bauman89401822014-05-06 15:04:28 -04001777 if(destination.buffer != source.buffer)
1778 {
1779 ASSERT(source.dirty && !destination.dirty);
1780
1781 switch(source.format)
1782 {
1783 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001784 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1785 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1786 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1787 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1788 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1789 #if S3TC_SUPPORT
1790 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1791 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1792 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001793 #endif
John Bauman89401822014-05-06 15:04:28 -04001794 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1795 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001796 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1797 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1798 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1799 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001800 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001801 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1802 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1803 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1804 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1805 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1806 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1807 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1808 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1809 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1810 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1811 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1812 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1813 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1814 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1815 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1816 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1817 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1818 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1819 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1820 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1821 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1822 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1823 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1824 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1825 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1826 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1827 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1828 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1829 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1830 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1831 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1832 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1833 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1834 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001835 default: genericUpdate(destination, source); break;
1836 }
1837 }
John Bauman89401822014-05-06 15:04:28 -04001838 }
1839
1840 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1841 {
1842 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1843 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1844
1845 int depth = min(destination.depth, source.depth);
1846 int height = min(destination.height, source.height);
1847 int width = min(destination.width, source.width);
1848 int rowBytes = width * source.bytes;
1849
1850 for(int z = 0; z < depth; z++)
1851 {
1852 unsigned char *sourceRow = sourceSlice;
1853 unsigned char *destinationRow = destinationSlice;
1854
1855 for(int y = 0; y < height; y++)
1856 {
1857 if(source.format == destination.format)
1858 {
1859 memcpy(destinationRow, sourceRow, rowBytes);
1860 }
1861 else
1862 {
1863 unsigned char *sourceElement = sourceRow;
1864 unsigned char *destinationElement = destinationRow;
1865
1866 for(int x = 0; x < width; x++)
1867 {
1868 Color<float> color = source.read(sourceElement);
1869 destination.write(destinationElement, color);
1870
1871 sourceElement += source.bytes;
1872 destinationElement += destination.bytes;
1873 }
1874 }
1875
1876 sourceRow += source.pitchB;
1877 destinationRow += destination.pitchB;
1878 }
1879
1880 sourceSlice += source.sliceB;
1881 destinationSlice += destination.sliceB;
1882 }
1883 }
1884
1885 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
1886 {
1887 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1888 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1889
1890 for(int z = 0; z < destination.depth && z < source.depth; z++)
1891 {
1892 unsigned char *sourceRow = sourceSlice;
1893 unsigned char *destinationRow = destinationSlice;
1894
1895 for(int y = 0; y < destination.height && y < source.height; y++)
1896 {
1897 unsigned char *sourceElement = sourceRow;
1898 unsigned char *destinationElement = destinationRow;
1899
1900 for(int x = 0; x < destination.width && x < source.width; x++)
1901 {
1902 unsigned int b = sourceElement[0];
1903 unsigned int g = sourceElement[1];
1904 unsigned int r = sourceElement[2];
1905
1906 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1907
1908 sourceElement += source.bytes;
1909 destinationElement += destination.bytes;
1910 }
1911
1912 sourceRow += source.pitchB;
1913 destinationRow += destination.pitchB;
1914 }
1915
1916 sourceSlice += source.sliceB;
1917 destinationSlice += destination.sliceB;
1918 }
1919 }
1920
John Bauman89401822014-05-06 15:04:28 -04001921 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
1922 {
1923 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1924 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1925
1926 for(int z = 0; z < destination.depth && z < source.depth; z++)
1927 {
1928 unsigned char *sourceRow = sourceSlice;
1929 unsigned char *destinationRow = destinationSlice;
1930
1931 for(int y = 0; y < destination.height && y < source.height; y++)
1932 {
1933 unsigned char *sourceElement = sourceRow;
1934 unsigned char *destinationElement = destinationRow;
1935
1936 for(int x = 0; x < destination.width && x < source.width; x++)
1937 {
1938 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001939
John Bauman89401822014-05-06 15:04:28 -04001940 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1941 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1942 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1943
1944 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1945
1946 sourceElement += source.bytes;
1947 destinationElement += destination.bytes;
1948 }
1949
1950 sourceRow += source.pitchB;
1951 destinationRow += destination.pitchB;
1952 }
1953
1954 sourceSlice += source.sliceB;
1955 destinationSlice += destination.sliceB;
1956 }
1957 }
1958
1959 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
1960 {
1961 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1962 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1963
1964 for(int z = 0; z < destination.depth && z < source.depth; z++)
1965 {
1966 unsigned char *sourceRow = sourceSlice;
1967 unsigned char *destinationRow = destinationSlice;
1968
1969 for(int y = 0; y < destination.height && y < source.height; y++)
1970 {
1971 unsigned char *sourceElement = sourceRow;
1972 unsigned char *destinationElement = destinationRow;
1973
1974 for(int x = 0; x < destination.width && x < source.width; x++)
1975 {
1976 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001977
John Bauman89401822014-05-06 15:04:28 -04001978 unsigned int a = (argb & 0x8000) * 130560;
1979 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1980 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1981 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
1982
1983 *(unsigned int*)destinationElement = a | r | g | b;
1984
1985 sourceElement += source.bytes;
1986 destinationElement += destination.bytes;
1987 }
1988
1989 sourceRow += source.pitchB;
1990 destinationRow += destination.pitchB;
1991 }
1992
1993 sourceSlice += source.sliceB;
1994 destinationSlice += destination.sliceB;
1995 }
1996 }
1997
1998 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
1999 {
2000 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2001 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2002
2003 for(int z = 0; z < destination.depth && z < source.depth; z++)
2004 {
2005 unsigned char *sourceRow = sourceSlice;
2006 unsigned char *destinationRow = destinationSlice;
2007
2008 for(int y = 0; y < destination.height && y < source.height; y++)
2009 {
2010 unsigned char *sourceElement = sourceRow;
2011 unsigned char *destinationElement = destinationRow;
2012
2013 for(int x = 0; x < destination.width && x < source.width; x++)
2014 {
2015 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002016
John Bauman89401822014-05-06 15:04:28 -04002017 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2018 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2019 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2020
2021 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2022
2023 sourceElement += source.bytes;
2024 destinationElement += destination.bytes;
2025 }
2026
2027 sourceRow += source.pitchB;
2028 destinationRow += destination.pitchB;
2029 }
2030
2031 sourceSlice += source.sliceB;
2032 destinationSlice += destination.sliceB;
2033 }
2034 }
2035
2036 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
2037 {
2038 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2039 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2040
2041 for(int z = 0; z < destination.depth && z < source.depth; z++)
2042 {
2043 unsigned char *sourceRow = sourceSlice;
2044 unsigned char *destinationRow = destinationSlice;
2045
2046 for(int y = 0; y < destination.height && y < source.height; y++)
2047 {
2048 unsigned char *sourceElement = sourceRow;
2049 unsigned char *destinationElement = destinationRow;
2050
2051 for(int x = 0; x < destination.width && x < source.width; x++)
2052 {
2053 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002054
John Bauman89401822014-05-06 15:04:28 -04002055 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2056 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2057 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2058 unsigned int b = (argb & 0x000F) * 0x00000011;
2059
2060 *(unsigned int*)destinationElement = a | r | g | b;
2061
2062 sourceElement += source.bytes;
2063 destinationElement += destination.bytes;
2064 }
2065
2066 sourceRow += source.pitchB;
2067 destinationRow += destination.pitchB;
2068 }
2069
2070 sourceSlice += source.sliceB;
2071 destinationSlice += destination.sliceB;
2072 }
2073 }
2074
2075 void Surface::decodeP8(Buffer &destination, const Buffer &source)
2076 {
2077 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2078 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2079
2080 for(int z = 0; z < destination.depth && z < source.depth; z++)
2081 {
2082 unsigned char *sourceRow = sourceSlice;
2083 unsigned char *destinationRow = destinationSlice;
2084
2085 for(int y = 0; y < destination.height && y < source.height; y++)
2086 {
2087 unsigned char *sourceElement = sourceRow;
2088 unsigned char *destinationElement = destinationRow;
2089
2090 for(int x = 0; x < destination.width && x < source.width; x++)
2091 {
2092 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2093
2094 unsigned int r = (abgr & 0x000000FF) << 16;
2095 unsigned int g = (abgr & 0x0000FF00) << 0;
2096 unsigned int b = (abgr & 0x00FF0000) >> 16;
2097 unsigned int a = (abgr & 0xFF000000) >> 0;
2098
2099 *(unsigned int*)destinationElement = a | r | g | b;
2100
2101 sourceElement += source.bytes;
2102 destinationElement += destination.bytes;
2103 }
2104
2105 sourceRow += source.pitchB;
2106 destinationRow += destination.pitchB;
2107 }
2108
2109 sourceSlice += source.sliceB;
2110 destinationSlice += destination.sliceB;
2111 }
2112 }
2113
2114#if S3TC_SUPPORT
2115 void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
2116 {
2117 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002118 const DXT1 *source = (const DXT1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002119
2120 for(int z = 0; z < external.depth; z++)
2121 {
2122 unsigned int *dest = destSlice;
2123
2124 for(int y = 0; y < external.height; y += 4)
2125 {
2126 for(int x = 0; x < external.width; x += 4)
2127 {
2128 Color<byte> c[4];
2129
2130 c[0] = source->c0;
2131 c[1] = source->c1;
2132
2133 if(source->c0 > source->c1) // No transparency
2134 {
2135 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2136 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2137 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2138 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2139 c[2].a = 0xFF;
2140
2141 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2142 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2143 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2144 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2145 c[3].a = 0xFF;
2146 }
2147 else // c3 transparent
2148 {
2149 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2150 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2151 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2152 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2153 c[2].a = 0xFF;
2154
2155 c[3].r = 0;
2156 c[3].g = 0;
2157 c[3].b = 0;
2158 c[3].a = 0;
2159 }
2160
2161 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2162 {
2163 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2164 {
2165 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2166 }
2167 }
2168
2169 source++;
2170 }
2171 }
2172
2173 (byte*&)destSlice += internal.sliceB;
2174 }
2175 }
2176
2177 void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
2178 {
2179 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002180 const DXT3 *source = (const DXT3*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002181
2182 for(int z = 0; z < external.depth; z++)
2183 {
2184 unsigned int *dest = destSlice;
2185
2186 for(int y = 0; y < external.height; y += 4)
2187 {
2188 for(int x = 0; x < external.width; x += 4)
2189 {
2190 Color<byte> c[4];
2191
2192 c[0] = source->c0;
2193 c[1] = source->c1;
2194
2195 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2196 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2197 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2198 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2199
2200 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2201 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2202 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2203 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2204
2205 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2206 {
2207 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2208 {
2209 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2210 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2211
2212 dest[(x + i) + (y + j) * internal.width] = color;
2213 }
2214 }
2215
2216 source++;
2217 }
2218 }
2219
2220 (byte*&)destSlice += internal.sliceB;
2221 }
2222 }
2223
2224 void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
2225 {
2226 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002227 const DXT5 *source = (const DXT5*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002228
2229 for(int z = 0; z < external.depth; z++)
2230 {
2231 unsigned int *dest = destSlice;
2232
2233 for(int y = 0; y < external.height; y += 4)
2234 {
2235 for(int x = 0; x < external.width; x += 4)
2236 {
2237 Color<byte> c[4];
2238
2239 c[0] = source->c0;
2240 c[1] = source->c1;
2241
2242 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2243 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2244 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2245 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2246
2247 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2248 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2249 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2250 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2251
2252 byte a[8];
2253
2254 a[0] = source->a0;
2255 a[1] = source->a1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002256
John Bauman89401822014-05-06 15:04:28 -04002257 if(a[0] > a[1])
2258 {
2259 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2260 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2261 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2262 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2263 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2264 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2265 }
2266 else
2267 {
2268 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2269 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2270 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2271 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2272 a[6] = 0;
2273 a[7] = 0xFF;
2274 }
2275
2276 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2277 {
2278 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2279 {
2280 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2281 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002282
John Bauman89401822014-05-06 15:04:28 -04002283 dest[(x + i) + (y + j) * internal.width] = color;
2284 }
2285 }
2286
2287 source++;
2288 }
2289 }
2290
2291 (byte*&)destSlice += internal.sliceB;
2292 }
2293 }
Nicolas Capens22658242014-11-29 00:31:41 -05002294#endif
John Bauman89401822014-05-06 15:04:28 -04002295
2296 void Surface::decodeATI1(Buffer &internal, const Buffer &external)
2297 {
2298 byte *destSlice = (byte*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002299 const ATI1 *source = (const ATI1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002300
2301 for(int z = 0; z < external.depth; z++)
2302 {
2303 byte *dest = destSlice;
2304
2305 for(int y = 0; y < external.height; y += 4)
2306 {
2307 for(int x = 0; x < external.width; x += 4)
2308 {
2309 byte r[8];
2310
2311 r[0] = source->r0;
2312 r[1] = source->r1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002313
John Bauman89401822014-05-06 15:04:28 -04002314 if(r[0] > r[1])
2315 {
2316 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2317 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2318 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2319 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2320 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2321 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2322 }
2323 else
2324 {
2325 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2326 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2327 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2328 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2329 r[6] = 0;
2330 r[7] = 0xFF;
2331 }
2332
2333 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2334 {
2335 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2336 {
2337 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2338 }
2339 }
2340
2341 source++;
2342 }
2343 }
2344
2345 destSlice += internal.sliceB;
2346 }
2347 }
2348
2349 void Surface::decodeATI2(Buffer &internal, const Buffer &external)
2350 {
2351 word *destSlice = (word*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002352 const ATI2 *source = (const ATI2*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002353
2354 for(int z = 0; z < external.depth; z++)
2355 {
2356 word *dest = destSlice;
2357
2358 for(int y = 0; y < external.height; y += 4)
2359 {
2360 for(int x = 0; x < external.width; x += 4)
2361 {
2362 byte X[8];
2363
2364 X[0] = source->x0;
2365 X[1] = source->x1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002366
John Bauman89401822014-05-06 15:04:28 -04002367 if(X[0] > X[1])
2368 {
2369 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2370 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2371 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2372 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2373 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2374 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2375 }
2376 else
2377 {
2378 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2379 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2380 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2381 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2382 X[6] = 0;
2383 X[7] = 0xFF;
2384 }
2385
2386 byte Y[8];
2387
2388 Y[0] = source->y0;
2389 Y[1] = source->y1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002390
John Bauman89401822014-05-06 15:04:28 -04002391 if(Y[0] > Y[1])
2392 {
2393 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2394 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2395 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2396 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2397 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2398 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2399 }
2400 else
2401 {
2402 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2403 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2404 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2405 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2406 Y[6] = 0;
2407 Y[7] = 0xFF;
2408 }
2409
2410 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2411 {
2412 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2413 {
2414 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2415 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2416
2417 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2418 }
2419 }
2420
2421 source++;
2422 }
2423 }
2424
2425 (byte*&)destSlice += internal.sliceB;
2426 }
2427 }
Nicolas Capens22658242014-11-29 00:31:41 -05002428
Alexis Hetu0de50d42015-09-09 13:56:41 -04002429 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002430 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002431 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2432 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Nicolas Capens22658242014-11-29 00:31:41 -05002433
Alexis Hetu0de50d42015-09-09 13:56:41 -04002434 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002435 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002436 static byte sRGBtoLinearTable[256];
2437 static bool sRGBtoLinearTableDirty = true;
2438 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002439 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002440 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002441 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002442 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002443 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002444 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002445 }
2446
Alexis Hetu0de50d42015-09-09 13:56:41 -04002447 // Perform sRGB conversion in place after decoding
2448 byte* src = (byte*)internal.buffer;
2449 for(int y = 0; y < internal.height; y++)
2450 {
2451 byte* srcRow = src + y * internal.pitchB;
2452 for(int x = 0; x < internal.width; x++)
2453 {
2454 byte* srcPix = srcRow + x * internal.bytes;
2455 for(int i = 0; i < 3; i++)
2456 {
2457 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2458 }
2459 }
2460 }
Nicolas Capens22658242014-11-29 00:31:41 -05002461 }
2462 }
John Bauman89401822014-05-06 15:04:28 -04002463
Alexis Hetu460e41f2015-09-01 10:58:37 -04002464 void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
2465 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002466 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002467
Alexis Hetu0de50d42015-09-09 13:56:41 -04002468 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2469 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
2470
2471 // FIXME: We convert signed data to float, until signed integer internal formats are supported
2472 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
2473 if(isSigned)
2474 {
2475 sbyte* src = (sbyte*)internal.buffer;
2476
2477 for(int y = 0; y < internal.height; y++)
2478 {
2479 sbyte* srcRow = src + y * internal.pitchB;
2480 for(int x = internal.width - 1; x >= 0; x--)
2481 {
2482 int dx = x & 0xFFFFFFFC;
2483 int mx = x - dx;
2484 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels;
2485 float* dstPix = (float*)(srcRow + x * internal.bytes);
2486 for(int c = nbChannels - 1; c >= 0; c--)
2487 {
2488 static const float normalization = 1.0f / 127.875f;
2489 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
2490 }
2491 }
2492 }
2493 }
Alexis Hetu460e41f2015-09-01 10:58:37 -04002494 }
2495
2496 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
2497 {
2498 }
2499
John Bauman89401822014-05-06 15:04:28 -04002500 unsigned int Surface::size(int width, int height, int depth, Format format)
2501 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002502 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002503 int width4 = align(width, 4);
2504 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002505
2506 switch(format)
2507 {
2508 #if S3TC_SUPPORT
2509 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002510 #endif
John Bauman89401822014-05-06 15:04:28 -04002511 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002512 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002513 case FORMAT_R11_EAC:
2514 case FORMAT_SIGNED_R11_EAC:
2515 case FORMAT_RGB8_ETC2:
2516 case FORMAT_SRGB8_ETC2:
2517 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2518 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002519 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002520 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002521 case FORMAT_DXT3:
2522 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002523 #endif
John Bauman89401822014-05-06 15:04:28 -04002524 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002525 case FORMAT_RG11_EAC:
2526 case FORMAT_SIGNED_RG11_EAC:
2527 case FORMAT_RGBA8_ETC2_EAC:
2528 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2529 case FORMAT_RGBA_ASTC_4x4_KHR:
2530 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002531 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002532 case FORMAT_RGBA_ASTC_5x4_KHR:
2533 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2534 return align(width, 5) * height4 * depth;
2535 case FORMAT_RGBA_ASTC_5x5_KHR:
2536 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2537 return align(width, 5) * align(height, 5) * depth;
2538 case FORMAT_RGBA_ASTC_6x5_KHR:
2539 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2540 return align(width, 6) * align(height, 5) * depth;
2541 case FORMAT_RGBA_ASTC_6x6_KHR:
2542 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2543 return align(width, 6) * align(height, 6) * depth;
2544 case FORMAT_RGBA_ASTC_8x5_KHR:
2545 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2546 return align(width, 8) * align(height, 5) * depth;
2547 case FORMAT_RGBA_ASTC_8x6_KHR:
2548 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2549 return align(width, 8) * align(height, 6) * depth;
2550 case FORMAT_RGBA_ASTC_8x8_KHR:
2551 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2552 return align(width, 8) * align(height, 8) * depth;
2553 case FORMAT_RGBA_ASTC_10x5_KHR:
2554 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2555 return align(width, 10) * align(height, 5) * depth;
2556 case FORMAT_RGBA_ASTC_10x6_KHR:
2557 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2558 return align(width, 10) * align(height, 6) * depth;
2559 case FORMAT_RGBA_ASTC_10x8_KHR:
2560 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2561 return align(width, 10) * align(height, 8) * depth;
2562 case FORMAT_RGBA_ASTC_10x10_KHR:
2563 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2564 return align(width, 10) * align(height, 10) * depth;
2565 case FORMAT_RGBA_ASTC_12x10_KHR:
2566 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2567 return align(width, 12) * align(height, 10) * depth;
2568 case FORMAT_RGBA_ASTC_12x12_KHR:
2569 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2570 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002571 case FORMAT_YV12_BT601:
2572 case FORMAT_YV12_BT709:
2573 case FORMAT_YV12_JFIF:
2574 {
2575 unsigned int YStride = align(width, 16);
2576 unsigned int YSize = YStride * height;
2577 unsigned int CStride = align(YStride / 2, 16);
Nicolas Capens0bac2852016-05-07 06:09:58 -04002578 unsigned int CSize = CStride * height / 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002579
2580 return YSize + 2 * CSize;
2581 }
John Bauman89401822014-05-06 15:04:28 -04002582 default:
2583 return bytes(format) * width * height * depth;
2584 }
John Bauman89401822014-05-06 15:04:28 -04002585 }
2586
2587 bool Surface::isStencil(Format format)
2588 {
2589 switch(format)
2590 {
2591 case FORMAT_D32:
2592 case FORMAT_D16:
2593 case FORMAT_D24X8:
2594 case FORMAT_D32F:
2595 case FORMAT_D32F_COMPLEMENTARY:
2596 case FORMAT_D32F_LOCKABLE:
2597 return false;
2598 case FORMAT_D24S8:
2599 case FORMAT_D24FS8:
2600 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002601 case FORMAT_DF24S8:
2602 case FORMAT_DF16S8:
2603 case FORMAT_D32FS8_TEXTURE:
2604 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002605 case FORMAT_INTZ:
2606 return true;
2607 default:
2608 return false;
2609 }
2610 }
2611
2612 bool Surface::isDepth(Format format)
2613 {
2614 switch(format)
2615 {
2616 case FORMAT_D32:
2617 case FORMAT_D16:
2618 case FORMAT_D24X8:
2619 case FORMAT_D24S8:
2620 case FORMAT_D24FS8:
2621 case FORMAT_D32F:
2622 case FORMAT_D32F_COMPLEMENTARY:
2623 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002624 case FORMAT_DF24S8:
2625 case FORMAT_DF16S8:
2626 case FORMAT_D32FS8_TEXTURE:
2627 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002628 case FORMAT_INTZ:
2629 return true;
2630 case FORMAT_S8:
2631 return false;
2632 default:
2633 return false;
2634 }
2635 }
2636
Alexis Hetub9dda642016-10-06 11:25:32 -04002637 bool Surface::hasQuadLayout(Format format)
2638 {
2639 switch(format)
2640 {
2641 case FORMAT_D32:
2642 case FORMAT_D16:
2643 case FORMAT_D24X8:
2644 case FORMAT_D24S8:
2645 case FORMAT_D24FS8:
2646 case FORMAT_D32F:
2647 case FORMAT_D32F_COMPLEMENTARY:
2648 case FORMAT_DF24S8:
2649 case FORMAT_DF16S8:
2650 case FORMAT_INTZ:
2651 case FORMAT_S8:
2652 case FORMAT_A8G8R8B8Q:
2653 case FORMAT_X8G8R8B8Q:
2654 return true;
2655 case FORMAT_D32F_LOCKABLE:
2656 case FORMAT_D32FS8_TEXTURE:
2657 case FORMAT_D32FS8_SHADOW:
2658 default:
2659 break;
2660 }
2661
2662 return false;
2663 }
2664
John Bauman89401822014-05-06 15:04:28 -04002665 bool Surface::isPalette(Format format)
2666 {
2667 switch(format)
2668 {
2669 case FORMAT_P8:
2670 case FORMAT_A8P8:
2671 return true;
2672 default:
2673 return false;
2674 }
2675 }
2676
2677 bool Surface::isFloatFormat(Format format)
2678 {
2679 switch(format)
2680 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002681 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002682 case FORMAT_R8G8B8:
2683 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002684 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002685 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002686 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002687 case FORMAT_A8R8G8B8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002688 case FORMAT_SRGB8_X8:
2689 case FORMAT_SRGB8_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002690 case FORMAT_A8B8G8R8I:
2691 case FORMAT_R8UI:
2692 case FORMAT_G8R8UI:
2693 case FORMAT_X8B8G8R8UI:
2694 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002695 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002696 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002697 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002698 case FORMAT_A2B10G10R10:
Alexis Hetu43577b82015-10-21 15:32:16 -04002699 case FORMAT_R8I_SNORM:
2700 case FORMAT_G8R8I_SNORM:
2701 case FORMAT_X8B8G8R8I_SNORM:
2702 case FORMAT_A8B8G8R8I_SNORM:
2703 case FORMAT_R16I:
2704 case FORMAT_R16UI:
2705 case FORMAT_G16R16I:
2706 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002707 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002708 case FORMAT_X16B16G16R16I:
2709 case FORMAT_X16B16G16R16UI:
2710 case FORMAT_A16B16G16R16I:
2711 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002712 case FORMAT_A16B16G16R16:
2713 case FORMAT_V8U8:
2714 case FORMAT_Q8W8V8U8:
2715 case FORMAT_X8L8V8U8:
2716 case FORMAT_V16U16:
2717 case FORMAT_A16W16V16U16:
2718 case FORMAT_Q16W16V16U16:
2719 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002720 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002721 case FORMAT_R8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002722 case FORMAT_S8:
John Bauman89401822014-05-06 15:04:28 -04002723 case FORMAT_L8:
2724 case FORMAT_L16:
2725 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002726 case FORMAT_YV12_BT601:
2727 case FORMAT_YV12_BT709:
2728 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002729 case FORMAT_R32I:
2730 case FORMAT_R32UI:
2731 case FORMAT_G32R32I:
2732 case FORMAT_G32R32UI:
2733 case FORMAT_X32B32G32R32I:
2734 case FORMAT_X32B32G32R32UI:
2735 case FORMAT_A32B32G32R32I:
2736 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002737 return false;
Nicolas Capens400667e2017-03-29 14:40:14 -04002738 case FORMAT_R16F:
2739 case FORMAT_G16R16F:
2740 case FORMAT_B16G16R16F:
2741 case FORMAT_A16B16G16R16F:
John Bauman89401822014-05-06 15:04:28 -04002742 case FORMAT_R32F:
2743 case FORMAT_G32R32F:
Nicolas Capensc018e082016-12-13 10:19:33 -05002744 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002745 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002746 case FORMAT_A32B32G32R32F:
2747 case FORMAT_D32F:
2748 case FORMAT_D32F_COMPLEMENTARY:
2749 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002750 case FORMAT_D32FS8_TEXTURE:
2751 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002752 case FORMAT_L16F:
2753 case FORMAT_A16L16F:
2754 case FORMAT_L32F:
2755 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002756 return true;
2757 default:
2758 ASSERT(false);
2759 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002760
John Bauman89401822014-05-06 15:04:28 -04002761 return false;
2762 }
2763
2764 bool Surface::isUnsignedComponent(Format format, int component)
2765 {
2766 switch(format)
2767 {
2768 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002769 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002770 case FORMAT_R8G8B8:
2771 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002772 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002773 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002774 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002775 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002776 case FORMAT_SRGB8_X8:
2777 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002778 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002779 case FORMAT_A2B10G10R10:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002780 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002781 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002782 case FORMAT_G16R16UI:
2783 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002784 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002785 case FORMAT_A16B16G16R16UI:
2786 case FORMAT_R32UI:
2787 case FORMAT_G32R32UI:
2788 case FORMAT_X32B32G32R32UI:
2789 case FORMAT_A32B32G32R32UI:
2790 case FORMAT_R8UI:
2791 case FORMAT_G8R8UI:
2792 case FORMAT_X8B8G8R8UI:
2793 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002794 case FORMAT_D32F:
2795 case FORMAT_D32F_COMPLEMENTARY:
2796 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002797 case FORMAT_D32FS8_TEXTURE:
2798 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002799 case FORMAT_A8:
2800 case FORMAT_R8:
2801 case FORMAT_L8:
2802 case FORMAT_L16:
2803 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002804 case FORMAT_YV12_BT601:
2805 case FORMAT_YV12_BT709:
2806 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002807 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002808 case FORMAT_A8B8G8R8I:
2809 case FORMAT_A16B16G16R16I:
2810 case FORMAT_A32B32G32R32I:
2811 case FORMAT_A8B8G8R8I_SNORM:
2812 case FORMAT_Q8W8V8U8:
2813 case FORMAT_Q16W16V16U16:
2814 case FORMAT_A32B32G32R32F:
2815 return false;
2816 case FORMAT_R32F:
2817 case FORMAT_R8I:
2818 case FORMAT_R16I:
2819 case FORMAT_R32I:
2820 case FORMAT_R8I_SNORM:
2821 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002822 case FORMAT_V8U8:
2823 case FORMAT_X8L8V8U8:
2824 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002825 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002826 case FORMAT_G8R8I:
2827 case FORMAT_G16R16I:
2828 case FORMAT_G32R32I:
2829 case FORMAT_G8R8I_SNORM:
2830 return component >= 2;
2831 case FORMAT_A16W16V16U16:
Nicolas Capens2e363b02016-12-14 10:32:36 -05002832 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002833 case FORMAT_X32B32G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002834 case FORMAT_X8B8G8R8I:
2835 case FORMAT_X16B16G16R16I:
2836 case FORMAT_X32B32G32R32I:
2837 case FORMAT_X8B8G8R8I_SNORM:
2838 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002839 default:
2840 ASSERT(false);
2841 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002842
John Bauman89401822014-05-06 15:04:28 -04002843 return false;
2844 }
2845
2846 bool Surface::isSRGBreadable(Format format)
2847 {
2848 // Keep in sync with Capabilities::isSRGBreadable
2849 switch(format)
2850 {
2851 case FORMAT_L8:
2852 case FORMAT_A8L8:
2853 case FORMAT_R8G8B8:
2854 case FORMAT_A8R8G8B8:
2855 case FORMAT_X8R8G8B8:
2856 case FORMAT_A8B8G8R8:
2857 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002858 case FORMAT_SRGB8_X8:
2859 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002860 case FORMAT_R5G6B5:
2861 case FORMAT_X1R5G5B5:
2862 case FORMAT_A1R5G5B5:
2863 case FORMAT_A4R4G4B4:
2864 #if S3TC_SUPPORT
2865 case FORMAT_DXT1:
2866 case FORMAT_DXT3:
2867 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002868 #endif
John Bauman89401822014-05-06 15:04:28 -04002869 case FORMAT_ATI1:
2870 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002871 return true;
2872 default:
2873 return false;
2874 }
John Bauman89401822014-05-06 15:04:28 -04002875 }
2876
2877 bool Surface::isSRGBwritable(Format format)
2878 {
2879 // Keep in sync with Capabilities::isSRGBwritable
2880 switch(format)
2881 {
2882 case FORMAT_NULL:
2883 case FORMAT_A8R8G8B8:
2884 case FORMAT_X8R8G8B8:
2885 case FORMAT_A8B8G8R8:
2886 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002887 case FORMAT_SRGB8_X8:
2888 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002889 case FORMAT_R5G6B5:
2890 return true;
2891 default:
2892 return false;
2893 }
2894 }
2895
2896 bool Surface::isCompressed(Format format)
2897 {
2898 switch(format)
2899 {
2900 #if S3TC_SUPPORT
2901 case FORMAT_DXT1:
2902 case FORMAT_DXT3:
2903 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002904 #endif
John Bauman89401822014-05-06 15:04:28 -04002905 case FORMAT_ATI1:
2906 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05002907 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002908 case FORMAT_R11_EAC:
2909 case FORMAT_SIGNED_R11_EAC:
2910 case FORMAT_RG11_EAC:
2911 case FORMAT_SIGNED_RG11_EAC:
2912 case FORMAT_RGB8_ETC2:
2913 case FORMAT_SRGB8_ETC2:
2914 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2915 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2916 case FORMAT_RGBA8_ETC2_EAC:
2917 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2918 case FORMAT_RGBA_ASTC_4x4_KHR:
2919 case FORMAT_RGBA_ASTC_5x4_KHR:
2920 case FORMAT_RGBA_ASTC_5x5_KHR:
2921 case FORMAT_RGBA_ASTC_6x5_KHR:
2922 case FORMAT_RGBA_ASTC_6x6_KHR:
2923 case FORMAT_RGBA_ASTC_8x5_KHR:
2924 case FORMAT_RGBA_ASTC_8x6_KHR:
2925 case FORMAT_RGBA_ASTC_8x8_KHR:
2926 case FORMAT_RGBA_ASTC_10x5_KHR:
2927 case FORMAT_RGBA_ASTC_10x6_KHR:
2928 case FORMAT_RGBA_ASTC_10x8_KHR:
2929 case FORMAT_RGBA_ASTC_10x10_KHR:
2930 case FORMAT_RGBA_ASTC_12x10_KHR:
2931 case FORMAT_RGBA_ASTC_12x12_KHR:
2932 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
2933 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2934 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2935 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2936 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2937 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2938 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2939 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2940 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2941 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2942 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2943 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2944 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2945 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04002946 return true;
John Bauman89401822014-05-06 15:04:28 -04002947 default:
2948 return false;
2949 }
2950 }
2951
Nicolas Capens492887a2017-03-27 14:50:51 -04002952 bool Surface::isSignedNonNormalizedInteger(Format format)
Alexis Hetu43577b82015-10-21 15:32:16 -04002953 {
2954 switch(format)
2955 {
2956 case FORMAT_A8B8G8R8I:
2957 case FORMAT_X8B8G8R8I:
2958 case FORMAT_G8R8I:
2959 case FORMAT_R8I:
2960 case FORMAT_A8B8G8R8UI:
2961 case FORMAT_X8B8G8R8UI:
2962 case FORMAT_G8R8UI:
2963 case FORMAT_R8UI:
2964 case FORMAT_A16B16G16R16I:
2965 case FORMAT_X16B16G16R16I:
2966 case FORMAT_G16R16I:
2967 case FORMAT_R16I:
Nicolas Capens492887a2017-03-27 14:50:51 -04002968 return true;
2969 default:
2970 return false;
2971 }
2972 }
2973
2974 bool Surface::isUnsignedNonNormalizedInteger(Format format)
2975 {
2976 switch(format)
2977 {
Alexis Hetu43577b82015-10-21 15:32:16 -04002978 case FORMAT_A16B16G16R16UI:
2979 case FORMAT_X16B16G16R16UI:
2980 case FORMAT_G16R16UI:
2981 case FORMAT_R16UI:
2982 case FORMAT_A32B32G32R32I:
2983 case FORMAT_X32B32G32R32I:
2984 case FORMAT_G32R32I:
2985 case FORMAT_R32I:
2986 case FORMAT_A32B32G32R32UI:
2987 case FORMAT_X32B32G32R32UI:
2988 case FORMAT_G32R32UI:
2989 case FORMAT_R32UI:
2990 return true;
2991 default:
2992 return false;
2993 }
2994 }
2995
Nicolas Capens492887a2017-03-27 14:50:51 -04002996 bool Surface::isNonNormalizedInteger(Format format)
2997 {
2998 return isSignedNonNormalizedInteger(format) ||
2999 isUnsignedNonNormalizedInteger(format);
3000 }
3001
3002 bool Surface::isNormalizedInteger(Format format)
3003 {
3004 return !isFloatFormat(format) &&
3005 !isNonNormalizedInteger(format) &&
3006 !isCompressed(format) &&
3007 !isDepth(format) &&
3008 !isStencil(format);
3009 }
3010
John Bauman89401822014-05-06 15:04:28 -04003011 int Surface::componentCount(Format format)
3012 {
3013 switch(format)
3014 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003015 case FORMAT_R5G6B5: return 3;
3016 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003017 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003018 case FORMAT_X8B8G8R8: return 3;
3019 case FORMAT_A8R8G8B8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04003020 case FORMAT_SRGB8_X8: return 3;
3021 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003022 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003023 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003024 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003025 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003026 case FORMAT_R8I_SNORM: return 1;
3027 case FORMAT_G8R8I_SNORM: return 2;
3028 case FORMAT_X8B8G8R8I_SNORM:return 3;
3029 case FORMAT_A8B8G8R8I_SNORM:return 4;
3030 case FORMAT_R8UI: return 1;
3031 case FORMAT_G8R8UI: return 2;
3032 case FORMAT_X8B8G8R8UI: return 3;
3033 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05003034 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003035 case FORMAT_G16R16I: return 2;
3036 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003037 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003038 case FORMAT_G32R32I: return 2;
3039 case FORMAT_G32R32UI: return 2;
3040 case FORMAT_X16B16G16R16I: return 3;
3041 case FORMAT_X16B16G16R16UI: return 3;
3042 case FORMAT_A16B16G16R16I: return 4;
3043 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003044 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003045 case FORMAT_X32B32G32R32I: return 3;
3046 case FORMAT_X32B32G32R32UI: return 3;
3047 case FORMAT_A32B32G32R32I: return 4;
3048 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003049 case FORMAT_V8U8: return 2;
3050 case FORMAT_Q8W8V8U8: return 4;
3051 case FORMAT_X8L8V8U8: return 3;
3052 case FORMAT_V16U16: return 2;
3053 case FORMAT_A16W16V16U16: return 4;
3054 case FORMAT_Q16W16V16U16: return 4;
3055 case FORMAT_R32F: return 1;
3056 case FORMAT_G32R32F: return 2;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003057 case FORMAT_X32B32G32R32F: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003058 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003059 case FORMAT_D32F: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003060 case FORMAT_D32F_LOCKABLE: return 1;
3061 case FORMAT_D32FS8_TEXTURE: return 1;
3062 case FORMAT_D32FS8_SHADOW: return 1;
3063 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003064 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003065 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003066 case FORMAT_R16I: return 1;
3067 case FORMAT_R16UI: return 1;
3068 case FORMAT_R32I: return 1;
3069 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003070 case FORMAT_L8: return 1;
3071 case FORMAT_L16: return 1;
3072 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003073 case FORMAT_YV12_BT601: return 3;
3074 case FORMAT_YV12_BT709: return 3;
3075 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04003076 default:
3077 ASSERT(false);
3078 }
3079
3080 return 1;
3081 }
3082
3083 void *Surface::allocateBuffer(int width, int height, int depth, Format format)
3084 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04003085 // Render targets require 2x2 quads
3086 int width2 = (width + 1) & ~1;
3087 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04003088
Nicolas Capens6ea71872015-06-26 13:00:48 -04003089 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
Nicolas Capens48ef1252016-11-07 15:30:33 -05003090 // and stencil operations also read 8 bytes per four 8-bit stencil values,
Nicolas Capens6ea71872015-06-26 13:00:48 -04003091 // so we have to allocate 4 extra bytes to avoid buffer overruns.
3092 return allocateZero(size(width2, height2, depth, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04003093 }
3094
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003095 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003096 {
3097 while((size_t)buffer & 0x1 && bytes >= 1)
3098 {
3099 *(char*)buffer = (char)pattern;
3100 (char*&)buffer += 1;
3101 bytes -= 1;
3102 }
3103
3104 while((size_t)buffer & 0x3 && bytes >= 2)
3105 {
3106 *(short*)buffer = (short)pattern;
3107 (short*&)buffer += 1;
3108 bytes -= 2;
3109 }
3110
Nicolas Capens47dc8672017-04-25 12:54:39 -04003111 #if defined(__i386__) || defined(__x86_64__)
3112 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04003113 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003114 while((size_t)buffer & 0xF && bytes >= 4)
3115 {
3116 *(int*)buffer = pattern;
3117 (int*&)buffer += 1;
3118 bytes -= 4;
3119 }
3120
3121 __m128 quad = _mm_set_ps1((float&)pattern);
3122
3123 float *pointer = (float*)buffer;
3124 int qxwords = bytes / 64;
3125 bytes -= qxwords * 64;
3126
3127 while(qxwords--)
3128 {
3129 _mm_stream_ps(pointer + 0, quad);
3130 _mm_stream_ps(pointer + 4, quad);
3131 _mm_stream_ps(pointer + 8, quad);
3132 _mm_stream_ps(pointer + 12, quad);
3133
3134 pointer += 16;
3135 }
3136
3137 buffer = pointer;
John Bauman89401822014-05-06 15:04:28 -04003138 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003139 #endif
John Bauman89401822014-05-06 15:04:28 -04003140
3141 while(bytes >= 4)
3142 {
3143 *(int*)buffer = (int)pattern;
3144 (int*&)buffer += 1;
3145 bytes -= 4;
3146 }
3147
3148 while(bytes >= 2)
3149 {
3150 *(short*)buffer = (short)pattern;
3151 (short*&)buffer += 1;
3152 bytes -= 2;
3153 }
3154
3155 while(bytes >= 1)
3156 {
3157 *(char*)buffer = (char)pattern;
3158 (char*&)buffer += 1;
3159 bytes -= 1;
3160 }
3161 }
3162
Nicolas Capensbf7a8142017-05-19 10:57:28 -04003163 void Surface::sync()
3164 {
3165 resource->lock(EXCLUSIVE);
3166 resource->unlock();
3167 }
3168
Alexis Hetu75b650f2015-11-19 17:40:15 -05003169 bool Surface::isEntire(const SliceRect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003170 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003171 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3172 }
John Bauman89401822014-05-06 15:04:28 -04003173
Nicolas Capensc39901e2016-03-21 16:37:44 -04003174 SliceRect Surface::getRect() const
Alexis Hetu75b650f2015-11-19 17:40:15 -05003175 {
Nicolas Capensc39901e2016-03-21 16:37:44 -04003176 return SliceRect(0, 0, internal.width, internal.height, 0);
John Bauman89401822014-05-06 15:04:28 -04003177 }
3178
Nicolas Capensc39901e2016-03-21 16:37:44 -04003179 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003180 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003181 if(width == 0 || height == 0) return;
3182
John Bauman89401822014-05-06 15:04:28 -04003183 // Not overlapping
3184 if(x0 > internal.width) return;
3185 if(y0 > internal.height) return;
3186 if(x0 + width < 0) return;
3187 if(y0 + height < 0) return;
3188
3189 // Clip against dimensions
3190 if(x0 < 0) {width += x0; x0 = 0;}
3191 if(x0 + width > internal.width) width = internal.width - x0;
3192 if(y0 < 0) {height += y0; y0 = 0;}
3193 if(y0 + height > internal.height) height = internal.height - y0;
3194
3195 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3196 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3197
3198 int width2 = (internal.width + 1) & ~1;
3199
3200 int x1 = x0 + width;
3201 int y1 = y0 + height;
3202
3203 if(internal.format == FORMAT_D32F_LOCKABLE ||
John Bauman66b8ab22014-05-06 15:57:45 -04003204 internal.format == FORMAT_D32FS8_TEXTURE ||
3205 internal.format == FORMAT_D32FS8_SHADOW)
John Bauman89401822014-05-06 15:04:28 -04003206 {
3207 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
3208
3209 for(int z = 0; z < internal.depth; z++)
3210 {
3211 for(int y = y0; y < y1; y++)
3212 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003213 memfill4(target, (int&)depth, 4 * width);
John Bauman89401822014-05-06 15:04:28 -04003214 target += width2;
3215 }
3216 }
3217
3218 unlockInternal();
3219 }
3220 else // Quad layout
3221 {
3222 if(complementaryDepthBuffer)
3223 {
3224 depth = 1 - depth;
3225 }
3226
3227 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3228
Alexis Hetu358a1442015-12-03 14:23:10 -05003229 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3230 int oddX1 = (x1 & ~1) * 2;
3231 int evenX0 = ((x0 + 1) & ~1) * 2;
3232 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3233
John Bauman89401822014-05-06 15:04:28 -04003234 for(int z = 0; z < internal.depth; z++)
3235 {
3236 for(int y = y0; y < y1; y++)
3237 {
3238 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003239
John Bauman89401822014-05-06 15:04:28 -04003240 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3241 {
3242 if((x0 & 1) != 0)
3243 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003244 target[oddX0 + 0] = depth;
3245 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003246 }
3247
Alexis Hetu358a1442015-12-03 14:23:10 -05003248 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003249 // {
3250 // target[x2 + 0] = depth;
3251 // target[x2 + 1] = depth;
3252 // target[x2 + 2] = depth;
3253 // target[x2 + 3] = depth;
3254 // }
3255
3256 // __asm
3257 // {
3258 // movss xmm0, depth
3259 // shufps xmm0, xmm0, 0x00
3260 //
3261 // mov eax, x0
3262 // add eax, 1
3263 // and eax, 0xFFFFFFFE
3264 // cmp eax, x1
3265 // jge qEnd
3266 //
3267 // mov edi, target
3268 //
3269 // qLoop:
3270 // movntps [edi+8*eax], xmm0
3271 //
3272 // add eax, 2
3273 // cmp eax, x1
3274 // jl qLoop
3275 // qEnd:
3276 // }
3277
Alexis Hetu358a1442015-12-03 14:23:10 -05003278 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003279
3280 if((x1 & 1) != 0)
3281 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003282 target[oddX1 + 0] = depth;
3283 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003284 }
3285
3286 y++;
3287 }
3288 else
3289 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003290 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003291 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003292 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003293 }
3294 }
3295 }
3296
3297 buffer += internal.sliceP;
3298 }
3299
3300 unlockInternal();
3301 }
3302 }
3303
Nicolas Capensc39901e2016-03-21 16:37:44 -04003304 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003305 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003306 if(mask == 0 || width == 0 || height == 0) return;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003307
John Bauman89401822014-05-06 15:04:28 -04003308 // Not overlapping
3309 if(x0 > internal.width) return;
3310 if(y0 > internal.height) return;
3311 if(x0 + width < 0) return;
3312 if(y0 + height < 0) return;
3313
3314 // Clip against dimensions
3315 if(x0 < 0) {width += x0; x0 = 0;}
3316 if(x0 + width > internal.width) width = internal.width - x0;
3317 if(y0 < 0) {height += y0; y0 = 0;}
3318 if(y0 + height > internal.height) height = internal.height - y0;
3319
3320 int width2 = (internal.width + 1) & ~1;
3321
3322 int x1 = x0 + width;
3323 int y1 = y0 + height;
3324
Alexis Hetu358a1442015-12-03 14:23:10 -05003325 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3326 int oddX1 = (x1 & ~1) * 2;
3327 int evenX0 = ((x0 + 1) & ~1) * 2;
3328 int evenBytes = oddX1 - evenX0;
3329
John Bauman89401822014-05-06 15:04:28 -04003330 unsigned char maskedS = s & mask;
3331 unsigned char invMask = ~mask;
3332 unsigned int fill = maskedS;
Tom Anderson69bc6e82017-03-20 11:54:29 -07003333 fill = fill | (fill << 8) | (fill << 16) | (fill << 24);
John Bauman89401822014-05-06 15:04:28 -04003334
Alexis Hetua52dfbd2016-10-05 17:03:30 -04003335 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003336
3337 // Stencil buffers are assumed to use quad layout
3338 for(int z = 0; z < stencil.depth; z++)
John Bauman89401822014-05-06 15:04:28 -04003339 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003340 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003341 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003342 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3343
3344 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003345 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003346 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003347 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003348 target[oddX0 + 0] = fill;
3349 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003350 }
3351
Alexis Hetu358a1442015-12-03 14:23:10 -05003352 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003353
3354 if((x1 & 1) != 0)
3355 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003356 target[oddX1 + 0] = fill;
3357 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003358 }
3359
3360 y++;
3361 }
3362 else
3363 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003364 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
Alexis Hetu2b052f82015-11-25 13:57:28 -05003365 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003366 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003367 }
John Bauman89401822014-05-06 15:04:28 -04003368 }
3369 }
3370
Alexis Hetu2b052f82015-11-25 13:57:28 -05003371 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003372 }
John Bauman89401822014-05-06 15:04:28 -04003373
Alexis Hetu2b052f82015-11-25 13:57:28 -05003374 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003375 }
3376
3377 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3378 {
3379 unsigned char *row;
3380 Buffer *buffer;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003381
John Bauman89401822014-05-06 15:04:28 -04003382 if(internal.dirty)
3383 {
3384 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3385 buffer = &internal;
3386 }
3387 else
3388 {
3389 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3390 buffer = &external;
3391 }
3392
3393 if(buffer->bytes <= 4)
3394 {
3395 int c;
3396 buffer->write(&c, color);
3397
3398 if(buffer->bytes <= 1) c = (c << 8) | c;
3399 if(buffer->bytes <= 2) c = (c << 16) | c;
3400
3401 for(int y = 0; y < height; y++)
3402 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003403 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003404
3405 row += buffer->pitchB;
3406 }
3407 }
3408 else // Generic
3409 {
3410 for(int y = 0; y < height; y++)
3411 {
3412 unsigned char *element = row;
3413
3414 for(int x = 0; x < width; x++)
3415 {
3416 buffer->write(element, color);
3417
3418 element += buffer->bytes;
3419 }
3420
3421 row += buffer->pitchB;
3422 }
3423 }
3424
3425 if(buffer == &internal)
3426 {
3427 unlockInternal();
3428 }
3429 else
3430 {
3431 unlockExternal();
3432 }
3433 }
3434
Alexis Hetu43577b82015-10-21 15:32:16 -04003435 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003436 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003437 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003438
Alexis Hetu43577b82015-10-21 15:32:16 -04003439 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003440
Alexis Hetu43577b82015-10-21 15:32:16 -04003441 if(!filter)
3442 {
3443 color = source->internal.read((int)srcX, (int)srcY);
3444 }
3445 else // Bilinear filtering
3446 {
3447 color = source->internal.sample(srcX, srcY);
3448 }
John Bauman89401822014-05-06 15:04:28 -04003449
3450 internal.write(x, y, color);
3451 }
3452
Alexis Hetu43577b82015-10-21 15:32:16 -04003453 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
3454 {
3455 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3456
3457 sw::Color<float> color;
3458
3459 if(!filter)
3460 {
3461 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3462 }
3463 else // Bilinear filtering
3464 {
3465 color = source->internal.sample(srcX, srcY, srcZ);
3466 }
3467
3468 internal.write(x, y, z, color);
3469 }
3470
John Bauman89401822014-05-06 15:04:28 -04003471 bool Surface::hasStencil() const
3472 {
3473 return isStencil(external.format);
3474 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04003475
John Bauman89401822014-05-06 15:04:28 -04003476 bool Surface::hasDepth() const
3477 {
3478 return isDepth(external.format);
3479 }
3480
3481 bool Surface::hasPalette() const
3482 {
3483 return isPalette(external.format);
3484 }
3485
3486 bool Surface::isRenderTarget() const
3487 {
3488 return renderTarget;
3489 }
3490
3491 bool Surface::hasDirtyMipmaps() const
3492 {
3493 return dirtyMipmaps;
3494 }
3495
3496 void Surface::cleanMipmaps()
3497 {
3498 dirtyMipmaps = false;
3499 }
3500
3501 Resource *Surface::getResource()
3502 {
3503 return resource;
3504 }
3505
3506 bool Surface::identicalFormats() const
3507 {
John Bauman66b8ab22014-05-06 15:57:45 -04003508 return external.format == internal.format &&
3509 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003510 external.height == internal.height &&
3511 external.depth == internal.depth &&
3512 external.pitchB == internal.pitchB &&
3513 external.sliceB == internal.sliceB;
John Bauman89401822014-05-06 15:04:28 -04003514 }
3515
3516 Format Surface::selectInternalFormat(Format format) const
3517 {
3518 switch(format)
3519 {
3520 case FORMAT_NULL:
3521 return FORMAT_NULL;
3522 case FORMAT_P8:
3523 case FORMAT_A8P8:
3524 case FORMAT_A4R4G4B4:
3525 case FORMAT_A1R5G5B5:
3526 case FORMAT_A8R3G3B2:
3527 return FORMAT_A8R8G8B8;
3528 case FORMAT_A8:
3529 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003530 case FORMAT_R8I:
3531 return FORMAT_R8I;
3532 case FORMAT_R8UI:
3533 return FORMAT_R8UI;
3534 case FORMAT_R8I_SNORM:
3535 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003536 case FORMAT_R8:
3537 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003538 case FORMAT_R16I:
3539 return FORMAT_R16I;
3540 case FORMAT_R16UI:
3541 return FORMAT_R16UI;
3542 case FORMAT_R32I:
3543 return FORMAT_R32I;
3544 case FORMAT_R32UI:
3545 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003546 case FORMAT_X16B16G16R16I:
3547 case FORMAT_A16B16G16R16I:
3548 return FORMAT_A16B16G16R16I;
3549 case FORMAT_X16B16G16R16UI:
3550 case FORMAT_A16B16G16R16UI:
3551 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003552 case FORMAT_A2R10G10B10:
3553 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003554 case FORMAT_A16B16G16R16:
3555 return FORMAT_A16B16G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003556 case FORMAT_X32B32G32R32I:
3557 case FORMAT_A32B32G32R32I:
3558 return FORMAT_A32B32G32R32I;
3559 case FORMAT_X32B32G32R32UI:
3560 case FORMAT_A32B32G32R32UI:
3561 return FORMAT_A32B32G32R32UI;
3562 case FORMAT_G8R8I:
3563 return FORMAT_G8R8I;
3564 case FORMAT_G8R8UI:
3565 return FORMAT_G8R8UI;
3566 case FORMAT_G8R8I_SNORM:
3567 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003568 case FORMAT_G8R8:
3569 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003570 case FORMAT_G16R16I:
3571 return FORMAT_G16R16I;
3572 case FORMAT_G16R16UI:
3573 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003574 case FORMAT_G16R16:
3575 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003576 case FORMAT_G32R32I:
3577 return FORMAT_G32R32I;
3578 case FORMAT_G32R32UI:
3579 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003580 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003581 if(lockable || !quadLayoutEnabled)
3582 {
3583 return FORMAT_A8R8G8B8;
3584 }
3585 else
3586 {
3587 return FORMAT_A8G8R8B8Q;
3588 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003589 case FORMAT_A8B8G8R8I:
3590 return FORMAT_A8B8G8R8I;
3591 case FORMAT_A8B8G8R8UI:
3592 return FORMAT_A8B8G8R8UI;
3593 case FORMAT_A8B8G8R8I_SNORM:
3594 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003595 case FORMAT_R5G5B5A1:
3596 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003597 case FORMAT_A8B8G8R8:
3598 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003599 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003600 return FORMAT_R5G6B5;
3601 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003602 case FORMAT_R8G8B8:
3603 case FORMAT_X4R4G4B4:
3604 case FORMAT_X1R5G5B5:
3605 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003606 if(lockable || !quadLayoutEnabled)
3607 {
3608 return FORMAT_X8R8G8B8;
3609 }
3610 else
3611 {
3612 return FORMAT_X8G8R8B8Q;
3613 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003614 case FORMAT_X8B8G8R8I:
3615 return FORMAT_X8B8G8R8I;
3616 case FORMAT_X8B8G8R8UI:
3617 return FORMAT_X8B8G8R8UI;
3618 case FORMAT_X8B8G8R8I_SNORM:
3619 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003620 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003621 case FORMAT_X8B8G8R8:
3622 return FORMAT_X8B8G8R8;
Alexis Hetu049a1872016-04-25 16:59:58 -04003623 case FORMAT_SRGB8_X8:
3624 return FORMAT_SRGB8_X8;
3625 case FORMAT_SRGB8_A8:
3626 return FORMAT_SRGB8_A8;
John Bauman89401822014-05-06 15:04:28 -04003627 // Compressed formats
3628 #if S3TC_SUPPORT
3629 case FORMAT_DXT1:
3630 case FORMAT_DXT3:
3631 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003632 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003633 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3634 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3635 case FORMAT_RGBA8_ETC2_EAC:
3636 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3637 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3638 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3639 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3640 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3641 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3642 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3643 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3644 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3645 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3646 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3647 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3648 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3649 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3650 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3651 return FORMAT_A8R8G8B8;
3652 case FORMAT_RGBA_ASTC_4x4_KHR:
3653 case FORMAT_RGBA_ASTC_5x4_KHR:
3654 case FORMAT_RGBA_ASTC_5x5_KHR:
3655 case FORMAT_RGBA_ASTC_6x5_KHR:
3656 case FORMAT_RGBA_ASTC_6x6_KHR:
3657 case FORMAT_RGBA_ASTC_8x5_KHR:
3658 case FORMAT_RGBA_ASTC_8x6_KHR:
3659 case FORMAT_RGBA_ASTC_8x8_KHR:
3660 case FORMAT_RGBA_ASTC_10x5_KHR:
3661 case FORMAT_RGBA_ASTC_10x6_KHR:
3662 case FORMAT_RGBA_ASTC_10x8_KHR:
3663 case FORMAT_RGBA_ASTC_10x10_KHR:
3664 case FORMAT_RGBA_ASTC_12x10_KHR:
3665 case FORMAT_RGBA_ASTC_12x12_KHR:
3666 // ASTC supports HDR, so a floating point format is required to represent it properly
3667 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003668 case FORMAT_ATI1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003669 case FORMAT_R11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003670 return FORMAT_R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003671 case FORMAT_SIGNED_R11_EAC:
3672 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003673 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003674 case FORMAT_RG11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003675 return FORMAT_G8R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003676 case FORMAT_SIGNED_RG11_EAC:
3677 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003678 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003679 case FORMAT_RGB8_ETC2:
3680 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003681 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003682 // Bumpmap formats
3683 case FORMAT_V8U8: return FORMAT_V8U8;
3684 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3685 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3686 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3687 case FORMAT_V16U16: return FORMAT_V16U16;
3688 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3689 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3690 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003691 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003692 case FORMAT_R16F: return FORMAT_R32F;
3693 case FORMAT_G16R16F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003694 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003695 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003696 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003697 case FORMAT_R32F: return FORMAT_R32F;
3698 case FORMAT_G32R32F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003699 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3700 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003701 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3702 // Luminance formats
3703 case FORMAT_L8: return FORMAT_L8;
3704 case FORMAT_A4L4: return FORMAT_A8L8;
3705 case FORMAT_L16: return FORMAT_L16;
3706 case FORMAT_A8L8: return FORMAT_A8L8;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003707 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003708 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003709 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003710 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003711 // Depth/stencil formats
3712 case FORMAT_D16:
3713 case FORMAT_D32:
3714 case FORMAT_D24X8:
3715 case FORMAT_D24S8:
3716 case FORMAT_D24FS8:
3717 if(hasParent) // Texture
3718 {
John Bauman66b8ab22014-05-06 15:57:45 -04003719 return FORMAT_D32FS8_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003720 }
3721 else if(complementaryDepthBuffer)
3722 {
3723 return FORMAT_D32F_COMPLEMENTARY;
3724 }
3725 else
3726 {
3727 return FORMAT_D32F;
3728 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003729 case FORMAT_D32F: return FORMAT_D32F;
John Bauman66b8ab22014-05-06 15:57:45 -04003730 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3731 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3732 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3733 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3734 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003735 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3736 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3737 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003738 default:
3739 ASSERT(false);
3740 }
3741
3742 return FORMAT_NULL;
3743 }
3744
3745 void Surface::setTexturePalette(unsigned int *palette)
3746 {
3747 Surface::palette = palette;
3748 Surface::paletteID++;
3749 }
3750
3751 void Surface::resolve()
3752 {
3753 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
3754 {
3755 return;
3756 }
3757
3758 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3759
John Bauman89401822014-05-06 15:04:28 -04003760 int width = internal.width;
3761 int height = internal.height;
3762 int pitch = internal.pitchB;
3763 int slice = internal.sliceB;
3764
3765 unsigned char *source0 = (unsigned char*)source;
3766 unsigned char *source1 = source0 + slice;
3767 unsigned char *source2 = source1 + slice;
3768 unsigned char *source3 = source2 + slice;
3769 unsigned char *source4 = source3 + slice;
3770 unsigned char *source5 = source4 + slice;
3771 unsigned char *source6 = source5 + slice;
3772 unsigned char *source7 = source6 + slice;
3773 unsigned char *source8 = source7 + slice;
3774 unsigned char *source9 = source8 + slice;
3775 unsigned char *sourceA = source9 + slice;
3776 unsigned char *sourceB = sourceA + slice;
3777 unsigned char *sourceC = sourceB + slice;
3778 unsigned char *sourceD = sourceC + slice;
3779 unsigned char *sourceE = sourceD + slice;
3780 unsigned char *sourceF = sourceE + slice;
3781
Alexis Hetu049a1872016-04-25 16:59:58 -04003782 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
3783 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
3784 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
John Bauman89401822014-05-06 15:04:28 -04003785 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003786 #if defined(__i386__) || defined(__x86_64__)
3787 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04003788 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003789 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04003790 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003791 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003792 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003793 for(int x = 0; x < width; x += 4)
3794 {
3795 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3796 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003797
Nicolas Capens47dc8672017-04-25 12:54:39 -04003798 c0 = _mm_avg_epu8(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04003799
Nicolas Capens47dc8672017-04-25 12:54:39 -04003800 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3801 }
3802
3803 source0 += pitch;
3804 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003805 }
John Bauman89401822014-05-06 15:04:28 -04003806 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003807 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04003808 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003809 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003810 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003811 for(int x = 0; x < width; x += 4)
3812 {
3813 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3814 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3815 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3816 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003817
Nicolas Capens47dc8672017-04-25 12:54:39 -04003818 c0 = _mm_avg_epu8(c0, c1);
3819 c2 = _mm_avg_epu8(c2, c3);
3820 c0 = _mm_avg_epu8(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04003821
Nicolas Capens47dc8672017-04-25 12:54:39 -04003822 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3823 }
3824
3825 source0 += pitch;
3826 source1 += pitch;
3827 source2 += pitch;
3828 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003829 }
John Bauman89401822014-05-06 15:04:28 -04003830 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003831 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04003832 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003833 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003834 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003835 for(int x = 0; x < width; x += 4)
3836 {
3837 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3838 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3839 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3840 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3841 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3842 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3843 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3844 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003845
Nicolas Capens47dc8672017-04-25 12:54:39 -04003846 c0 = _mm_avg_epu8(c0, c1);
3847 c2 = _mm_avg_epu8(c2, c3);
3848 c4 = _mm_avg_epu8(c4, c5);
3849 c6 = _mm_avg_epu8(c6, c7);
3850 c0 = _mm_avg_epu8(c0, c2);
3851 c4 = _mm_avg_epu8(c4, c6);
3852 c0 = _mm_avg_epu8(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04003853
Nicolas Capens47dc8672017-04-25 12:54:39 -04003854 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3855 }
3856
3857 source0 += pitch;
3858 source1 += pitch;
3859 source2 += pitch;
3860 source3 += pitch;
3861 source4 += pitch;
3862 source5 += pitch;
3863 source6 += pitch;
3864 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003865 }
John Bauman89401822014-05-06 15:04:28 -04003866 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003867 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04003868 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003869 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003870 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003871 for(int x = 0; x < width; x += 4)
3872 {
3873 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3874 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3875 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3876 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3877 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3878 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3879 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3880 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3881 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
3882 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
3883 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
3884 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
3885 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
3886 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
3887 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
3888 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04003889
Nicolas Capens47dc8672017-04-25 12:54:39 -04003890 c0 = _mm_avg_epu8(c0, c1);
3891 c2 = _mm_avg_epu8(c2, c3);
3892 c4 = _mm_avg_epu8(c4, c5);
3893 c6 = _mm_avg_epu8(c6, c7);
3894 c8 = _mm_avg_epu8(c8, c9);
3895 cA = _mm_avg_epu8(cA, cB);
3896 cC = _mm_avg_epu8(cC, cD);
3897 cE = _mm_avg_epu8(cE, cF);
3898 c0 = _mm_avg_epu8(c0, c2);
3899 c4 = _mm_avg_epu8(c4, c6);
3900 c8 = _mm_avg_epu8(c8, cA);
3901 cC = _mm_avg_epu8(cC, cE);
3902 c0 = _mm_avg_epu8(c0, c4);
3903 c8 = _mm_avg_epu8(c8, cC);
3904 c0 = _mm_avg_epu8(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04003905
Nicolas Capens47dc8672017-04-25 12:54:39 -04003906 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3907 }
3908
3909 source0 += pitch;
3910 source1 += pitch;
3911 source2 += pitch;
3912 source3 += pitch;
3913 source4 += pitch;
3914 source5 += pitch;
3915 source6 += pitch;
3916 source7 += pitch;
3917 source8 += pitch;
3918 source9 += pitch;
3919 sourceA += pitch;
3920 sourceB += pitch;
3921 sourceC += pitch;
3922 sourceD += pitch;
3923 sourceE += pitch;
3924 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04003925 }
John Bauman89401822014-05-06 15:04:28 -04003926 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003927 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04003928 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003929 else
3930 #endif
John Bauman89401822014-05-06 15:04:28 -04003931 {
3932 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
3933
3934 if(internal.depth == 2)
3935 {
3936 for(int y = 0; y < height; y++)
3937 {
3938 for(int x = 0; x < width; x++)
3939 {
3940 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3941 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3942
3943 c0 = AVERAGE(c0, c1);
3944
3945 *(unsigned int*)(source0 + 4 * x) = c0;
3946 }
3947
3948 source0 += pitch;
3949 source1 += pitch;
3950 }
3951 }
3952 else if(internal.depth == 4)
3953 {
3954 for(int y = 0; y < height; y++)
3955 {
3956 for(int x = 0; x < width; x++)
3957 {
3958 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3959 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3960 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3961 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3962
3963 c0 = AVERAGE(c0, c1);
3964 c2 = AVERAGE(c2, c3);
3965 c0 = AVERAGE(c0, c2);
3966
3967 *(unsigned int*)(source0 + 4 * x) = c0;
3968 }
3969
3970 source0 += pitch;
3971 source1 += pitch;
3972 source2 += pitch;
3973 source3 += pitch;
3974 }
3975 }
3976 else if(internal.depth == 8)
3977 {
3978 for(int y = 0; y < height; y++)
3979 {
3980 for(int x = 0; x < width; x++)
3981 {
3982 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3983 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3984 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3985 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3986 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3987 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3988 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3989 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3990
3991 c0 = AVERAGE(c0, c1);
3992 c2 = AVERAGE(c2, c3);
3993 c4 = AVERAGE(c4, c5);
3994 c6 = AVERAGE(c6, c7);
3995 c0 = AVERAGE(c0, c2);
3996 c4 = AVERAGE(c4, c6);
3997 c0 = AVERAGE(c0, c4);
3998
3999 *(unsigned int*)(source0 + 4 * x) = c0;
4000 }
4001
4002 source0 += pitch;
4003 source1 += pitch;
4004 source2 += pitch;
4005 source3 += pitch;
4006 source4 += pitch;
4007 source5 += pitch;
4008 source6 += pitch;
4009 source7 += pitch;
4010 }
4011 }
4012 else if(internal.depth == 16)
4013 {
4014 for(int y = 0; y < height; y++)
4015 {
4016 for(int x = 0; x < width; x++)
4017 {
4018 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4019 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4020 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4021 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4022 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4023 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4024 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4025 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4026 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4027 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4028 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4029 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4030 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4031 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4032 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4033 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4034
4035 c0 = AVERAGE(c0, c1);
4036 c2 = AVERAGE(c2, c3);
4037 c4 = AVERAGE(c4, c5);
4038 c6 = AVERAGE(c6, c7);
4039 c8 = AVERAGE(c8, c9);
4040 cA = AVERAGE(cA, cB);
4041 cC = AVERAGE(cC, cD);
4042 cE = AVERAGE(cE, cF);
4043 c0 = AVERAGE(c0, c2);
4044 c4 = AVERAGE(c4, c6);
4045 c8 = AVERAGE(c8, cA);
4046 cC = AVERAGE(cC, cE);
4047 c0 = AVERAGE(c0, c4);
4048 c8 = AVERAGE(c8, cC);
4049 c0 = AVERAGE(c0, c8);
4050
4051 *(unsigned int*)(source0 + 4 * x) = c0;
4052 }
4053
4054 source0 += pitch;
4055 source1 += pitch;
4056 source2 += pitch;
4057 source3 += pitch;
4058 source4 += pitch;
4059 source5 += pitch;
4060 source6 += pitch;
4061 source7 += pitch;
4062 source8 += pitch;
4063 source9 += pitch;
4064 sourceA += pitch;
4065 sourceB += pitch;
4066 sourceC += pitch;
4067 sourceD += pitch;
4068 sourceE += pitch;
4069 sourceF += pitch;
4070 }
4071 }
4072 else ASSERT(false);
4073
4074 #undef AVERAGE
4075 }
4076 }
4077 else if(internal.format == FORMAT_G16R16)
4078 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004079
4080 #if defined(__i386__) || defined(__x86_64__)
4081 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004082 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004083 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004084 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004085 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004086 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004087 for(int x = 0; x < width; x += 4)
4088 {
4089 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4090 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004091
Nicolas Capens47dc8672017-04-25 12:54:39 -04004092 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004093
Nicolas Capens47dc8672017-04-25 12:54:39 -04004094 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4095 }
4096
4097 source0 += pitch;
4098 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004099 }
John Bauman89401822014-05-06 15:04:28 -04004100 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004101 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004102 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004103 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004104 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004105 for(int x = 0; x < width; x += 4)
4106 {
4107 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4108 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4109 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4110 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004111
Nicolas Capens47dc8672017-04-25 12:54:39 -04004112 c0 = _mm_avg_epu16(c0, c1);
4113 c2 = _mm_avg_epu16(c2, c3);
4114 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004115
Nicolas Capens47dc8672017-04-25 12:54:39 -04004116 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4117 }
4118
4119 source0 += pitch;
4120 source1 += pitch;
4121 source2 += pitch;
4122 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004123 }
John Bauman89401822014-05-06 15:04:28 -04004124 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004125 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004126 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004127 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004128 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004129 for(int x = 0; x < width; x += 4)
4130 {
4131 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4132 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4133 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4134 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4135 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4136 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4137 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4138 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004139
Nicolas Capens47dc8672017-04-25 12:54:39 -04004140 c0 = _mm_avg_epu16(c0, c1);
4141 c2 = _mm_avg_epu16(c2, c3);
4142 c4 = _mm_avg_epu16(c4, c5);
4143 c6 = _mm_avg_epu16(c6, c7);
4144 c0 = _mm_avg_epu16(c0, c2);
4145 c4 = _mm_avg_epu16(c4, c6);
4146 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004147
Nicolas Capens47dc8672017-04-25 12:54:39 -04004148 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4149 }
4150
4151 source0 += pitch;
4152 source1 += pitch;
4153 source2 += pitch;
4154 source3 += pitch;
4155 source4 += pitch;
4156 source5 += pitch;
4157 source6 += pitch;
4158 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004159 }
John Bauman89401822014-05-06 15:04:28 -04004160 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004161 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004162 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004163 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004164 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004165 for(int x = 0; x < width; x += 4)
4166 {
4167 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4168 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4169 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4170 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4171 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4172 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4173 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4174 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4175 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4176 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4177 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4178 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4179 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4180 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4181 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4182 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004183
Nicolas Capens47dc8672017-04-25 12:54:39 -04004184 c0 = _mm_avg_epu16(c0, c1);
4185 c2 = _mm_avg_epu16(c2, c3);
4186 c4 = _mm_avg_epu16(c4, c5);
4187 c6 = _mm_avg_epu16(c6, c7);
4188 c8 = _mm_avg_epu16(c8, c9);
4189 cA = _mm_avg_epu16(cA, cB);
4190 cC = _mm_avg_epu16(cC, cD);
4191 cE = _mm_avg_epu16(cE, cF);
4192 c0 = _mm_avg_epu16(c0, c2);
4193 c4 = _mm_avg_epu16(c4, c6);
4194 c8 = _mm_avg_epu16(c8, cA);
4195 cC = _mm_avg_epu16(cC, cE);
4196 c0 = _mm_avg_epu16(c0, c4);
4197 c8 = _mm_avg_epu16(c8, cC);
4198 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004199
Nicolas Capens47dc8672017-04-25 12:54:39 -04004200 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4201 }
4202
4203 source0 += pitch;
4204 source1 += pitch;
4205 source2 += pitch;
4206 source3 += pitch;
4207 source4 += pitch;
4208 source5 += pitch;
4209 source6 += pitch;
4210 source7 += pitch;
4211 source8 += pitch;
4212 source9 += pitch;
4213 sourceA += pitch;
4214 sourceB += pitch;
4215 sourceC += pitch;
4216 sourceD += pitch;
4217 sourceE += pitch;
4218 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004219 }
John Bauman89401822014-05-06 15:04:28 -04004220 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004221 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004222 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004223 else
4224 #endif
John Bauman89401822014-05-06 15:04:28 -04004225 {
4226 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4227
4228 if(internal.depth == 2)
4229 {
4230 for(int y = 0; y < height; y++)
4231 {
4232 for(int x = 0; x < width; x++)
4233 {
4234 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4235 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4236
4237 c0 = AVERAGE(c0, c1);
4238
4239 *(unsigned int*)(source0 + 4 * x) = c0;
4240 }
4241
4242 source0 += pitch;
4243 source1 += pitch;
4244 }
4245 }
4246 else if(internal.depth == 4)
4247 {
4248 for(int y = 0; y < height; y++)
4249 {
4250 for(int x = 0; x < width; x++)
4251 {
4252 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4253 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4254 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4255 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4256
4257 c0 = AVERAGE(c0, c1);
4258 c2 = AVERAGE(c2, c3);
4259 c0 = AVERAGE(c0, c2);
4260
4261 *(unsigned int*)(source0 + 4 * x) = c0;
4262 }
4263
4264 source0 += pitch;
4265 source1 += pitch;
4266 source2 += pitch;
4267 source3 += pitch;
4268 }
4269 }
4270 else if(internal.depth == 8)
4271 {
4272 for(int y = 0; y < height; y++)
4273 {
4274 for(int x = 0; x < width; x++)
4275 {
4276 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4277 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4278 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4279 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4280 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4281 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4282 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4283 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4284
4285 c0 = AVERAGE(c0, c1);
4286 c2 = AVERAGE(c2, c3);
4287 c4 = AVERAGE(c4, c5);
4288 c6 = AVERAGE(c6, c7);
4289 c0 = AVERAGE(c0, c2);
4290 c4 = AVERAGE(c4, c6);
4291 c0 = AVERAGE(c0, c4);
4292
4293 *(unsigned int*)(source0 + 4 * x) = c0;
4294 }
4295
4296 source0 += pitch;
4297 source1 += pitch;
4298 source2 += pitch;
4299 source3 += pitch;
4300 source4 += pitch;
4301 source5 += pitch;
4302 source6 += pitch;
4303 source7 += pitch;
4304 }
4305 }
4306 else if(internal.depth == 16)
4307 {
4308 for(int y = 0; y < height; y++)
4309 {
4310 for(int x = 0; x < width; x++)
4311 {
4312 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4313 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4314 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4315 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4316 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4317 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4318 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4319 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4320 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4321 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4322 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4323 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4324 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4325 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4326 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4327 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4328
4329 c0 = AVERAGE(c0, c1);
4330 c2 = AVERAGE(c2, c3);
4331 c4 = AVERAGE(c4, c5);
4332 c6 = AVERAGE(c6, c7);
4333 c8 = AVERAGE(c8, c9);
4334 cA = AVERAGE(cA, cB);
4335 cC = AVERAGE(cC, cD);
4336 cE = AVERAGE(cE, cF);
4337 c0 = AVERAGE(c0, c2);
4338 c4 = AVERAGE(c4, c6);
4339 c8 = AVERAGE(c8, cA);
4340 cC = AVERAGE(cC, cE);
4341 c0 = AVERAGE(c0, c4);
4342 c8 = AVERAGE(c8, cC);
4343 c0 = AVERAGE(c0, c8);
4344
4345 *(unsigned int*)(source0 + 4 * x) = c0;
4346 }
4347
4348 source0 += pitch;
4349 source1 += pitch;
4350 source2 += pitch;
4351 source3 += pitch;
4352 source4 += pitch;
4353 source5 += pitch;
4354 source6 += pitch;
4355 source7 += pitch;
4356 source8 += pitch;
4357 source9 += pitch;
4358 sourceA += pitch;
4359 sourceB += pitch;
4360 sourceC += pitch;
4361 sourceD += pitch;
4362 sourceE += pitch;
4363 sourceF += pitch;
4364 }
4365 }
4366 else ASSERT(false);
4367
4368 #undef AVERAGE
4369 }
4370 }
4371 else if(internal.format == FORMAT_A16B16G16R16)
4372 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004373 #if defined(__i386__) || defined(__x86_64__)
4374 if(CPUID::supportsSSE2() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004375 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004376 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004377 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004378 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004379 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004380 for(int x = 0; x < width; x += 2)
4381 {
4382 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4383 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004384
Nicolas Capens47dc8672017-04-25 12:54:39 -04004385 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004386
Nicolas Capens47dc8672017-04-25 12:54:39 -04004387 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4388 }
4389
4390 source0 += pitch;
4391 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004392 }
John Bauman89401822014-05-06 15:04:28 -04004393 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004394 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004395 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004396 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004397 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004398 for(int x = 0; x < width; x += 2)
4399 {
4400 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4401 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4402 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4403 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004404
Nicolas Capens47dc8672017-04-25 12:54:39 -04004405 c0 = _mm_avg_epu16(c0, c1);
4406 c2 = _mm_avg_epu16(c2, c3);
4407 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004408
Nicolas Capens47dc8672017-04-25 12:54:39 -04004409 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4410 }
4411
4412 source0 += pitch;
4413 source1 += pitch;
4414 source2 += pitch;
4415 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004416 }
John Bauman89401822014-05-06 15:04:28 -04004417 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004418 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004419 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004420 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004421 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004422 for(int x = 0; x < width; x += 2)
4423 {
4424 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4425 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4426 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4427 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4428 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4429 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4430 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4431 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004432
Nicolas Capens47dc8672017-04-25 12:54:39 -04004433 c0 = _mm_avg_epu16(c0, c1);
4434 c2 = _mm_avg_epu16(c2, c3);
4435 c4 = _mm_avg_epu16(c4, c5);
4436 c6 = _mm_avg_epu16(c6, c7);
4437 c0 = _mm_avg_epu16(c0, c2);
4438 c4 = _mm_avg_epu16(c4, c6);
4439 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004440
Nicolas Capens47dc8672017-04-25 12:54:39 -04004441 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4442 }
4443
4444 source0 += pitch;
4445 source1 += pitch;
4446 source2 += pitch;
4447 source3 += pitch;
4448 source4 += pitch;
4449 source5 += pitch;
4450 source6 += pitch;
4451 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004452 }
John Bauman89401822014-05-06 15:04:28 -04004453 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004454 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004455 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004456 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004457 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004458 for(int x = 0; x < width; x += 2)
4459 {
4460 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4461 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4462 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4463 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4464 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4465 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4466 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4467 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4468 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4469 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4470 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4471 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4472 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4473 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4474 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4475 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04004476
Nicolas Capens47dc8672017-04-25 12:54:39 -04004477 c0 = _mm_avg_epu16(c0, c1);
4478 c2 = _mm_avg_epu16(c2, c3);
4479 c4 = _mm_avg_epu16(c4, c5);
4480 c6 = _mm_avg_epu16(c6, c7);
4481 c8 = _mm_avg_epu16(c8, c9);
4482 cA = _mm_avg_epu16(cA, cB);
4483 cC = _mm_avg_epu16(cC, cD);
4484 cE = _mm_avg_epu16(cE, cF);
4485 c0 = _mm_avg_epu16(c0, c2);
4486 c4 = _mm_avg_epu16(c4, c6);
4487 c8 = _mm_avg_epu16(c8, cA);
4488 cC = _mm_avg_epu16(cC, cE);
4489 c0 = _mm_avg_epu16(c0, c4);
4490 c8 = _mm_avg_epu16(c8, cC);
4491 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004492
Nicolas Capens47dc8672017-04-25 12:54:39 -04004493 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4494 }
4495
4496 source0 += pitch;
4497 source1 += pitch;
4498 source2 += pitch;
4499 source3 += pitch;
4500 source4 += pitch;
4501 source5 += pitch;
4502 source6 += pitch;
4503 source7 += pitch;
4504 source8 += pitch;
4505 source9 += pitch;
4506 sourceA += pitch;
4507 sourceB += pitch;
4508 sourceC += pitch;
4509 sourceD += pitch;
4510 sourceE += pitch;
4511 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004512 }
John Bauman89401822014-05-06 15:04:28 -04004513 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004514 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004515 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004516 else
4517 #endif
John Bauman89401822014-05-06 15:04:28 -04004518 {
4519 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4520
4521 if(internal.depth == 2)
4522 {
4523 for(int y = 0; y < height; y++)
4524 {
4525 for(int x = 0; x < 2 * width; x++)
4526 {
4527 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4528 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4529
4530 c0 = AVERAGE(c0, c1);
4531
4532 *(unsigned int*)(source0 + 4 * x) = c0;
4533 }
4534
4535 source0 += pitch;
4536 source1 += pitch;
4537 }
4538 }
4539 else if(internal.depth == 4)
4540 {
4541 for(int y = 0; y < height; y++)
4542 {
4543 for(int x = 0; x < 2 * width; x++)
4544 {
4545 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4546 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4547 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4548 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4549
4550 c0 = AVERAGE(c0, c1);
4551 c2 = AVERAGE(c2, c3);
4552 c0 = AVERAGE(c0, c2);
4553
4554 *(unsigned int*)(source0 + 4 * x) = c0;
4555 }
4556
4557 source0 += pitch;
4558 source1 += pitch;
4559 source2 += pitch;
4560 source3 += pitch;
4561 }
4562 }
4563 else if(internal.depth == 8)
4564 {
4565 for(int y = 0; y < height; y++)
4566 {
4567 for(int x = 0; x < 2 * width; x++)
4568 {
4569 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4570 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4571 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4572 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4573 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4574 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4575 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4576 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4577
4578 c0 = AVERAGE(c0, c1);
4579 c2 = AVERAGE(c2, c3);
4580 c4 = AVERAGE(c4, c5);
4581 c6 = AVERAGE(c6, c7);
4582 c0 = AVERAGE(c0, c2);
4583 c4 = AVERAGE(c4, c6);
4584 c0 = AVERAGE(c0, c4);
4585
4586 *(unsigned int*)(source0 + 4 * x) = c0;
4587 }
4588
4589 source0 += pitch;
4590 source1 += pitch;
4591 source2 += pitch;
4592 source3 += pitch;
4593 source4 += pitch;
4594 source5 += pitch;
4595 source6 += pitch;
4596 source7 += pitch;
4597 }
4598 }
4599 else if(internal.depth == 16)
4600 {
4601 for(int y = 0; y < height; y++)
4602 {
4603 for(int x = 0; x < 2 * width; x++)
4604 {
4605 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4606 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4607 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4608 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4609 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4610 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4611 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4612 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4613 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4614 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4615 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4616 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4617 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4618 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4619 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4620 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4621
4622 c0 = AVERAGE(c0, c1);
4623 c2 = AVERAGE(c2, c3);
4624 c4 = AVERAGE(c4, c5);
4625 c6 = AVERAGE(c6, c7);
4626 c8 = AVERAGE(c8, c9);
4627 cA = AVERAGE(cA, cB);
4628 cC = AVERAGE(cC, cD);
4629 cE = AVERAGE(cE, cF);
4630 c0 = AVERAGE(c0, c2);
4631 c4 = AVERAGE(c4, c6);
4632 c8 = AVERAGE(c8, cA);
4633 cC = AVERAGE(cC, cE);
4634 c0 = AVERAGE(c0, c4);
4635 c8 = AVERAGE(c8, cC);
4636 c0 = AVERAGE(c0, c8);
4637
4638 *(unsigned int*)(source0 + 4 * x) = c0;
4639 }
4640
4641 source0 += pitch;
4642 source1 += pitch;
4643 source2 += pitch;
4644 source3 += pitch;
4645 source4 += pitch;
4646 source5 += pitch;
4647 source6 += pitch;
4648 source7 += pitch;
4649 source8 += pitch;
4650 source9 += pitch;
4651 sourceA += pitch;
4652 sourceB += pitch;
4653 sourceC += pitch;
4654 sourceD += pitch;
4655 sourceE += pitch;
4656 sourceF += pitch;
4657 }
4658 }
4659 else ASSERT(false);
4660
4661 #undef AVERAGE
4662 }
4663 }
4664 else if(internal.format == FORMAT_R32F)
4665 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004666 #if defined(__i386__) || defined(__x86_64__)
4667 if(CPUID::supportsSSE() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004668 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004669 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004670 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004671 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004672 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004673 for(int x = 0; x < width; x += 4)
4674 {
4675 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4676 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004677
Nicolas Capens47dc8672017-04-25 12:54:39 -04004678 c0 = _mm_add_ps(c0, c1);
4679 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004680
Nicolas Capens47dc8672017-04-25 12:54:39 -04004681 _mm_store_ps((float*)(source0 + 4 * x), c0);
4682 }
4683
4684 source0 += pitch;
4685 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004686 }
John Bauman89401822014-05-06 15:04:28 -04004687 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004688 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004689 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004690 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004691 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004692 for(int x = 0; x < width; x += 4)
4693 {
4694 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4695 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4696 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4697 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004698
Nicolas Capens47dc8672017-04-25 12:54:39 -04004699 c0 = _mm_add_ps(c0, c1);
4700 c2 = _mm_add_ps(c2, c3);
4701 c0 = _mm_add_ps(c0, c2);
4702 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004703
Nicolas Capens47dc8672017-04-25 12:54:39 -04004704 _mm_store_ps((float*)(source0 + 4 * x), c0);
4705 }
4706
4707 source0 += pitch;
4708 source1 += pitch;
4709 source2 += pitch;
4710 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004711 }
John Bauman89401822014-05-06 15:04:28 -04004712 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004713 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004714 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004715 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004716 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004717 for(int x = 0; x < width; x += 4)
4718 {
4719 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4720 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4721 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4722 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4723 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4724 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4725 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4726 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004727
Nicolas Capens47dc8672017-04-25 12:54:39 -04004728 c0 = _mm_add_ps(c0, c1);
4729 c2 = _mm_add_ps(c2, c3);
4730 c4 = _mm_add_ps(c4, c5);
4731 c6 = _mm_add_ps(c6, c7);
4732 c0 = _mm_add_ps(c0, c2);
4733 c4 = _mm_add_ps(c4, c6);
4734 c0 = _mm_add_ps(c0, c4);
4735 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004736
Nicolas Capens47dc8672017-04-25 12:54:39 -04004737 _mm_store_ps((float*)(source0 + 4 * x), c0);
4738 }
4739
4740 source0 += pitch;
4741 source1 += pitch;
4742 source2 += pitch;
4743 source3 += pitch;
4744 source4 += pitch;
4745 source5 += pitch;
4746 source6 += pitch;
4747 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004748 }
John Bauman89401822014-05-06 15:04:28 -04004749 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004750 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004751 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004752 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004753 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004754 for(int x = 0; x < width; x += 4)
4755 {
4756 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4757 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4758 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4759 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4760 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4761 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4762 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4763 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4764 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4765 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4766 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4767 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4768 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
4769 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
4770 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
4771 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004772
Nicolas Capens47dc8672017-04-25 12:54:39 -04004773 c0 = _mm_add_ps(c0, c1);
4774 c2 = _mm_add_ps(c2, c3);
4775 c4 = _mm_add_ps(c4, c5);
4776 c6 = _mm_add_ps(c6, c7);
4777 c8 = _mm_add_ps(c8, c9);
4778 cA = _mm_add_ps(cA, cB);
4779 cC = _mm_add_ps(cC, cD);
4780 cE = _mm_add_ps(cE, cF);
4781 c0 = _mm_add_ps(c0, c2);
4782 c4 = _mm_add_ps(c4, c6);
4783 c8 = _mm_add_ps(c8, cA);
4784 cC = _mm_add_ps(cC, cE);
4785 c0 = _mm_add_ps(c0, c4);
4786 c8 = _mm_add_ps(c8, cC);
4787 c0 = _mm_add_ps(c0, c8);
4788 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04004789
Nicolas Capens47dc8672017-04-25 12:54:39 -04004790 _mm_store_ps((float*)(source0 + 4 * x), c0);
4791 }
4792
4793 source0 += pitch;
4794 source1 += pitch;
4795 source2 += pitch;
4796 source3 += pitch;
4797 source4 += pitch;
4798 source5 += pitch;
4799 source6 += pitch;
4800 source7 += pitch;
4801 source8 += pitch;
4802 source9 += pitch;
4803 sourceA += pitch;
4804 sourceB += pitch;
4805 sourceC += pitch;
4806 sourceD += pitch;
4807 sourceE += pitch;
4808 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004809 }
John Bauman89401822014-05-06 15:04:28 -04004810 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004811 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004812 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004813 else
4814 #endif
John Bauman89401822014-05-06 15:04:28 -04004815 {
4816 if(internal.depth == 2)
4817 {
4818 for(int y = 0; y < height; y++)
4819 {
4820 for(int x = 0; x < width; x++)
4821 {
4822 float c0 = *(float*)(source0 + 4 * x);
4823 float c1 = *(float*)(source1 + 4 * x);
4824
4825 c0 = c0 + c1;
4826 c0 *= 1.0f / 2.0f;
4827
4828 *(float*)(source0 + 4 * x) = c0;
4829 }
4830
4831 source0 += pitch;
4832 source1 += pitch;
4833 }
4834 }
4835 else if(internal.depth == 4)
4836 {
4837 for(int y = 0; y < height; y++)
4838 {
4839 for(int x = 0; x < width; x++)
4840 {
4841 float c0 = *(float*)(source0 + 4 * x);
4842 float c1 = *(float*)(source1 + 4 * x);
4843 float c2 = *(float*)(source2 + 4 * x);
4844 float c3 = *(float*)(source3 + 4 * x);
4845
4846 c0 = c0 + c1;
4847 c2 = c2 + c3;
4848 c0 = c0 + c2;
4849 c0 *= 1.0f / 4.0f;
4850
4851 *(float*)(source0 + 4 * x) = c0;
4852 }
4853
4854 source0 += pitch;
4855 source1 += pitch;
4856 source2 += pitch;
4857 source3 += pitch;
4858 }
4859 }
4860 else if(internal.depth == 8)
4861 {
4862 for(int y = 0; y < height; y++)
4863 {
4864 for(int x = 0; x < width; x++)
4865 {
4866 float c0 = *(float*)(source0 + 4 * x);
4867 float c1 = *(float*)(source1 + 4 * x);
4868 float c2 = *(float*)(source2 + 4 * x);
4869 float c3 = *(float*)(source3 + 4 * x);
4870 float c4 = *(float*)(source4 + 4 * x);
4871 float c5 = *(float*)(source5 + 4 * x);
4872 float c6 = *(float*)(source6 + 4 * x);
4873 float c7 = *(float*)(source7 + 4 * x);
4874
4875 c0 = c0 + c1;
4876 c2 = c2 + c3;
4877 c4 = c4 + c5;
4878 c6 = c6 + c7;
4879 c0 = c0 + c2;
4880 c4 = c4 + c6;
4881 c0 = c0 + c4;
4882 c0 *= 1.0f / 8.0f;
4883
4884 *(float*)(source0 + 4 * x) = c0;
4885 }
4886
4887 source0 += pitch;
4888 source1 += pitch;
4889 source2 += pitch;
4890 source3 += pitch;
4891 source4 += pitch;
4892 source5 += pitch;
4893 source6 += pitch;
4894 source7 += pitch;
4895 }
4896 }
4897 else if(internal.depth == 16)
4898 {
4899 for(int y = 0; y < height; y++)
4900 {
4901 for(int x = 0; x < width; x++)
4902 {
4903 float c0 = *(float*)(source0 + 4 * x);
4904 float c1 = *(float*)(source1 + 4 * x);
4905 float c2 = *(float*)(source2 + 4 * x);
4906 float c3 = *(float*)(source3 + 4 * x);
4907 float c4 = *(float*)(source4 + 4 * x);
4908 float c5 = *(float*)(source5 + 4 * x);
4909 float c6 = *(float*)(source6 + 4 * x);
4910 float c7 = *(float*)(source7 + 4 * x);
4911 float c8 = *(float*)(source8 + 4 * x);
4912 float c9 = *(float*)(source9 + 4 * x);
4913 float cA = *(float*)(sourceA + 4 * x);
4914 float cB = *(float*)(sourceB + 4 * x);
4915 float cC = *(float*)(sourceC + 4 * x);
4916 float cD = *(float*)(sourceD + 4 * x);
4917 float cE = *(float*)(sourceE + 4 * x);
4918 float cF = *(float*)(sourceF + 4 * x);
4919
4920 c0 = c0 + c1;
4921 c2 = c2 + c3;
4922 c4 = c4 + c5;
4923 c6 = c6 + c7;
4924 c8 = c8 + c9;
4925 cA = cA + cB;
4926 cC = cC + cD;
4927 cE = cE + cF;
4928 c0 = c0 + c2;
4929 c4 = c4 + c6;
4930 c8 = c8 + cA;
4931 cC = cC + cE;
4932 c0 = c0 + c4;
4933 c8 = c8 + cC;
4934 c0 = c0 + c8;
4935 c0 *= 1.0f / 16.0f;
4936
4937 *(float*)(source0 + 4 * x) = c0;
4938 }
4939
4940 source0 += pitch;
4941 source1 += pitch;
4942 source2 += pitch;
4943 source3 += pitch;
4944 source4 += pitch;
4945 source5 += pitch;
4946 source6 += pitch;
4947 source7 += pitch;
4948 source8 += pitch;
4949 source9 += pitch;
4950 sourceA += pitch;
4951 sourceB += pitch;
4952 sourceC += pitch;
4953 sourceD += pitch;
4954 sourceE += pitch;
4955 sourceF += pitch;
4956 }
4957 }
4958 else ASSERT(false);
4959 }
4960 }
4961 else if(internal.format == FORMAT_G32R32F)
4962 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004963 #if defined(__i386__) || defined(__x86_64__)
4964 if(CPUID::supportsSSE() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004965 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004966 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004967 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004968 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004969 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004970 for(int x = 0; x < width; x += 2)
4971 {
4972 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4973 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004974
Nicolas Capens47dc8672017-04-25 12:54:39 -04004975 c0 = _mm_add_ps(c0, c1);
4976 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004977
Nicolas Capens47dc8672017-04-25 12:54:39 -04004978 _mm_store_ps((float*)(source0 + 8 * x), c0);
4979 }
4980
4981 source0 += pitch;
4982 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004983 }
John Bauman89401822014-05-06 15:04:28 -04004984 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004985 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004986 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004987 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004988 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004989 for(int x = 0; x < width; x += 2)
4990 {
4991 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4992 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4993 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4994 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004995
Nicolas Capens47dc8672017-04-25 12:54:39 -04004996 c0 = _mm_add_ps(c0, c1);
4997 c2 = _mm_add_ps(c2, c3);
4998 c0 = _mm_add_ps(c0, c2);
4999 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005000
Nicolas Capens47dc8672017-04-25 12:54:39 -04005001 _mm_store_ps((float*)(source0 + 8 * x), c0);
5002 }
5003
5004 source0 += pitch;
5005 source1 += pitch;
5006 source2 += pitch;
5007 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005008 }
John Bauman89401822014-05-06 15:04:28 -04005009 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005010 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04005011 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005012 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005013 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005014 for(int x = 0; x < width; x += 2)
5015 {
5016 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5017 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5018 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5019 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5020 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5021 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5022 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5023 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005024
Nicolas Capens47dc8672017-04-25 12:54:39 -04005025 c0 = _mm_add_ps(c0, c1);
5026 c2 = _mm_add_ps(c2, c3);
5027 c4 = _mm_add_ps(c4, c5);
5028 c6 = _mm_add_ps(c6, c7);
5029 c0 = _mm_add_ps(c0, c2);
5030 c4 = _mm_add_ps(c4, c6);
5031 c0 = _mm_add_ps(c0, c4);
5032 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005033
Nicolas Capens47dc8672017-04-25 12:54:39 -04005034 _mm_store_ps((float*)(source0 + 8 * x), c0);
5035 }
5036
5037 source0 += pitch;
5038 source1 += pitch;
5039 source2 += pitch;
5040 source3 += pitch;
5041 source4 += pitch;
5042 source5 += pitch;
5043 source6 += pitch;
5044 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005045 }
John Bauman89401822014-05-06 15:04:28 -04005046 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005047 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04005048 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005049 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005050 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005051 for(int x = 0; x < width; x += 2)
5052 {
5053 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5054 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5055 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5056 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5057 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5058 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5059 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5060 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5061 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5062 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5063 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5064 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5065 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5066 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5067 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5068 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04005069
Nicolas Capens47dc8672017-04-25 12:54:39 -04005070 c0 = _mm_add_ps(c0, c1);
5071 c2 = _mm_add_ps(c2, c3);
5072 c4 = _mm_add_ps(c4, c5);
5073 c6 = _mm_add_ps(c6, c7);
5074 c8 = _mm_add_ps(c8, c9);
5075 cA = _mm_add_ps(cA, cB);
5076 cC = _mm_add_ps(cC, cD);
5077 cE = _mm_add_ps(cE, cF);
5078 c0 = _mm_add_ps(c0, c2);
5079 c4 = _mm_add_ps(c4, c6);
5080 c8 = _mm_add_ps(c8, cA);
5081 cC = _mm_add_ps(cC, cE);
5082 c0 = _mm_add_ps(c0, c4);
5083 c8 = _mm_add_ps(c8, cC);
5084 c0 = _mm_add_ps(c0, c8);
5085 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005086
Nicolas Capens47dc8672017-04-25 12:54:39 -04005087 _mm_store_ps((float*)(source0 + 8 * x), c0);
5088 }
5089
5090 source0 += pitch;
5091 source1 += pitch;
5092 source2 += pitch;
5093 source3 += pitch;
5094 source4 += pitch;
5095 source5 += pitch;
5096 source6 += pitch;
5097 source7 += pitch;
5098 source8 += pitch;
5099 source9 += pitch;
5100 sourceA += pitch;
5101 sourceB += pitch;
5102 sourceC += pitch;
5103 sourceD += pitch;
5104 sourceE += pitch;
5105 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005106 }
John Bauman89401822014-05-06 15:04:28 -04005107 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005108 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005109 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005110 else
5111 #endif
John Bauman89401822014-05-06 15:04:28 -04005112 {
5113 if(internal.depth == 2)
5114 {
5115 for(int y = 0; y < height; y++)
5116 {
5117 for(int x = 0; x < 2 * width; x++)
5118 {
5119 float c0 = *(float*)(source0 + 4 * x);
5120 float c1 = *(float*)(source1 + 4 * x);
5121
5122 c0 = c0 + c1;
5123 c0 *= 1.0f / 2.0f;
5124
5125 *(float*)(source0 + 4 * x) = c0;
5126 }
5127
5128 source0 += pitch;
5129 source1 += pitch;
5130 }
5131 }
5132 else if(internal.depth == 4)
5133 {
5134 for(int y = 0; y < height; y++)
5135 {
5136 for(int x = 0; x < 2 * width; x++)
5137 {
5138 float c0 = *(float*)(source0 + 4 * x);
5139 float c1 = *(float*)(source1 + 4 * x);
5140 float c2 = *(float*)(source2 + 4 * x);
5141 float c3 = *(float*)(source3 + 4 * x);
5142
5143 c0 = c0 + c1;
5144 c2 = c2 + c3;
5145 c0 = c0 + c2;
5146 c0 *= 1.0f / 4.0f;
5147
5148 *(float*)(source0 + 4 * x) = c0;
5149 }
5150
5151 source0 += pitch;
5152 source1 += pitch;
5153 source2 += pitch;
5154 source3 += pitch;
5155 }
5156 }
5157 else if(internal.depth == 8)
5158 {
5159 for(int y = 0; y < height; y++)
5160 {
5161 for(int x = 0; x < 2 * width; x++)
5162 {
5163 float c0 = *(float*)(source0 + 4 * x);
5164 float c1 = *(float*)(source1 + 4 * x);
5165 float c2 = *(float*)(source2 + 4 * x);
5166 float c3 = *(float*)(source3 + 4 * x);
5167 float c4 = *(float*)(source4 + 4 * x);
5168 float c5 = *(float*)(source5 + 4 * x);
5169 float c6 = *(float*)(source6 + 4 * x);
5170 float c7 = *(float*)(source7 + 4 * x);
5171
5172 c0 = c0 + c1;
5173 c2 = c2 + c3;
5174 c4 = c4 + c5;
5175 c6 = c6 + c7;
5176 c0 = c0 + c2;
5177 c4 = c4 + c6;
5178 c0 = c0 + c4;
5179 c0 *= 1.0f / 8.0f;
5180
5181 *(float*)(source0 + 4 * x) = c0;
5182 }
5183
5184 source0 += pitch;
5185 source1 += pitch;
5186 source2 += pitch;
5187 source3 += pitch;
5188 source4 += pitch;
5189 source5 += pitch;
5190 source6 += pitch;
5191 source7 += pitch;
5192 }
5193 }
5194 else if(internal.depth == 16)
5195 {
5196 for(int y = 0; y < height; y++)
5197 {
5198 for(int x = 0; x < 2 * width; x++)
5199 {
5200 float c0 = *(float*)(source0 + 4 * x);
5201 float c1 = *(float*)(source1 + 4 * x);
5202 float c2 = *(float*)(source2 + 4 * x);
5203 float c3 = *(float*)(source3 + 4 * x);
5204 float c4 = *(float*)(source4 + 4 * x);
5205 float c5 = *(float*)(source5 + 4 * x);
5206 float c6 = *(float*)(source6 + 4 * x);
5207 float c7 = *(float*)(source7 + 4 * x);
5208 float c8 = *(float*)(source8 + 4 * x);
5209 float c9 = *(float*)(source9 + 4 * x);
5210 float cA = *(float*)(sourceA + 4 * x);
5211 float cB = *(float*)(sourceB + 4 * x);
5212 float cC = *(float*)(sourceC + 4 * x);
5213 float cD = *(float*)(sourceD + 4 * x);
5214 float cE = *(float*)(sourceE + 4 * x);
5215 float cF = *(float*)(sourceF + 4 * x);
5216
5217 c0 = c0 + c1;
5218 c2 = c2 + c3;
5219 c4 = c4 + c5;
5220 c6 = c6 + c7;
5221 c8 = c8 + c9;
5222 cA = cA + cB;
5223 cC = cC + cD;
5224 cE = cE + cF;
5225 c0 = c0 + c2;
5226 c4 = c4 + c6;
5227 c8 = c8 + cA;
5228 cC = cC + cE;
5229 c0 = c0 + c4;
5230 c8 = c8 + cC;
5231 c0 = c0 + c8;
5232 c0 *= 1.0f / 16.0f;
5233
5234 *(float*)(source0 + 4 * x) = c0;
5235 }
5236
5237 source0 += pitch;
5238 source1 += pitch;
5239 source2 += pitch;
5240 source3 += pitch;
5241 source4 += pitch;
5242 source5 += pitch;
5243 source6 += pitch;
5244 source7 += pitch;
5245 source8 += pitch;
5246 source9 += pitch;
5247 sourceA += pitch;
5248 sourceB += pitch;
5249 sourceC += pitch;
5250 sourceD += pitch;
5251 sourceE += pitch;
5252 sourceF += pitch;
5253 }
5254 }
5255 else ASSERT(false);
5256 }
5257 }
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04005258 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F)
John Bauman89401822014-05-06 15:04:28 -04005259 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005260 #if defined(__i386__) || defined(__x86_64__)
5261 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04005262 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005263 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04005264 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005265 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005266 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005267 for(int x = 0; x < width; x++)
5268 {
5269 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5270 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005271
Nicolas Capens47dc8672017-04-25 12:54:39 -04005272 c0 = _mm_add_ps(c0, c1);
5273 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005274
Nicolas Capens47dc8672017-04-25 12:54:39 -04005275 _mm_store_ps((float*)(source0 + 16 * x), c0);
5276 }
5277
5278 source0 += pitch;
5279 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005280 }
John Bauman89401822014-05-06 15:04:28 -04005281 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005282 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04005283 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005284 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005285 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005286 for(int x = 0; x < width; x++)
5287 {
5288 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5289 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5290 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5291 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005292
Nicolas Capens47dc8672017-04-25 12:54:39 -04005293 c0 = _mm_add_ps(c0, c1);
5294 c2 = _mm_add_ps(c2, c3);
5295 c0 = _mm_add_ps(c0, c2);
5296 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005297
Nicolas Capens47dc8672017-04-25 12:54:39 -04005298 _mm_store_ps((float*)(source0 + 16 * x), c0);
5299 }
5300
5301 source0 += pitch;
5302 source1 += pitch;
5303 source2 += pitch;
5304 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005305 }
John Bauman89401822014-05-06 15:04:28 -04005306 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005307 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04005308 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005309 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005310 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005311 for(int x = 0; x < width; x++)
5312 {
5313 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5314 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5315 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5316 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5317 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5318 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5319 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5320 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005321
Nicolas Capens47dc8672017-04-25 12:54:39 -04005322 c0 = _mm_add_ps(c0, c1);
5323 c2 = _mm_add_ps(c2, c3);
5324 c4 = _mm_add_ps(c4, c5);
5325 c6 = _mm_add_ps(c6, c7);
5326 c0 = _mm_add_ps(c0, c2);
5327 c4 = _mm_add_ps(c4, c6);
5328 c0 = _mm_add_ps(c0, c4);
5329 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005330
Nicolas Capens47dc8672017-04-25 12:54:39 -04005331 _mm_store_ps((float*)(source0 + 16 * x), c0);
5332 }
5333
5334 source0 += pitch;
5335 source1 += pitch;
5336 source2 += pitch;
5337 source3 += pitch;
5338 source4 += pitch;
5339 source5 += pitch;
5340 source6 += pitch;
5341 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005342 }
John Bauman89401822014-05-06 15:04:28 -04005343 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005344 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04005345 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005346 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005347 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005348 for(int x = 0; x < width; x++)
5349 {
5350 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5351 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5352 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5353 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5354 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5355 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5356 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5357 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5358 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5359 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5360 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5361 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5362 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5363 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5364 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5365 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
John Bauman89401822014-05-06 15:04:28 -04005366
Nicolas Capens47dc8672017-04-25 12:54:39 -04005367 c0 = _mm_add_ps(c0, c1);
5368 c2 = _mm_add_ps(c2, c3);
5369 c4 = _mm_add_ps(c4, c5);
5370 c6 = _mm_add_ps(c6, c7);
5371 c8 = _mm_add_ps(c8, c9);
5372 cA = _mm_add_ps(cA, cB);
5373 cC = _mm_add_ps(cC, cD);
5374 cE = _mm_add_ps(cE, cF);
5375 c0 = _mm_add_ps(c0, c2);
5376 c4 = _mm_add_ps(c4, c6);
5377 c8 = _mm_add_ps(c8, cA);
5378 cC = _mm_add_ps(cC, cE);
5379 c0 = _mm_add_ps(c0, c4);
5380 c8 = _mm_add_ps(c8, cC);
5381 c0 = _mm_add_ps(c0, c8);
5382 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005383
Nicolas Capens47dc8672017-04-25 12:54:39 -04005384 _mm_store_ps((float*)(source0 + 16 * x), c0);
5385 }
5386
5387 source0 += pitch;
5388 source1 += pitch;
5389 source2 += pitch;
5390 source3 += pitch;
5391 source4 += pitch;
5392 source5 += pitch;
5393 source6 += pitch;
5394 source7 += pitch;
5395 source8 += pitch;
5396 source9 += pitch;
5397 sourceA += pitch;
5398 sourceB += pitch;
5399 sourceC += pitch;
5400 sourceD += pitch;
5401 sourceE += pitch;
5402 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005403 }
John Bauman89401822014-05-06 15:04:28 -04005404 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005405 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005406 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005407 else
5408 #endif
John Bauman89401822014-05-06 15:04:28 -04005409 {
5410 if(internal.depth == 2)
5411 {
5412 for(int y = 0; y < height; y++)
5413 {
5414 for(int x = 0; x < 4 * width; x++)
5415 {
5416 float c0 = *(float*)(source0 + 4 * x);
5417 float c1 = *(float*)(source1 + 4 * x);
5418
5419 c0 = c0 + c1;
5420 c0 *= 1.0f / 2.0f;
5421
5422 *(float*)(source0 + 4 * x) = c0;
5423 }
5424
5425 source0 += pitch;
5426 source1 += pitch;
5427 }
5428 }
5429 else if(internal.depth == 4)
5430 {
5431 for(int y = 0; y < height; y++)
5432 {
5433 for(int x = 0; x < 4 * width; x++)
5434 {
5435 float c0 = *(float*)(source0 + 4 * x);
5436 float c1 = *(float*)(source1 + 4 * x);
5437 float c2 = *(float*)(source2 + 4 * x);
5438 float c3 = *(float*)(source3 + 4 * x);
5439
5440 c0 = c0 + c1;
5441 c2 = c2 + c3;
5442 c0 = c0 + c2;
5443 c0 *= 1.0f / 4.0f;
5444
5445 *(float*)(source0 + 4 * x) = c0;
5446 }
5447
5448 source0 += pitch;
5449 source1 += pitch;
5450 source2 += pitch;
5451 source3 += pitch;
5452 }
5453 }
5454 else if(internal.depth == 8)
5455 {
5456 for(int y = 0; y < height; y++)
5457 {
5458 for(int x = 0; x < 4 * width; x++)
5459 {
5460 float c0 = *(float*)(source0 + 4 * x);
5461 float c1 = *(float*)(source1 + 4 * x);
5462 float c2 = *(float*)(source2 + 4 * x);
5463 float c3 = *(float*)(source3 + 4 * x);
5464 float c4 = *(float*)(source4 + 4 * x);
5465 float c5 = *(float*)(source5 + 4 * x);
5466 float c6 = *(float*)(source6 + 4 * x);
5467 float c7 = *(float*)(source7 + 4 * x);
5468
5469 c0 = c0 + c1;
5470 c2 = c2 + c3;
5471 c4 = c4 + c5;
5472 c6 = c6 + c7;
5473 c0 = c0 + c2;
5474 c4 = c4 + c6;
5475 c0 = c0 + c4;
5476 c0 *= 1.0f / 8.0f;
5477
5478 *(float*)(source0 + 4 * x) = c0;
5479 }
5480
5481 source0 += pitch;
5482 source1 += pitch;
5483 source2 += pitch;
5484 source3 += pitch;
5485 source4 += pitch;
5486 source5 += pitch;
5487 source6 += pitch;
5488 source7 += pitch;
5489 }
5490 }
5491 else if(internal.depth == 16)
5492 {
5493 for(int y = 0; y < height; y++)
5494 {
5495 for(int x = 0; x < 4 * width; x++)
5496 {
5497 float c0 = *(float*)(source0 + 4 * x);
5498 float c1 = *(float*)(source1 + 4 * x);
5499 float c2 = *(float*)(source2 + 4 * x);
5500 float c3 = *(float*)(source3 + 4 * x);
5501 float c4 = *(float*)(source4 + 4 * x);
5502 float c5 = *(float*)(source5 + 4 * x);
5503 float c6 = *(float*)(source6 + 4 * x);
5504 float c7 = *(float*)(source7 + 4 * x);
5505 float c8 = *(float*)(source8 + 4 * x);
5506 float c9 = *(float*)(source9 + 4 * x);
5507 float cA = *(float*)(sourceA + 4 * x);
5508 float cB = *(float*)(sourceB + 4 * x);
5509 float cC = *(float*)(sourceC + 4 * x);
5510 float cD = *(float*)(sourceD + 4 * x);
5511 float cE = *(float*)(sourceE + 4 * x);
5512 float cF = *(float*)(sourceF + 4 * x);
5513
5514 c0 = c0 + c1;
5515 c2 = c2 + c3;
5516 c4 = c4 + c5;
5517 c6 = c6 + c7;
5518 c8 = c8 + c9;
5519 cA = cA + cB;
5520 cC = cC + cD;
5521 cE = cE + cF;
5522 c0 = c0 + c2;
5523 c4 = c4 + c6;
5524 c8 = c8 + cA;
5525 cC = cC + cE;
5526 c0 = c0 + c4;
5527 c8 = c8 + cC;
5528 c0 = c0 + c8;
5529 c0 *= 1.0f / 16.0f;
5530
5531 *(float*)(source0 + 4 * x) = c0;
5532 }
5533
5534 source0 += pitch;
5535 source1 += pitch;
5536 source2 += pitch;
5537 source3 += pitch;
5538 source4 += pitch;
5539 source5 += pitch;
5540 source6 += pitch;
5541 source7 += pitch;
5542 source8 += pitch;
5543 source9 += pitch;
5544 sourceA += pitch;
5545 sourceB += pitch;
5546 sourceC += pitch;
5547 sourceD += pitch;
5548 sourceE += pitch;
5549 sourceF += pitch;
5550 }
5551 }
5552 else ASSERT(false);
5553 }
5554 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005555 else if(internal.format == FORMAT_R5G6B5)
5556 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005557 #if defined(__i386__) || defined(__x86_64__)
5558 if(CPUID::supportsSSE2() && (width % 8) == 0)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005559 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005560 if(internal.depth == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005561 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005562 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005563 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005564 for(int x = 0; x < width; x += 8)
5565 {
5566 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5567 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005568
Nicolas Capens47dc8672017-04-25 12:54:39 -04005569 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5570 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5571 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5572 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5573 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5574 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005575
Nicolas Capens47dc8672017-04-25 12:54:39 -04005576 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5577 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5578 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5579 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5580 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005581
Nicolas Capens47dc8672017-04-25 12:54:39 -04005582 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5583 }
5584
5585 source0 += pitch;
5586 source1 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005587 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005588 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005589 else if(internal.depth == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005590 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005591 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005592 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005593 for(int x = 0; x < width; x += 8)
5594 {
5595 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5596 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5597 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5598 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005599
Nicolas Capens47dc8672017-04-25 12:54:39 -04005600 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5601 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5602 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5603 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5604 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5605 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5606 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5607 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5608 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5609 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005610
Nicolas Capens47dc8672017-04-25 12:54:39 -04005611 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5612 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5613 c0 = _mm_avg_epu8(c0, c2);
5614 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5615 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5616 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5617 c1 = _mm_avg_epu16(c1, c3);
5618 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5619 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005620
Nicolas Capens47dc8672017-04-25 12:54:39 -04005621 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5622 }
5623
5624 source0 += pitch;
5625 source1 += pitch;
5626 source2 += pitch;
5627 source3 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005628 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005629 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005630 else if(internal.depth == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005631 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005632 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005633 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005634 for(int x = 0; x < width; x += 8)
5635 {
5636 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5637 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5638 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5639 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5640 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5641 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5642 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5643 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005644
Nicolas Capens47dc8672017-04-25 12:54:39 -04005645 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5646 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5647 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5648 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5649 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5650 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5651 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5652 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5653 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5654 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5655 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5656 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5657 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5658 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5659 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5660 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5661 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5662 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005663
Nicolas Capens47dc8672017-04-25 12:54:39 -04005664 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5665 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5666 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5667 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5668 c0 = _mm_avg_epu8(c0, c2);
5669 c4 = _mm_avg_epu8(c4, c6);
5670 c0 = _mm_avg_epu8(c0, c4);
5671 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5672 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5673 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5674 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5675 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5676 c1 = _mm_avg_epu16(c1, c3);
5677 c5 = _mm_avg_epu16(c5, c7);
5678 c1 = _mm_avg_epu16(c1, c5);
5679 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5680 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005681
Nicolas Capens47dc8672017-04-25 12:54:39 -04005682 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5683 }
5684
5685 source0 += pitch;
5686 source1 += pitch;
5687 source2 += pitch;
5688 source3 += pitch;
5689 source4 += pitch;
5690 source5 += pitch;
5691 source6 += pitch;
5692 source7 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005693 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005694 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005695 else if(internal.depth == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005696 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005697 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005698 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005699 for(int x = 0; x < width; x += 8)
5700 {
5701 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5702 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5703 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5704 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5705 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5706 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5707 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5708 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5709 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5710 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5711 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5712 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5713 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5714 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5715 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5716 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005717
Nicolas Capens47dc8672017-04-25 12:54:39 -04005718 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5719 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5720 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5721 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5722 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5723 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5724 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5725 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5726 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5727 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5728 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5729 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5730 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5731 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5732 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5733 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5734 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5735 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5736 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5737 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5738 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5739 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5740 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5741 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5742 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5743 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5744 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5745 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
5746 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
5747 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
5748 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
5749 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
5750 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
5751 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005752
Nicolas Capens47dc8672017-04-25 12:54:39 -04005753 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5754 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5755 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5756 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5757 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
5758 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
5759 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
5760 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
5761 c0 = _mm_avg_epu8(c0, c2);
5762 c4 = _mm_avg_epu8(c4, c6);
5763 c8 = _mm_avg_epu8(c8, cA);
5764 cC = _mm_avg_epu8(cC, cE);
5765 c0 = _mm_avg_epu8(c0, c4);
5766 c8 = _mm_avg_epu8(c8, cC);
5767 c0 = _mm_avg_epu8(c0, c8);
5768 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5769 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5770 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5771 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5772 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5773 c9 = _mm_avg_epu16(c8__g_, c9__g_);
5774 cB = _mm_avg_epu16(cA__g_, cB__g_);
5775 cD = _mm_avg_epu16(cC__g_, cD__g_);
5776 cF = _mm_avg_epu16(cE__g_, cF__g_);
5777 c1 = _mm_avg_epu8(c1, c3);
5778 c5 = _mm_avg_epu8(c5, c7);
5779 c9 = _mm_avg_epu8(c9, cB);
5780 cD = _mm_avg_epu8(cD, cF);
5781 c1 = _mm_avg_epu8(c1, c5);
5782 c9 = _mm_avg_epu8(c9, cD);
5783 c1 = _mm_avg_epu8(c1, c9);
5784 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5785 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005786
Nicolas Capens47dc8672017-04-25 12:54:39 -04005787 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5788 }
5789
5790 source0 += pitch;
5791 source1 += pitch;
5792 source2 += pitch;
5793 source3 += pitch;
5794 source4 += pitch;
5795 source5 += pitch;
5796 source6 += pitch;
5797 source7 += pitch;
5798 source8 += pitch;
5799 source9 += pitch;
5800 sourceA += pitch;
5801 sourceB += pitch;
5802 sourceC += pitch;
5803 sourceD += pitch;
5804 sourceE += pitch;
5805 sourceF += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005806 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005807 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005808 else ASSERT(false);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005809 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005810 else
5811 #endif
Nicolas Capens0e12a922015-09-04 09:18:15 -04005812 {
// AVERAGE(x, y): rounded-up average of two packed 16-bit pixels, computed
// independently for the three bit fields 0-4, 5-10 and 11-15.
// Relies on a + b == 2 * (a & b) + (a ^ b), so for each field:
//   (a + b + 1) >> 1 == (a & b) + ((a ^ b) >> 1) + ((a ^ b) & 1)
// 0x7BEF clears the top bit of every field (4, 10, 15) after the shift, so no
// bit leaks in from the neighbouring field above.
// 0x0821 selects the lowest bit of every field (0, 5, 11), which supplies the
// round-up term.
// Both arguments are expanded several times; pass side-effect-free expressions only.
5813 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
5814
5815 if(internal.depth == 2)
5816 {
5817 for(int y = 0; y < height; y++)
5818 {
5819 for(int x = 0; x < width; x++)
5820 {
5821 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5822 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5823
5824 c0 = AVERAGE(c0, c1);
5825
5826 *(unsigned short*)(source0 + 2 * x) = c0;
5827 }
5828
5829 source0 += pitch;
5830 source1 += pitch;
5831 }
5832 }
5833 else if(internal.depth == 4)
5834 {
5835 for(int y = 0; y < height; y++)
5836 {
5837 for(int x = 0; x < width; x++)
5838 {
5839 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5840 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5841 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5842 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5843
5844 c0 = AVERAGE(c0, c1);
5845 c2 = AVERAGE(c2, c3);
5846 c0 = AVERAGE(c0, c2);
5847
5848 *(unsigned short*)(source0 + 2 * x) = c0;
5849 }
5850
5851 source0 += pitch;
5852 source1 += pitch;
5853 source2 += pitch;
5854 source3 += pitch;
5855 }
5856 }
5857 else if(internal.depth == 8)
5858 {
5859 for(int y = 0; y < height; y++)
5860 {
5861 for(int x = 0; x < width; x++)
5862 {
5863 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5864 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5865 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5866 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5867 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5868 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5869 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5870 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5871
5872 c0 = AVERAGE(c0, c1);
5873 c2 = AVERAGE(c2, c3);
5874 c4 = AVERAGE(c4, c5);
5875 c6 = AVERAGE(c6, c7);
5876 c0 = AVERAGE(c0, c2);
5877 c4 = AVERAGE(c4, c6);
5878 c0 = AVERAGE(c0, c4);
5879
5880 *(unsigned short*)(source0 + 2 * x) = c0;
5881 }
5882
5883 source0 += pitch;
5884 source1 += pitch;
5885 source2 += pitch;
5886 source3 += pitch;
5887 source4 += pitch;
5888 source5 += pitch;
5889 source6 += pitch;
5890 source7 += pitch;
5891 }
5892 }
5893 else if(internal.depth == 16)
5894 {
5895 for(int y = 0; y < height; y++)
5896 {
5897 for(int x = 0; x < width; x++)
5898 {
5899 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5900 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5901 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5902 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5903 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5904 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5905 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5906 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5907 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
5908 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
5909 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
5910 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
5911 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
5912 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
5913 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
5914 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
5915
5916 c0 = AVERAGE(c0, c1);
5917 c2 = AVERAGE(c2, c3);
5918 c4 = AVERAGE(c4, c5);
5919 c6 = AVERAGE(c6, c7);
5920 c8 = AVERAGE(c8, c9);
5921 cA = AVERAGE(cA, cB);
5922 cC = AVERAGE(cC, cD);
5923 cE = AVERAGE(cE, cF);
5924 c0 = AVERAGE(c0, c2);
5925 c4 = AVERAGE(c4, c6);
5926 c8 = AVERAGE(c8, cA);
5927 cC = AVERAGE(cC, cE);
5928 c0 = AVERAGE(c0, c4);
5929 c8 = AVERAGE(c8, cC);
5930 c0 = AVERAGE(c0, c8);
5931
5932 *(unsigned short*)(source0 + 2 * x) = c0;
5933 }
5934
5935 source0 += pitch;
5936 source1 += pitch;
5937 source2 += pitch;
5938 source3 += pitch;
5939 source4 += pitch;
5940 source5 += pitch;
5941 source6 += pitch;
5942 source7 += pitch;
5943 source8 += pitch;
5944 source9 += pitch;
5945 sourceA += pitch;
5946 sourceB += pitch;
5947 sourceC += pitch;
5948 sourceD += pitch;
5949 sourceE += pitch;
5950 sourceF += pitch;
5951 }
5952 }
5953 else ASSERT(false);
5954
5955 #undef AVERAGE
5956 }
5957 }
John Bauman89401822014-05-06 15:04:28 -04005958 else
5959 {
5960 // UNIMPLEMENTED();
5961 }
5962 }
5963}