// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15#include "Surface.hpp"
16
17#include "Color.hpp"
18#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040019#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040020#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040021#include "Common/Half.hpp"
22#include "Common/Memory.hpp"
23#include "Common/CPUID.hpp"
24#include "Common/Resource.hpp"
25#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040026#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040027
Nicolas Capens47dc8672017-04-25 12:54:39 -040028#if defined(__i386__) || defined(__x86_64__)
29 #include <xmmintrin.h>
30 #include <emmintrin.h>
31#endif
John Bauman89401822014-05-06 15:04:28 -040032
33#undef min
34#undef max
35
36namespace sw
37{
// Globals declared here; their definitions are not in this part of the file.
extern bool quadLayoutEnabled;
extern bool complementaryDepthBuffer;
extern TranscendentalPrecision logPrecision;

// Definitions of Surface's static palette members. Buffer::read() asserts
// that 'palette' is non-null and indexes it when decoding FORMAT_P8 and
// FORMAT_A8P8 texels.
unsigned int *Surface::palette = 0;
unsigned int Surface::paletteID = 0;
44
45 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
46 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -050047 byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
John Bauman89401822014-05-06 15:04:28 -040048
Nicolas Capensbfa23b32017-12-11 10:06:37 -050049 for(int i = 0; i < samples; i++)
50 {
51 write(element, color);
52 element += sliceB;
53 }
John Bauman89401822014-05-06 15:04:28 -040054 }
55
56 void Surface::Buffer::write(int x, int y, const Color<float> &color)
57 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -050058 byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;
John Bauman89401822014-05-06 15:04:28 -040059
Nicolas Capensbfa23b32017-12-11 10:06:37 -050060 for(int i = 0; i < samples; i++)
61 {
62 write(element, color);
63 element += sliceB;
64 }
John Bauman89401822014-05-06 15:04:28 -040065 }
66
67 inline void Surface::Buffer::write(void *element, const Color<float> &color)
68 {
69 switch(format)
70 {
71 case FORMAT_A8:
72 *(unsigned char*)element = unorm<8>(color.a);
73 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040074 case FORMAT_R8I_SNORM:
75 *(char*)element = snorm<8>(color.r);
76 break;
John Bauman89401822014-05-06 15:04:28 -040077 case FORMAT_R8:
78 *(unsigned char*)element = unorm<8>(color.r);
79 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040080 case FORMAT_R8I:
81 *(char*)element = scast<8>(color.r);
82 break;
83 case FORMAT_R8UI:
84 *(unsigned char*)element = ucast<8>(color.r);
85 break;
86 case FORMAT_R16I:
87 *(short*)element = scast<16>(color.r);
88 break;
89 case FORMAT_R16UI:
90 *(unsigned short*)element = ucast<16>(color.r);
91 break;
92 case FORMAT_R32I:
93 *(int*)element = static_cast<int>(color.r);
94 break;
95 case FORMAT_R32UI:
96 *(unsigned int*)element = static_cast<unsigned int>(color.r);
97 break;
John Bauman89401822014-05-06 15:04:28 -040098 case FORMAT_R3G3B2:
99 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
100 break;
101 case FORMAT_A8R3G3B2:
102 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
103 break;
104 case FORMAT_X4R4G4B4:
105 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
106 break;
107 case FORMAT_A4R4G4B4:
108 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
109 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400110 case FORMAT_R4G4B4A4:
111 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
112 break;
John Bauman89401822014-05-06 15:04:28 -0400113 case FORMAT_R5G6B5:
114 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
115 break;
116 case FORMAT_A1R5G5B5:
117 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
118 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400119 case FORMAT_R5G5B5A1:
120 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
121 break;
John Bauman89401822014-05-06 15:04:28 -0400122 case FORMAT_X1R5G5B5:
123 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
124 break;
125 case FORMAT_A8R8G8B8:
126 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
127 break;
128 case FORMAT_X8R8G8B8:
129 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
130 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400131 case FORMAT_A8B8G8R8I_SNORM:
132 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
133 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
134 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
135 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
136 break;
John Bauman89401822014-05-06 15:04:28 -0400137 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400138 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400139 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
140 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400141 case FORMAT_A8B8G8R8I:
142 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
143 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
144 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
145 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
146 break;
147 case FORMAT_A8B8G8R8UI:
148 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
149 break;
150 case FORMAT_X8B8G8R8I_SNORM:
151 *(unsigned int*)element = 0x7F000000 |
152 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
153 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
154 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
155 break;
John Bauman89401822014-05-06 15:04:28 -0400156 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400157 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400158 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
159 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400160 case FORMAT_X8B8G8R8I:
161 *(unsigned int*)element = 0x7F000000 |
162 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
163 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
164 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
165 case FORMAT_X8B8G8R8UI:
166 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
167 break;
John Bauman89401822014-05-06 15:04:28 -0400168 case FORMAT_A2R10G10B10:
169 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
170 break;
171 case FORMAT_A2B10G10R10:
Nicolas Capens5555af42017-12-14 13:14:03 -0500172 case FORMAT_A2B10G10R10UI:
John Bauman89401822014-05-06 15:04:28 -0400173 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
174 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400175 case FORMAT_G8R8I_SNORM:
176 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
177 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
178 break;
John Bauman89401822014-05-06 15:04:28 -0400179 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400180 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
181 break;
182 case FORMAT_G8R8I:
183 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
184 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
185 break;
186 case FORMAT_G8R8UI:
187 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400188 break;
189 case FORMAT_G16R16:
190 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
191 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400192 case FORMAT_G16R16I:
193 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
194 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
195 break;
196 case FORMAT_G16R16UI:
197 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
198 break;
199 case FORMAT_G32R32I:
200 case FORMAT_G32R32UI:
201 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
202 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
203 break;
John Bauman89401822014-05-06 15:04:28 -0400204 case FORMAT_A16B16G16R16:
205 ((unsigned short*)element)[0] = unorm<16>(color.r);
206 ((unsigned short*)element)[1] = unorm<16>(color.g);
207 ((unsigned short*)element)[2] = unorm<16>(color.b);
208 ((unsigned short*)element)[3] = unorm<16>(color.a);
209 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400210 case FORMAT_A16B16G16R16I:
211 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
212 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
213 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
214 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
215 break;
216 case FORMAT_A16B16G16R16UI:
217 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
218 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
219 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
220 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
221 break;
222 case FORMAT_X16B16G16R16I:
223 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
224 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
225 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
226 break;
227 case FORMAT_X16B16G16R16UI:
228 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
229 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
230 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
231 break;
232 case FORMAT_A32B32G32R32I:
233 case FORMAT_A32B32G32R32UI:
234 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
235 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
236 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
237 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
238 break;
239 case FORMAT_X32B32G32R32I:
240 case FORMAT_X32B32G32R32UI:
241 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
242 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
243 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
244 break;
John Bauman89401822014-05-06 15:04:28 -0400245 case FORMAT_V8U8:
246 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
247 break;
248 case FORMAT_L6V5U5:
249 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
250 break;
251 case FORMAT_Q8W8V8U8:
252 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
253 break;
254 case FORMAT_X8L8V8U8:
255 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
256 break;
257 case FORMAT_V16U16:
258 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
259 break;
260 case FORMAT_A2W10V10U10:
261 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
262 break;
263 case FORMAT_A16W16V16U16:
264 ((unsigned short*)element)[0] = snorm<16>(color.r);
265 ((unsigned short*)element)[1] = snorm<16>(color.g);
266 ((unsigned short*)element)[2] = snorm<16>(color.b);
267 ((unsigned short*)element)[3] = unorm<16>(color.a);
268 break;
269 case FORMAT_Q16W16V16U16:
270 ((unsigned short*)element)[0] = snorm<16>(color.r);
271 ((unsigned short*)element)[1] = snorm<16>(color.g);
272 ((unsigned short*)element)[2] = snorm<16>(color.b);
273 ((unsigned short*)element)[3] = snorm<16>(color.a);
274 break;
275 case FORMAT_R8G8B8:
276 ((unsigned char*)element)[0] = unorm<8>(color.b);
277 ((unsigned char*)element)[1] = unorm<8>(color.g);
278 ((unsigned char*)element)[2] = unorm<8>(color.r);
279 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400280 case FORMAT_B8G8R8:
281 ((unsigned char*)element)[0] = unorm<8>(color.r);
282 ((unsigned char*)element)[1] = unorm<8>(color.g);
283 ((unsigned char*)element)[2] = unorm<8>(color.b);
284 break;
John Bauman89401822014-05-06 15:04:28 -0400285 case FORMAT_R16F:
286 *(half*)element = (half)color.r;
287 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400288 case FORMAT_A16F:
289 *(half*)element = (half)color.a;
290 break;
John Bauman89401822014-05-06 15:04:28 -0400291 case FORMAT_G16R16F:
292 ((half*)element)[0] = (half)color.r;
293 ((half*)element)[1] = (half)color.g;
294 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400295 case FORMAT_B16G16R16F:
296 ((half*)element)[0] = (half)color.r;
297 ((half*)element)[1] = (half)color.g;
298 ((half*)element)[2] = (half)color.b;
299 break;
John Bauman89401822014-05-06 15:04:28 -0400300 case FORMAT_A16B16G16R16F:
301 ((half*)element)[0] = (half)color.r;
302 ((half*)element)[1] = (half)color.g;
303 ((half*)element)[2] = (half)color.b;
304 ((half*)element)[3] = (half)color.a;
305 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400306 case FORMAT_A32F:
307 *(float*)element = color.a;
308 break;
John Bauman89401822014-05-06 15:04:28 -0400309 case FORMAT_R32F:
310 *(float*)element = color.r;
311 break;
312 case FORMAT_G32R32F:
313 ((float*)element)[0] = color.r;
314 ((float*)element)[1] = color.g;
315 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400316 case FORMAT_X32B32G32R32F:
317 ((float*)element)[3] = 1.0f;
Nicolas Capens80594422015-06-09 16:42:56 -0400318 case FORMAT_B32G32R32F:
319 ((float*)element)[0] = color.r;
320 ((float*)element)[1] = color.g;
321 ((float*)element)[2] = color.b;
322 break;
John Bauman89401822014-05-06 15:04:28 -0400323 case FORMAT_A32B32G32R32F:
324 ((float*)element)[0] = color.r;
325 ((float*)element)[1] = color.g;
326 ((float*)element)[2] = color.b;
327 ((float*)element)[3] = color.a;
328 break;
329 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500330 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -0400331 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400332 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500333 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -0400334 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400335 *((float*)element) = color.r;
336 break;
337 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500338 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -0400339 *((float*)element) = 1 - color.r;
340 break;
341 case FORMAT_S8:
342 *((unsigned char*)element) = unorm<8>(color.r);
343 break;
344 case FORMAT_L8:
345 *(unsigned char*)element = unorm<8>(color.r);
346 break;
347 case FORMAT_A4L4:
348 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
349 break;
350 case FORMAT_L16:
351 *(unsigned short*)element = unorm<16>(color.r);
352 break;
353 case FORMAT_A8L8:
354 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
355 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400356 case FORMAT_L16F:
357 *(half*)element = (half)color.r;
358 break;
359 case FORMAT_A16L16F:
360 ((half*)element)[0] = (half)color.r;
361 ((half*)element)[1] = (half)color.a;
362 break;
363 case FORMAT_L32F:
364 *(float*)element = color.r;
365 break;
366 case FORMAT_A32L32F:
367 ((float*)element)[0] = color.r;
368 ((float*)element)[1] = color.a;
369 break;
John Bauman89401822014-05-06 15:04:28 -0400370 default:
371 ASSERT(false);
372 }
373 }
374
375 Color<float> Surface::Buffer::read(int x, int y, int z) const
376 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -0500377 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
John Bauman89401822014-05-06 15:04:28 -0400378
379 return read(element);
380 }
381
382 Color<float> Surface::Buffer::read(int x, int y) const
383 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -0500384 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
John Bauman89401822014-05-06 15:04:28 -0400385
386 return read(element);
387 }
388
389 inline Color<float> Surface::Buffer::read(void *element) const
390 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400391 float r = 0.0f;
392 float g = 0.0f;
393 float b = 0.0f;
394 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400395
396 switch(format)
397 {
398 case FORMAT_P8:
399 {
400 ASSERT(palette);
401
402 unsigned int abgr = palette[*(unsigned char*)element];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400403
John Bauman89401822014-05-06 15:04:28 -0400404 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
405 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
406 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
407 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
408 }
409 break;
410 case FORMAT_A8P8:
411 {
412 ASSERT(palette);
413
414 unsigned int bgr = palette[((unsigned char*)element)[0]];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400415
John Bauman89401822014-05-06 15:04:28 -0400416 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
417 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
418 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
419 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
420 }
421 break;
422 case FORMAT_A8:
423 r = 0;
424 g = 0;
425 b = 0;
426 a = *(unsigned char*)element * (1.0f / 0xFF);
427 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400428 case FORMAT_R8I_SNORM:
429 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
430 break;
John Bauman89401822014-05-06 15:04:28 -0400431 case FORMAT_R8:
432 r = *(unsigned char*)element * (1.0f / 0xFF);
433 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400434 case FORMAT_R8I:
435 r = *(signed char*)element;
436 break;
437 case FORMAT_R8UI:
438 r = *(unsigned char*)element;
439 break;
John Bauman89401822014-05-06 15:04:28 -0400440 case FORMAT_R3G3B2:
441 {
442 unsigned char rgb = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400443
John Bauman89401822014-05-06 15:04:28 -0400444 r = (rgb & 0xE0) * (1.0f / 0xE0);
445 g = (rgb & 0x1C) * (1.0f / 0x1C);
446 b = (rgb & 0x03) * (1.0f / 0x03);
447 }
448 break;
449 case FORMAT_A8R3G3B2:
450 {
451 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400452
John Bauman89401822014-05-06 15:04:28 -0400453 a = (argb & 0xFF00) * (1.0f / 0xFF00);
454 r = (argb & 0x00E0) * (1.0f / 0x00E0);
455 g = (argb & 0x001C) * (1.0f / 0x001C);
456 b = (argb & 0x0003) * (1.0f / 0x0003);
457 }
458 break;
459 case FORMAT_X4R4G4B4:
460 {
461 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400462
John Bauman89401822014-05-06 15:04:28 -0400463 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
464 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
465 b = (rgb & 0x000F) * (1.0f / 0x000F);
466 }
467 break;
468 case FORMAT_A4R4G4B4:
469 {
470 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400471
John Bauman89401822014-05-06 15:04:28 -0400472 a = (argb & 0xF000) * (1.0f / 0xF000);
473 r = (argb & 0x0F00) * (1.0f / 0x0F00);
474 g = (argb & 0x00F0) * (1.0f / 0x00F0);
475 b = (argb & 0x000F) * (1.0f / 0x000F);
476 }
477 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400478 case FORMAT_R4G4B4A4:
479 {
480 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400481
Nicolas Capens80594422015-06-09 16:42:56 -0400482 r = (rgba & 0xF000) * (1.0f / 0xF000);
483 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
484 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
485 a = (rgba & 0x000F) * (1.0f / 0x000F);
486 }
487 break;
John Bauman89401822014-05-06 15:04:28 -0400488 case FORMAT_R5G6B5:
489 {
490 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400491
John Bauman89401822014-05-06 15:04:28 -0400492 r = (rgb & 0xF800) * (1.0f / 0xF800);
493 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
494 b = (rgb & 0x001F) * (1.0f / 0x001F);
495 }
496 break;
497 case FORMAT_A1R5G5B5:
498 {
499 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400500
John Bauman89401822014-05-06 15:04:28 -0400501 a = (argb & 0x8000) * (1.0f / 0x8000);
502 r = (argb & 0x7C00) * (1.0f / 0x7C00);
503 g = (argb & 0x03E0) * (1.0f / 0x03E0);
504 b = (argb & 0x001F) * (1.0f / 0x001F);
505 }
506 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400507 case FORMAT_R5G5B5A1:
508 {
509 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400510
Nicolas Capens80594422015-06-09 16:42:56 -0400511 r = (rgba & 0xF800) * (1.0f / 0xF800);
512 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
513 b = (rgba & 0x003E) * (1.0f / 0x003E);
514 a = (rgba & 0x0001) * (1.0f / 0x0001);
515 }
516 break;
John Bauman89401822014-05-06 15:04:28 -0400517 case FORMAT_X1R5G5B5:
518 {
519 unsigned short xrgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400520
John Bauman89401822014-05-06 15:04:28 -0400521 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
522 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
523 b = (xrgb & 0x001F) * (1.0f / 0x001F);
524 }
525 break;
526 case FORMAT_A8R8G8B8:
527 {
528 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400529
John Bauman89401822014-05-06 15:04:28 -0400530 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
531 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
532 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
533 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
534 }
535 break;
536 case FORMAT_X8R8G8B8:
537 {
538 unsigned int xrgb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400539
John Bauman89401822014-05-06 15:04:28 -0400540 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
541 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
542 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
543 }
544 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400545 case FORMAT_A8B8G8R8I_SNORM:
546 {
547 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400548
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400549 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
550 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
551 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
552 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
553 }
554 break;
John Bauman89401822014-05-06 15:04:28 -0400555 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400556 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400557 {
558 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400559
John Bauman89401822014-05-06 15:04:28 -0400560 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
561 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
562 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
563 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
564 }
565 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400566 case FORMAT_A8B8G8R8I:
567 {
568 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400569
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400570 r = abgr[0];
571 g = abgr[1];
572 b = abgr[2];
573 a = abgr[3];
574 }
575 break;
576 case FORMAT_A8B8G8R8UI:
577 {
578 unsigned char* abgr = (unsigned char*)element;
579
580 r = abgr[0];
581 g = abgr[1];
582 b = abgr[2];
583 a = abgr[3];
584 }
585 break;
586 case FORMAT_X8B8G8R8I_SNORM:
587 {
588 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400589
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400590 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
591 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
592 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
593 }
594 break;
John Bauman89401822014-05-06 15:04:28 -0400595 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400596 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400597 {
598 unsigned int xbgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400599
John Bauman89401822014-05-06 15:04:28 -0400600 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
601 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
602 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
603 }
604 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400605 case FORMAT_X8B8G8R8I:
606 {
607 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400608
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400609 r = bgr[0];
610 g = bgr[1];
611 b = bgr[2];
612 }
613 break;
614 case FORMAT_X8B8G8R8UI:
615 {
616 unsigned char* bgr = (unsigned char*)element;
617
618 r = bgr[0];
619 g = bgr[1];
620 b = bgr[2];
621 }
622 break;
623 case FORMAT_G8R8I_SNORM:
624 {
625 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400626
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400627 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
628 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
629 }
630 break;
John Bauman89401822014-05-06 15:04:28 -0400631 case FORMAT_G8R8:
632 {
633 unsigned short gr = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400634
John Bauman89401822014-05-06 15:04:28 -0400635 g = (gr & 0xFF00) * (1.0f / 0xFF00);
636 r = (gr & 0x00FF) * (1.0f / 0x00FF);
637 }
638 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400639 case FORMAT_G8R8I:
640 {
641 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400642
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400643 r = gr[0];
644 g = gr[1];
645 }
646 break;
647 case FORMAT_G8R8UI:
648 {
649 unsigned char* gr = (unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400650
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400651 r = gr[0];
652 g = gr[1];
653 }
654 break;
655 case FORMAT_R16I:
656 r = *((short*)element);
657 break;
658 case FORMAT_R16UI:
659 r = *((unsigned short*)element);
660 break;
661 case FORMAT_G16R16I:
662 {
663 short* gr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400664
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400665 r = gr[0];
666 g = gr[1];
667 }
668 break;
John Bauman89401822014-05-06 15:04:28 -0400669 case FORMAT_G16R16:
670 {
671 unsigned int gr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400672
John Bauman89401822014-05-06 15:04:28 -0400673 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
674 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
675 }
676 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400677 case FORMAT_G16R16UI:
678 {
679 unsigned short* gr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400680
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400681 r = gr[0];
682 g = gr[1];
683 }
684 break;
John Bauman89401822014-05-06 15:04:28 -0400685 case FORMAT_A2R10G10B10:
686 {
687 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400688
John Bauman89401822014-05-06 15:04:28 -0400689 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
690 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
691 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
692 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
693 }
694 break;
695 case FORMAT_A2B10G10R10:
696 {
697 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400698
John Bauman89401822014-05-06 15:04:28 -0400699 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
700 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
701 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
702 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
703 }
704 break;
Nicolas Capens5555af42017-12-14 13:14:03 -0500705 case FORMAT_A2B10G10R10UI:
706 {
707 unsigned int abgr = *(unsigned int*)element;
708
709 a = static_cast<float>((abgr & 0xC0000000) >> 30);
710 b = static_cast<float>((abgr & 0x3FF00000) >> 20);
711 g = static_cast<float>((abgr & 0x000FFC00) >> 10);
712 r = static_cast<float>(abgr & 0x000003FF);
713 }
714 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400715 case FORMAT_A16B16G16R16I:
716 {
717 short* abgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400718
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400719 r = abgr[0];
720 g = abgr[1];
721 b = abgr[2];
722 a = abgr[3];
723 }
724 break;
John Bauman89401822014-05-06 15:04:28 -0400725 case FORMAT_A16B16G16R16:
726 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
727 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
728 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
729 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
730 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400731 case FORMAT_A16B16G16R16UI:
732 {
733 unsigned short* abgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400734
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400735 r = abgr[0];
736 g = abgr[1];
737 b = abgr[2];
738 a = abgr[3];
739 }
740 break;
741 case FORMAT_X16B16G16R16I:
742 {
743 short* bgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400744
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400745 r = bgr[0];
746 g = bgr[1];
747 b = bgr[2];
748 }
749 break;
750 case FORMAT_X16B16G16R16UI:
751 {
752 unsigned short* bgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400753
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400754 r = bgr[0];
755 g = bgr[1];
756 b = bgr[2];
757 }
758 break;
759 case FORMAT_A32B32G32R32I:
760 {
761 int* abgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400762
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400763 r = static_cast<float>(abgr[0]);
764 g = static_cast<float>(abgr[1]);
765 b = static_cast<float>(abgr[2]);
766 a = static_cast<float>(abgr[3]);
767 }
768 break;
769 case FORMAT_A32B32G32R32UI:
770 {
771 unsigned int* abgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400772
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400773 r = static_cast<float>(abgr[0]);
774 g = static_cast<float>(abgr[1]);
775 b = static_cast<float>(abgr[2]);
776 a = static_cast<float>(abgr[3]);
777 }
778 break;
779 case FORMAT_X32B32G32R32I:
780 {
781 int* bgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400782
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400783 r = static_cast<float>(bgr[0]);
784 g = static_cast<float>(bgr[1]);
785 b = static_cast<float>(bgr[2]);
786 }
787 break;
788 case FORMAT_X32B32G32R32UI:
789 {
790 unsigned int* bgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400791
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400792 r = static_cast<float>(bgr[0]);
793 g = static_cast<float>(bgr[1]);
794 b = static_cast<float>(bgr[2]);
795 }
796 break;
797 case FORMAT_G32R32I:
798 {
799 int* gr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400800
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400801 r = static_cast<float>(gr[0]);
802 g = static_cast<float>(gr[1]);
803 }
804 break;
805 case FORMAT_G32R32UI:
806 {
807 unsigned int* gr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400808
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400809 r = static_cast<float>(gr[0]);
810 g = static_cast<float>(gr[1]);
811 }
812 break;
813 case FORMAT_R32I:
814 r = static_cast<float>(*((int*)element));
815 break;
816 case FORMAT_R32UI:
817 r = static_cast<float>(*((unsigned int*)element));
818 break;
John Bauman89401822014-05-06 15:04:28 -0400819 case FORMAT_V8U8:
820 {
821 unsigned short vu = *(unsigned short*)element;
822
823 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
824 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
825 }
826 break;
827 case FORMAT_L6V5U5:
828 {
829 unsigned short lvu = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400830
John Bauman89401822014-05-06 15:04:28 -0400831 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
832 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
833 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
834 }
835 break;
836 case FORMAT_Q8W8V8U8:
837 {
838 unsigned int qwvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400839
John Bauman89401822014-05-06 15:04:28 -0400840 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
841 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
842 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
843 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
844 }
845 break;
846 case FORMAT_X8L8V8U8:
847 {
848 unsigned int xlvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400849
John Bauman89401822014-05-06 15:04:28 -0400850 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
851 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
852 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
853 }
854 break;
855 case FORMAT_R8G8B8:
856 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
857 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
858 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
859 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400860 case FORMAT_B8G8R8:
861 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
862 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
863 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
864 break;
John Bauman89401822014-05-06 15:04:28 -0400865 case FORMAT_V16U16:
866 {
867 unsigned int vu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400868
John Bauman89401822014-05-06 15:04:28 -0400869 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
870 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
871 }
872 break;
873 case FORMAT_A2W10V10U10:
874 {
875 unsigned int awvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400876
John Bauman89401822014-05-06 15:04:28 -0400877 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
878 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
879 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
880 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
881 }
882 break;
883 case FORMAT_A16W16V16U16:
884 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
885 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
886 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
887 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
888 break;
889 case FORMAT_Q16W16V16U16:
890 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
891 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
892 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
893 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
894 break;
895 case FORMAT_L8:
896 r =
897 g =
898 b = *(unsigned char*)element * (1.0f / 0xFF);
899 break;
900 case FORMAT_A4L4:
901 {
902 unsigned char al = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400903
John Bauman89401822014-05-06 15:04:28 -0400904 r =
905 g =
906 b = (al & 0x0F) * (1.0f / 0x0F);
907 a = (al & 0xF0) * (1.0f / 0xF0);
908 }
909 break;
910 case FORMAT_L16:
911 r =
912 g =
913 b = *(unsigned short*)element * (1.0f / 0xFFFF);
914 break;
915 case FORMAT_A8L8:
916 r =
917 g =
918 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
919 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
920 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400921 case FORMAT_L16F:
922 r =
923 g =
924 b = *(half*)element;
925 break;
926 case FORMAT_A16L16F:
927 r =
928 g =
929 b = ((half*)element)[0];
930 a = ((half*)element)[1];
931 break;
932 case FORMAT_L32F:
933 r =
934 g =
935 b = *(float*)element;
936 break;
937 case FORMAT_A32L32F:
938 r =
939 g =
940 b = ((float*)element)[0];
941 a = ((float*)element)[1];
942 break;
943 case FORMAT_A16F:
944 a = *(half*)element;
945 break;
John Bauman89401822014-05-06 15:04:28 -0400946 case FORMAT_R16F:
947 r = *(half*)element;
948 break;
949 case FORMAT_G16R16F:
950 r = ((half*)element)[0];
951 g = ((half*)element)[1];
952 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400953 case FORMAT_B16G16R16F:
954 r = ((half*)element)[0];
955 g = ((half*)element)[1];
956 b = ((half*)element)[2];
957 break;
John Bauman89401822014-05-06 15:04:28 -0400958 case FORMAT_A16B16G16R16F:
959 r = ((half*)element)[0];
960 g = ((half*)element)[1];
961 b = ((half*)element)[2];
962 a = ((half*)element)[3];
963 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400964 case FORMAT_A32F:
965 a = *(float*)element;
966 break;
John Bauman89401822014-05-06 15:04:28 -0400967 case FORMAT_R32F:
968 r = *(float*)element;
969 break;
970 case FORMAT_G32R32F:
971 r = ((float*)element)[0];
972 g = ((float*)element)[1];
973 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400974 case FORMAT_X32B32G32R32F:
Nicolas Capens80594422015-06-09 16:42:56 -0400975 case FORMAT_B32G32R32F:
976 r = ((float*)element)[0];
977 g = ((float*)element)[1];
978 b = ((float*)element)[2];
979 break;
John Bauman89401822014-05-06 15:04:28 -0400980 case FORMAT_A32B32G32R32F:
981 r = ((float*)element)[0];
982 g = ((float*)element)[1];
983 b = ((float*)element)[2];
984 a = ((float*)element)[3];
985 break;
986 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500987 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -0400988 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400989 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500990 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -0400991 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400992 r = *(float*)element;
993 g = r;
994 b = r;
995 a = r;
996 break;
997 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500998 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400999 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -04001000 g = r;
1001 b = r;
1002 a = r;
1003 break;
1004 case FORMAT_S8:
1005 r = *(unsigned char*)element * (1.0f / 0xFF);
1006 break;
1007 default:
1008 ASSERT(false);
1009 }
1010
1011 // if(sRGB)
1012 // {
1013 // r = sRGBtoLinear(r);
1014 // g = sRGBtoLinear(g);
1015 // b = sRGBtoLinear(b);
1016 // }
1017
1018 return Color<float>(r, g, b, a);
1019 }
1020
1021 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1022 {
1023 x -= 0.5f;
1024 y -= 0.5f;
1025 z -= 0.5f;
1026
1027 int x0 = clamp((int)x, 0, width - 1);
1028 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1029
1030 int y0 = clamp((int)y, 0, height - 1);
1031 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1032
1033 int z0 = clamp((int)z, 0, depth - 1);
1034 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1035
1036 Color<float> c000 = read(x0, y0, z0);
1037 Color<float> c100 = read(x1, y0, z0);
1038 Color<float> c010 = read(x0, y1, z0);
1039 Color<float> c110 = read(x1, y1, z0);
1040 Color<float> c001 = read(x0, y0, z1);
1041 Color<float> c101 = read(x1, y0, z1);
1042 Color<float> c011 = read(x0, y1, z1);
1043 Color<float> c111 = read(x1, y1, z1);
1044
1045 float fx = x - x0;
1046 float fy = y - y0;
1047 float fz = z - z0;
1048
1049 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1050 c100 *= fx * (1 - fy) * (1 - fz);
1051 c010 *= (1 - fx) * fy * (1 - fz);
1052 c110 *= fx * fy * (1 - fz);
1053 c001 *= (1 - fx) * (1 - fy) * fz;
1054 c101 *= fx * (1 - fy) * fz;
1055 c011 *= (1 - fx) * fy * fz;
1056 c111 *= fx * fy * fz;
1057
1058 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1059 }
1060
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001061 Color<float> Surface::Buffer::sample(float x, float y, int layer) const
John Bauman89401822014-05-06 15:04:28 -04001062 {
1063 x -= 0.5f;
1064 y -= 0.5f;
1065
1066 int x0 = clamp((int)x, 0, width - 1);
1067 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1068
1069 int y0 = clamp((int)y, 0, height - 1);
1070 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1071
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001072 Color<float> c00 = read(x0, y0, layer);
1073 Color<float> c10 = read(x1, y0, layer);
1074 Color<float> c01 = read(x0, y1, layer);
1075 Color<float> c11 = read(x1, y1, layer);
John Bauman89401822014-05-06 15:04:28 -04001076
1077 float fx = x - x0;
1078 float fy = y - y0;
1079
1080 c00 *= (1 - fx) * (1 - fy);
1081 c10 *= fx * (1 - fy);
1082 c01 *= (1 - fx) * fy;
1083 c11 *= fx * fy;
1084
1085 return c00 + c10 + c01 + c11;
1086 }
1087
John Bauman19bac1e2014-05-06 15:23:49 -04001088 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001089 {
1090 this->lock = lock;
1091
1092 switch(lock)
1093 {
1094 case LOCK_UNLOCKED:
1095 case LOCK_READONLY:
1096 break;
1097 case LOCK_WRITEONLY:
1098 case LOCK_READWRITE:
1099 case LOCK_DISCARD:
1100 dirty = true;
1101 break;
1102 default:
1103 ASSERT(false);
1104 }
1105
John Baumand4ae8632014-05-06 16:18:33 -04001106 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001107 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001108 x += border;
1109 y += border;
1110
John Baumand4ae8632014-05-06 16:18:33 -04001111 switch(format)
1112 {
1113 #if S3TC_SUPPORT
1114 case FORMAT_DXT1:
1115 #endif
1116 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001117 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001118 case FORMAT_R11_EAC:
1119 case FORMAT_SIGNED_R11_EAC:
1120 case FORMAT_RGB8_ETC2:
1121 case FORMAT_SRGB8_ETC2:
1122 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1123 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001124 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001125 case FORMAT_RG11_EAC:
1126 case FORMAT_SIGNED_RG11_EAC:
1127 case FORMAT_RGBA8_ETC2_EAC:
1128 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1129 case FORMAT_RGBA_ASTC_4x4_KHR:
1130 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1131 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1132 case FORMAT_RGBA_ASTC_5x4_KHR:
1133 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1134 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1135 case FORMAT_RGBA_ASTC_5x5_KHR:
1136 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1137 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1138 case FORMAT_RGBA_ASTC_6x5_KHR:
1139 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1140 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1141 case FORMAT_RGBA_ASTC_6x6_KHR:
1142 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1143 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1144 case FORMAT_RGBA_ASTC_8x5_KHR:
1145 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1146 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1147 case FORMAT_RGBA_ASTC_8x6_KHR:
1148 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1149 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1150 case FORMAT_RGBA_ASTC_8x8_KHR:
1151 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1152 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1153 case FORMAT_RGBA_ASTC_10x5_KHR:
1154 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1155 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1156 case FORMAT_RGBA_ASTC_10x6_KHR:
1157 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1158 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1159 case FORMAT_RGBA_ASTC_10x8_KHR:
1160 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1161 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1162 case FORMAT_RGBA_ASTC_10x10_KHR:
1163 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1164 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1165 case FORMAT_RGBA_ASTC_12x10_KHR:
1166 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1167 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1168 case FORMAT_RGBA_ASTC_12x12_KHR:
1169 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1170 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001171 #if S3TC_SUPPORT
1172 case FORMAT_DXT3:
1173 case FORMAT_DXT5:
1174 #endif
1175 case FORMAT_ATI2:
1176 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1177 default:
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001178 return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001179 }
John Bauman89401822014-05-06 15:04:28 -04001180 }
1181
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001182 return nullptr;
John Bauman89401822014-05-06 15:04:28 -04001183 }
1184
1185 void Surface::Buffer::unlockRect()
1186 {
1187 lock = LOCK_UNLOCKED;
1188 }
1189
Nicolas Capensf41f0332017-05-30 15:25:50 -04001190 class SurfaceImplementation : public Surface
1191 {
1192 public:
1193 SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1194 : Surface(width, height, depth, format, pixels, pitch, slice) {}
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001195 SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
1196 : Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}
Nicolas Capensf41f0332017-05-30 15:25:50 -04001197 ~SurfaceImplementation() override {};
1198
1199 void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
1200 {
1201 return Surface::lockInternal(x, y, z, lock, client);
1202 }
1203
1204 void unlockInternal() override
1205 {
1206 Surface::unlockInternal();
1207 }
1208 };
1209
1210 Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1211 {
1212 return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
1213 }
1214
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001215 Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
Nicolas Capensf41f0332017-05-30 15:25:50 -04001216 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001217 return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
Nicolas Capensf41f0332017-05-30 15:25:50 -04001218 }
1219
Nicolas Capens477314b2015-06-09 16:47:29 -04001220 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1221 {
1222 resource = new Resource(0);
1223 hasParent = false;
1224 ownExternal = false;
1225 depth = max(1, depth);
1226
1227 external.buffer = pixels;
1228 external.width = width;
1229 external.height = height;
1230 external.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001231 external.samples = 1;
Nicolas Capens477314b2015-06-09 16:47:29 -04001232 external.format = format;
1233 external.bytes = bytes(external.format);
1234 external.pitchB = pitch;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001235 external.pitchP = external.bytes ? pitch / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001236 external.sliceB = slice;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001237 external.sliceP = external.bytes ? slice / external.bytes : 0;
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001238 external.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001239 external.lock = LOCK_UNLOCKED;
1240 external.dirty = true;
1241
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001242 internal.buffer = nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001243 internal.width = width;
1244 internal.height = height;
1245 internal.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001246 internal.samples = 1;
Nicolas Capens477314b2015-06-09 16:47:29 -04001247 internal.format = selectInternalFormat(format);
1248 internal.bytes = bytes(internal.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001249 internal.pitchB = pitchB(internal.width, 0, internal.format, false);
1250 internal.pitchP = pitchP(internal.width, 0, internal.format, false);
1251 internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
1252 internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
1253 internal.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001254 internal.lock = LOCK_UNLOCKED;
1255 internal.dirty = false;
1256
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001257 stencil.buffer = nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001258 stencil.width = width;
1259 stencil.height = height;
1260 stencil.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001261 stencil.samples = 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001262 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
Nicolas Capens477314b2015-06-09 16:47:29 -04001263 stencil.bytes = bytes(stencil.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001264 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
1265 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
1266 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
1267 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
1268 stencil.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001269 stencil.lock = LOCK_UNLOCKED;
1270 stencil.dirty = false;
1271
Nicolas Capens73e18c12017-11-28 13:31:35 -05001272 dirtyContents = true;
Nicolas Capens477314b2015-06-09 16:47:29 -04001273 paletteUsed = 0;
1274 }
1275
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001276 Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001277 {
1278 resource = texture ? texture : new Resource(0);
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001279 hasParent = texture != nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001280 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001281 depth = max(1, depth);
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001282 samples = max(1, samples);
John Bauman89401822014-05-06 15:04:28 -04001283
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001284 external.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001285 external.width = width;
1286 external.height = height;
1287 external.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001288 external.samples = (short)samples;
John Bauman89401822014-05-06 15:04:28 -04001289 external.format = format;
1290 external.bytes = bytes(external.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001291 external.pitchB = pitchB(external.width, 0, external.format, renderTarget && !texture);
1292 external.pitchP = pitchP(external.width, 0, external.format, renderTarget && !texture);
1293 external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
1294 external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
1295 external.border = 0;
John Bauman89401822014-05-06 15:04:28 -04001296 external.lock = LOCK_UNLOCKED;
1297 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001298
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001299 internal.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001300 internal.width = width;
1301 internal.height = height;
1302 internal.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001303 internal.samples = (short)samples;
John Bauman89401822014-05-06 15:04:28 -04001304 internal.format = selectInternalFormat(format);
1305 internal.bytes = bytes(internal.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001306 internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1307 internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
1308 internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
1309 internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001310 internal.border = (short)border;
John Bauman89401822014-05-06 15:04:28 -04001311 internal.lock = LOCK_UNLOCKED;
1312 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001313
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001314 stencil.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001315 stencil.width = width;
1316 stencil.height = height;
1317 stencil.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001318 stencil.samples = (short)samples;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001319 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
John Bauman89401822014-05-06 15:04:28 -04001320 stencil.bytes = bytes(stencil.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001321 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
1322 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
1323 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1324 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1325 stencil.border = 0;
John Bauman89401822014-05-06 15:04:28 -04001326 stencil.lock = LOCK_UNLOCKED;
1327 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001328
Nicolas Capens73e18c12017-11-28 13:31:35 -05001329 dirtyContents = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001330 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001331 }
1332
1333 Surface::~Surface()
1334 {
Nicolas Capensbf7a8142017-05-19 10:57:28 -04001335 // sync() must be called before this destructor to ensure all locks have been released.
1336 // We can't call it here because the parent resource may already have been destroyed.
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001337 ASSERT(isUnlocked());
John Bauman8a4f6fc2014-05-06 15:26:18 -04001338
John Bauman89401822014-05-06 15:04:28 -04001339 if(!hasParent)
1340 {
1341 resource->destruct();
1342 }
1343
Nicolas Capens477314b2015-06-09 16:47:29 -04001344 if(ownExternal)
1345 {
1346 deallocate(external.buffer);
1347 }
John Bauman89401822014-05-06 15:04:28 -04001348
1349 if(internal.buffer != external.buffer)
1350 {
1351 deallocate(internal.buffer);
1352 }
1353
1354 deallocate(stencil.buffer);
1355
1356 external.buffer = 0;
1357 internal.buffer = 0;
1358 stencil.buffer = 0;
1359 }
1360
John Bauman19bac1e2014-05-06 15:23:49 -04001361 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001362 {
1363 resource->lock(client);
1364
1365 if(!external.buffer)
1366 {
1367 if(internal.buffer && identicalFormats())
1368 {
1369 external.buffer = internal.buffer;
1370 }
1371 else
1372 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001373 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
John Bauman89401822014-05-06 15:04:28 -04001374 }
1375 }
1376
1377 if(internal.dirty)
1378 {
1379 if(lock != LOCK_DISCARD)
1380 {
1381 update(external, internal);
1382 }
John Bauman66b8ab22014-05-06 15:57:45 -04001383
1384 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001385 }
1386
1387 switch(lock)
1388 {
1389 case LOCK_READONLY:
1390 break;
1391 case LOCK_WRITEONLY:
1392 case LOCK_READWRITE:
1393 case LOCK_DISCARD:
Nicolas Capens73e18c12017-11-28 13:31:35 -05001394 dirtyContents = true;
John Bauman89401822014-05-06 15:04:28 -04001395 break;
1396 default:
1397 ASSERT(false);
1398 }
1399
John Bauman19bac1e2014-05-06 15:23:49 -04001400 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001401 }
1402
1403 void Surface::unlockExternal()
1404 {
John Bauman89401822014-05-06 15:04:28 -04001405 external.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001406
1407 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001408 }
1409
John Bauman19bac1e2014-05-06 15:23:49 -04001410 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001411 {
1412 if(lock != LOCK_UNLOCKED)
1413 {
1414 resource->lock(client);
1415 }
1416
1417 if(!internal.buffer)
1418 {
1419 if(external.buffer && identicalFormats())
1420 {
1421 internal.buffer = external.buffer;
1422 }
1423 else
1424 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001425 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
John Bauman89401822014-05-06 15:04:28 -04001426 }
1427 }
1428
1429 // FIXME: WHQL requires conversion to lower external precision and back
1430 if(logPrecision >= WHQL)
1431 {
1432 if(internal.dirty && renderTarget && internal.format != external.format)
1433 {
1434 if(lock != LOCK_DISCARD)
1435 {
1436 switch(external.format)
1437 {
1438 case FORMAT_R3G3B2:
1439 case FORMAT_A8R3G3B2:
1440 case FORMAT_A1R5G5B5:
1441 case FORMAT_A2R10G10B10:
1442 case FORMAT_A2B10G10R10:
1443 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1444 unlockExternal();
1445 break;
1446 default:
1447 // Difference passes WHQL
1448 break;
1449 }
1450 }
1451 }
1452 }
1453
John Bauman66b8ab22014-05-06 15:57:45 -04001454 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001455 {
1456 if(lock != LOCK_DISCARD)
1457 {
1458 update(internal, external);
1459 }
John Bauman89401822014-05-06 15:04:28 -04001460
John Bauman66b8ab22014-05-06 15:57:45 -04001461 external.dirty = false;
1462 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001463 }
1464
1465 switch(lock)
1466 {
1467 case LOCK_UNLOCKED:
1468 case LOCK_READONLY:
1469 break;
1470 case LOCK_WRITEONLY:
1471 case LOCK_READWRITE:
1472 case LOCK_DISCARD:
Nicolas Capens73e18c12017-11-28 13:31:35 -05001473 dirtyContents = true;
John Bauman89401822014-05-06 15:04:28 -04001474 break;
1475 default:
1476 ASSERT(false);
1477 }
1478
1479 if(lock == LOCK_READONLY && client == PUBLIC)
1480 {
1481 resolve();
1482 }
1483
John Bauman19bac1e2014-05-06 15:23:49 -04001484 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001485 }
1486
1487 void Surface::unlockInternal()
1488 {
John Bauman89401822014-05-06 15:04:28 -04001489 internal.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001490
1491 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001492 }
1493
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001494 void *Surface::lockStencil(int x, int y, int front, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001495 {
1496 resource->lock(client);
1497
1498 if(!stencil.buffer)
1499 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001500 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
John Bauman89401822014-05-06 15:04:28 -04001501 }
1502
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001503 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
John Bauman89401822014-05-06 15:04:28 -04001504 }
1505
1506 void Surface::unlockStencil()
1507 {
John Bauman89401822014-05-06 15:04:28 -04001508 stencil.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001509
1510 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001511 }
1512
1513 int Surface::bytes(Format format)
1514 {
1515 switch(format)
1516 {
1517 case FORMAT_NULL: return 0;
1518 case FORMAT_P8: return 1;
1519 case FORMAT_A8P8: return 2;
1520 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001521 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001522 case FORMAT_R8: return 1;
1523 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001524 case FORMAT_R16I: return 2;
1525 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001526 case FORMAT_A8R3G3B2: return 2;
1527 case FORMAT_R5G6B5: return 2;
1528 case FORMAT_A1R5G5B5: return 2;
1529 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001530 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001531 case FORMAT_X4R4G4B4: return 2;
1532 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001533 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001534 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001535 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001536 case FORMAT_R32I: return 4;
1537 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001538 case FORMAT_X8R8G8B8: return 4;
1539 // case FORMAT_X8G8R8B8Q: return 4;
1540 case FORMAT_A8R8G8B8: return 4;
1541 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001542 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001543 case FORMAT_X8B8G8R8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04001544 case FORMAT_SRGB8_X8: return 4;
1545 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001546 case FORMAT_A8B8G8R8I: return 4;
1547 case FORMAT_R8UI: return 1;
1548 case FORMAT_G8R8UI: return 2;
1549 case FORMAT_X8B8G8R8UI: return 4;
1550 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001551 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001552 case FORMAT_R8I_SNORM: return 1;
1553 case FORMAT_G8R8I_SNORM: return 2;
1554 case FORMAT_X8B8G8R8I_SNORM: return 4;
1555 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001556 case FORMAT_A2R10G10B10: return 4;
1557 case FORMAT_A2B10G10R10: return 4;
Nicolas Capens5555af42017-12-14 13:14:03 -05001558 case FORMAT_A2B10G10R10UI: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001559 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001560 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001561 case FORMAT_G16R16I: return 4;
1562 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001563 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001564 case FORMAT_G32R32I: return 8;
1565 case FORMAT_G32R32UI: return 8;
1566 case FORMAT_X16B16G16R16I: return 8;
1567 case FORMAT_X16B16G16R16UI: return 8;
1568 case FORMAT_A16B16G16R16I: return 8;
1569 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001570 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001571 case FORMAT_X32B32G32R32I: return 16;
1572 case FORMAT_X32B32G32R32UI: return 16;
1573 case FORMAT_A32B32G32R32I: return 16;
1574 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001575 // Compressed formats
1576 #if S3TC_SUPPORT
1577 case FORMAT_DXT1: return 2; // Column of four pixels
1578 case FORMAT_DXT3: return 4; // Column of four pixels
1579 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001580 #endif
John Bauman89401822014-05-06 15:04:28 -04001581 case FORMAT_ATI1: return 2; // Column of four pixels
1582 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001583 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001584 case FORMAT_R11_EAC: return 2;
1585 case FORMAT_SIGNED_R11_EAC: return 2;
1586 case FORMAT_RG11_EAC: return 4;
1587 case FORMAT_SIGNED_RG11_EAC: return 4;
1588 case FORMAT_RGB8_ETC2: return 2;
1589 case FORMAT_SRGB8_ETC2: return 2;
1590 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1591 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1592 case FORMAT_RGBA8_ETC2_EAC: return 4;
1593 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1594 case FORMAT_RGBA_ASTC_4x4_KHR:
1595 case FORMAT_RGBA_ASTC_5x4_KHR:
1596 case FORMAT_RGBA_ASTC_5x5_KHR:
1597 case FORMAT_RGBA_ASTC_6x5_KHR:
1598 case FORMAT_RGBA_ASTC_6x6_KHR:
1599 case FORMAT_RGBA_ASTC_8x5_KHR:
1600 case FORMAT_RGBA_ASTC_8x6_KHR:
1601 case FORMAT_RGBA_ASTC_8x8_KHR:
1602 case FORMAT_RGBA_ASTC_10x5_KHR:
1603 case FORMAT_RGBA_ASTC_10x6_KHR:
1604 case FORMAT_RGBA_ASTC_10x8_KHR:
1605 case FORMAT_RGBA_ASTC_10x10_KHR:
1606 case FORMAT_RGBA_ASTC_12x10_KHR:
1607 case FORMAT_RGBA_ASTC_12x12_KHR:
1608 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1609 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1610 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1611 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1612 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1613 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1614 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1615 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1616 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1617 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1618 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1619 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1620 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1621 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001622 // Bumpmap formats
1623 case FORMAT_V8U8: return 2;
1624 case FORMAT_L6V5U5: return 2;
1625 case FORMAT_Q8W8V8U8: return 4;
1626 case FORMAT_X8L8V8U8: return 4;
1627 case FORMAT_A2W10V10U10: return 4;
1628 case FORMAT_V16U16: return 4;
1629 case FORMAT_A16W16V16U16: return 8;
1630 case FORMAT_Q16W16V16U16: return 8;
1631 // Luminance formats
1632 case FORMAT_L8: return 1;
1633 case FORMAT_A4L4: return 1;
1634 case FORMAT_L16: return 2;
1635 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001636 case FORMAT_L16F: return 2;
1637 case FORMAT_A16L16F: return 4;
1638 case FORMAT_L32F: return 4;
1639 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001640 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001641 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001642 case FORMAT_R16F: return 2;
1643 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001644 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001645 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001646 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001647 case FORMAT_R32F: return 4;
1648 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001649 case FORMAT_B32G32R32F: return 12;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001650 case FORMAT_X32B32G32R32F: return 16;
John Bauman89401822014-05-06 15:04:28 -04001651 case FORMAT_A32B32G32R32F: return 16;
1652 // Depth/stencil formats
1653 case FORMAT_D16: return 2;
1654 case FORMAT_D32: return 4;
1655 case FORMAT_D24X8: return 4;
1656 case FORMAT_D24S8: return 4;
1657 case FORMAT_D24FS8: return 4;
1658 case FORMAT_D32F: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001659 case FORMAT_D32FS8: return 4;
John Bauman89401822014-05-06 15:04:28 -04001660 case FORMAT_D32F_COMPLEMENTARY: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001661 case FORMAT_D32FS8_COMPLEMENTARY: return 4;
John Bauman89401822014-05-06 15:04:28 -04001662 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001663 case FORMAT_D32FS8_TEXTURE: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001664 case FORMAT_D32F_SHADOW: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001665 case FORMAT_D32FS8_SHADOW: return 4;
1666 case FORMAT_DF24S8: return 4;
1667 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001668 case FORMAT_INTZ: return 4;
1669 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001670 case FORMAT_YV12_BT601: return 1; // Y plane only
1671 case FORMAT_YV12_BT709: return 1; // Y plane only
1672 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001673 default:
1674 ASSERT(false);
1675 }
1676
1677 return 0;
1678 }
1679
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001680 int Surface::pitchB(int width, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001681 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001682 width += 2 * border;
1683
John Bauman89401822014-05-06 15:04:28 -04001684 if(target || isDepth(format) || isStencil(format))
1685 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001686 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001687 }
1688
1689 switch(format)
1690 {
1691 #if S3TC_SUPPORT
1692 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001693 #endif
1694 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001695 case FORMAT_R11_EAC:
1696 case FORMAT_SIGNED_R11_EAC:
1697 case FORMAT_RGB8_ETC2:
1698 case FORMAT_SRGB8_ETC2:
1699 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1700 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001701 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001702 case FORMAT_RG11_EAC:
1703 case FORMAT_SIGNED_RG11_EAC:
1704 case FORMAT_RGBA8_ETC2_EAC:
1705 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1706 case FORMAT_RGBA_ASTC_4x4_KHR:
1707 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1708 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1709 case FORMAT_RGBA_ASTC_5x4_KHR:
1710 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1711 case FORMAT_RGBA_ASTC_5x5_KHR:
1712 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1713 return 16 * ((width + 4) / 5);
1714 case FORMAT_RGBA_ASTC_6x5_KHR:
1715 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1716 case FORMAT_RGBA_ASTC_6x6_KHR:
1717 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1718 return 16 * ((width + 5) / 6);
1719 case FORMAT_RGBA_ASTC_8x5_KHR:
1720 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1721 case FORMAT_RGBA_ASTC_8x6_KHR:
1722 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1723 case FORMAT_RGBA_ASTC_8x8_KHR:
1724 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1725 return 16 * ((width + 7) / 8);
1726 case FORMAT_RGBA_ASTC_10x5_KHR:
1727 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1728 case FORMAT_RGBA_ASTC_10x6_KHR:
1729 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1730 case FORMAT_RGBA_ASTC_10x8_KHR:
1731 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1732 case FORMAT_RGBA_ASTC_10x10_KHR:
1733 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1734 return 16 * ((width + 9) / 10);
1735 case FORMAT_RGBA_ASTC_12x10_KHR:
1736 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1737 case FORMAT_RGBA_ASTC_12x12_KHR:
1738 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1739 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001740 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001741 case FORMAT_DXT3:
1742 case FORMAT_DXT5:
1743 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001744 #endif
John Bauman89401822014-05-06 15:04:28 -04001745 case FORMAT_ATI1:
1746 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1747 case FORMAT_ATI2:
1748 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001749 case FORMAT_YV12_BT601:
1750 case FORMAT_YV12_BT709:
1751 case FORMAT_YV12_JFIF:
1752 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001753 default:
1754 return bytes(format) * width;
1755 }
1756 }
1757
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001758 int Surface::pitchP(int width, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001759 {
1760 int B = bytes(format);
1761
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001762 return B > 0 ? pitchB(width, border, format, target) / B : 0;
John Bauman89401822014-05-06 15:04:28 -04001763 }
1764
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001765 int Surface::sliceB(int width, int height, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001766 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001767 height += 2 * border;
1768
John Bauman89401822014-05-06 15:04:28 -04001769 if(target || isDepth(format) || isStencil(format))
1770 {
1771 height = ((height + 1) & ~1);
1772 }
1773
1774 switch(format)
1775 {
1776 #if S3TC_SUPPORT
1777 case FORMAT_DXT1:
1778 case FORMAT_DXT3:
1779 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001780 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001781 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001782 case FORMAT_R11_EAC:
1783 case FORMAT_SIGNED_R11_EAC:
1784 case FORMAT_RG11_EAC:
1785 case FORMAT_SIGNED_RG11_EAC:
1786 case FORMAT_RGB8_ETC2:
1787 case FORMAT_SRGB8_ETC2:
1788 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1789 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1790 case FORMAT_RGBA8_ETC2_EAC:
1791 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1792 case FORMAT_RGBA_ASTC_4x4_KHR:
1793 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1794 case FORMAT_RGBA_ASTC_5x4_KHR:
1795 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001796 return pitchB(width, border, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001797 case FORMAT_RGBA_ASTC_5x5_KHR:
1798 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1799 case FORMAT_RGBA_ASTC_6x5_KHR:
1800 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1801 case FORMAT_RGBA_ASTC_8x5_KHR:
1802 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1803 case FORMAT_RGBA_ASTC_10x5_KHR:
1804 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001805 return pitchB(width, border, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001806 case FORMAT_RGBA_ASTC_6x6_KHR:
1807 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1808 case FORMAT_RGBA_ASTC_8x6_KHR:
1809 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1810 case FORMAT_RGBA_ASTC_10x6_KHR:
1811 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001812 return pitchB(width, border, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001813 case FORMAT_RGBA_ASTC_8x8_KHR:
1814 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1815 case FORMAT_RGBA_ASTC_10x8_KHR:
1816 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001817 return pitchB(width, border, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001818 case FORMAT_RGBA_ASTC_10x10_KHR:
1819 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1820 case FORMAT_RGBA_ASTC_12x10_KHR:
1821 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001822 return pitchB(width, border, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001823 case FORMAT_RGBA_ASTC_12x12_KHR:
1824 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001825 return pitchB(width, border, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001826 case FORMAT_ATI1:
1827 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001828 default:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001829 return pitchB(width, border, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001830 }
1831 }
1832
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001833 int Surface::sliceP(int width, int height, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001834 {
1835 int B = bytes(format);
1836
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001837 return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
John Bauman89401822014-05-06 15:04:28 -04001838 }
1839
1840 void Surface::update(Buffer &destination, Buffer &source)
1841 {
1842 // ASSERT(source.lock != LOCK_UNLOCKED);
1843 // ASSERT(destination.lock != LOCK_UNLOCKED);
Nicolas Capensc39901e2016-03-21 16:37:44 -04001844
John Bauman89401822014-05-06 15:04:28 -04001845 if(destination.buffer != source.buffer)
1846 {
1847 ASSERT(source.dirty && !destination.dirty);
1848
1849 switch(source.format)
1850 {
1851 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001852 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1853 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1854 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1855 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1856 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1857 #if S3TC_SUPPORT
1858 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1859 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1860 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001861 #endif
John Bauman89401822014-05-06 15:04:28 -04001862 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1863 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001864 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1865 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1866 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1867 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001868 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001869 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1870 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1871 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1872 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1873 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1874 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1875 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1876 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1877 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1878 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1879 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1880 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1881 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1882 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1883 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1884 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1885 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1886 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1887 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1888 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1889 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1890 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1891 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1892 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1893 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1894 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1895 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1896 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1897 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1898 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1899 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1900 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1901 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1902 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001903 default: genericUpdate(destination, source); break;
1904 }
1905 }
John Bauman89401822014-05-06 15:04:28 -04001906 }
1907
1908 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1909 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001910 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1911 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04001912
1913 int depth = min(destination.depth, source.depth);
1914 int height = min(destination.height, source.height);
1915 int width = min(destination.width, source.width);
1916 int rowBytes = width * source.bytes;
1917
1918 for(int z = 0; z < depth; z++)
1919 {
1920 unsigned char *sourceRow = sourceSlice;
1921 unsigned char *destinationRow = destinationSlice;
1922
1923 for(int y = 0; y < height; y++)
1924 {
1925 if(source.format == destination.format)
1926 {
1927 memcpy(destinationRow, sourceRow, rowBytes);
1928 }
1929 else
1930 {
1931 unsigned char *sourceElement = sourceRow;
1932 unsigned char *destinationElement = destinationRow;
1933
1934 for(int x = 0; x < width; x++)
1935 {
1936 Color<float> color = source.read(sourceElement);
1937 destination.write(destinationElement, color);
1938
1939 sourceElement += source.bytes;
1940 destinationElement += destination.bytes;
1941 }
1942 }
1943
1944 sourceRow += source.pitchB;
1945 destinationRow += destination.pitchB;
1946 }
1947
1948 sourceSlice += source.sliceB;
1949 destinationSlice += destination.sliceB;
1950 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001951
1952 source.unlockRect();
1953 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001954 }
1955
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001956 void Surface::decodeR8G8B8(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001957 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001958 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1959 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04001960
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001961 int depth = min(destination.depth, source.depth);
1962 int height = min(destination.height, source.height);
1963 int width = min(destination.width, source.width);
1964
1965 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04001966 {
1967 unsigned char *sourceRow = sourceSlice;
1968 unsigned char *destinationRow = destinationSlice;
1969
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001970 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04001971 {
1972 unsigned char *sourceElement = sourceRow;
1973 unsigned char *destinationElement = destinationRow;
1974
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001975 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04001976 {
1977 unsigned int b = sourceElement[0];
1978 unsigned int g = sourceElement[1];
1979 unsigned int r = sourceElement[2];
1980
1981 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1982
1983 sourceElement += source.bytes;
1984 destinationElement += destination.bytes;
1985 }
1986
1987 sourceRow += source.pitchB;
1988 destinationRow += destination.pitchB;
1989 }
1990
1991 sourceSlice += source.sliceB;
1992 destinationSlice += destination.sliceB;
1993 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001994
1995 source.unlockRect();
1996 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001997 }
1998
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001999 void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002000 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002001 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2002 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002003
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002004 int depth = min(destination.depth, source.depth);
2005 int height = min(destination.height, source.height);
2006 int width = min(destination.width, source.width);
2007
2008 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002009 {
2010 unsigned char *sourceRow = sourceSlice;
2011 unsigned char *destinationRow = destinationSlice;
2012
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002013 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002014 {
2015 unsigned char *sourceElement = sourceRow;
2016 unsigned char *destinationElement = destinationRow;
2017
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002018 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002019 {
2020 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002021
John Bauman89401822014-05-06 15:04:28 -04002022 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
2023 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
2024 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
2025
2026 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2027
2028 sourceElement += source.bytes;
2029 destinationElement += destination.bytes;
2030 }
2031
2032 sourceRow += source.pitchB;
2033 destinationRow += destination.pitchB;
2034 }
2035
2036 sourceSlice += source.sliceB;
2037 destinationSlice += destination.sliceB;
2038 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002039
2040 source.unlockRect();
2041 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002042 }
2043
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002044 void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002045 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002046 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2047 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002048
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002049 int depth = min(destination.depth, source.depth);
2050 int height = min(destination.height, source.height);
2051 int width = min(destination.width, source.width);
2052
2053 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002054 {
2055 unsigned char *sourceRow = sourceSlice;
2056 unsigned char *destinationRow = destinationSlice;
2057
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002058 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002059 {
2060 unsigned char *sourceElement = sourceRow;
2061 unsigned char *destinationElement = destinationRow;
2062
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002063 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002064 {
2065 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002066
John Bauman89401822014-05-06 15:04:28 -04002067 unsigned int a = (argb & 0x8000) * 130560;
2068 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
2069 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
2070 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
2071
2072 *(unsigned int*)destinationElement = a | r | g | b;
2073
2074 sourceElement += source.bytes;
2075 destinationElement += destination.bytes;
2076 }
2077
2078 sourceRow += source.pitchB;
2079 destinationRow += destination.pitchB;
2080 }
2081
2082 sourceSlice += source.sliceB;
2083 destinationSlice += destination.sliceB;
2084 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002085
2086 source.unlockRect();
2087 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002088 }
2089
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002090 void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002091 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002092 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2093 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002094
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002095 int depth = min(destination.depth, source.depth);
2096 int height = min(destination.height, source.height);
2097 int width = min(destination.width, source.width);
2098
2099 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002100 {
2101 unsigned char *sourceRow = sourceSlice;
2102 unsigned char *destinationRow = destinationSlice;
2103
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002104 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002105 {
2106 unsigned char *sourceElement = sourceRow;
2107 unsigned char *destinationElement = destinationRow;
2108
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002109 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002110 {
2111 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002112
John Bauman89401822014-05-06 15:04:28 -04002113 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2114 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2115 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2116
2117 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2118
2119 sourceElement += source.bytes;
2120 destinationElement += destination.bytes;
2121 }
2122
2123 sourceRow += source.pitchB;
2124 destinationRow += destination.pitchB;
2125 }
2126
2127 sourceSlice += source.sliceB;
2128 destinationSlice += destination.sliceB;
2129 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002130
2131 source.unlockRect();
2132 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002133 }
2134
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002135 void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002136 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002137 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2138 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002139
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002140 int depth = min(destination.depth, source.depth);
2141 int height = min(destination.height, source.height);
2142 int width = min(destination.width, source.width);
2143
2144 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002145 {
2146 unsigned char *sourceRow = sourceSlice;
2147 unsigned char *destinationRow = destinationSlice;
2148
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002149 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002150 {
2151 unsigned char *sourceElement = sourceRow;
2152 unsigned char *destinationElement = destinationRow;
2153
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002154 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002155 {
2156 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002157
John Bauman89401822014-05-06 15:04:28 -04002158 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2159 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2160 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2161 unsigned int b = (argb & 0x000F) * 0x00000011;
2162
2163 *(unsigned int*)destinationElement = a | r | g | b;
2164
2165 sourceElement += source.bytes;
2166 destinationElement += destination.bytes;
2167 }
2168
2169 sourceRow += source.pitchB;
2170 destinationRow += destination.pitchB;
2171 }
2172
2173 sourceSlice += source.sliceB;
2174 destinationSlice += destination.sliceB;
2175 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002176
2177 source.unlockRect();
2178 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002179 }
2180
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002181 void Surface::decodeP8(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002182 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002183 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2184 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002185
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002186 int depth = min(destination.depth, source.depth);
2187 int height = min(destination.height, source.height);
2188 int width = min(destination.width, source.width);
2189
2190 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002191 {
2192 unsigned char *sourceRow = sourceSlice;
2193 unsigned char *destinationRow = destinationSlice;
2194
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002195 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002196 {
2197 unsigned char *sourceElement = sourceRow;
2198 unsigned char *destinationElement = destinationRow;
2199
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002200 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002201 {
2202 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2203
2204 unsigned int r = (abgr & 0x000000FF) << 16;
2205 unsigned int g = (abgr & 0x0000FF00) << 0;
2206 unsigned int b = (abgr & 0x00FF0000) >> 16;
2207 unsigned int a = (abgr & 0xFF000000) >> 0;
2208
2209 *(unsigned int*)destinationElement = a | r | g | b;
2210
2211 sourceElement += source.bytes;
2212 destinationElement += destination.bytes;
2213 }
2214
2215 sourceRow += source.pitchB;
2216 destinationRow += destination.pitchB;
2217 }
2218
2219 sourceSlice += source.sliceB;
2220 destinationSlice += destination.sliceB;
2221 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002222
2223 source.unlockRect();
2224 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002225 }
2226
2227#if S3TC_SUPPORT
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002228 void Surface::decodeDXT1(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002229 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002230 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2231 const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002232
2233 for(int z = 0; z < external.depth; z++)
2234 {
2235 unsigned int *dest = destSlice;
2236
2237 for(int y = 0; y < external.height; y += 4)
2238 {
2239 for(int x = 0; x < external.width; x += 4)
2240 {
2241 Color<byte> c[4];
2242
2243 c[0] = source->c0;
2244 c[1] = source->c1;
2245
2246 if(source->c0 > source->c1) // No transparency
2247 {
2248 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2249 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2250 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2251 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2252 c[2].a = 0xFF;
2253
2254 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2255 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2256 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2257 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2258 c[3].a = 0xFF;
2259 }
2260 else // c3 transparent
2261 {
2262 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2263 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2264 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2265 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2266 c[2].a = 0xFF;
2267
2268 c[3].r = 0;
2269 c[3].g = 0;
2270 c[3].b = 0;
2271 c[3].a = 0;
2272 }
2273
2274 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2275 {
2276 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2277 {
2278 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2279 }
2280 }
2281
2282 source++;
2283 }
2284 }
2285
2286 (byte*&)destSlice += internal.sliceB;
2287 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002288
2289 external.unlockRect();
2290 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002291 }
2292
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002293 void Surface::decodeDXT3(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002294 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002295 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2296 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002297
2298 for(int z = 0; z < external.depth; z++)
2299 {
2300 unsigned int *dest = destSlice;
2301
2302 for(int y = 0; y < external.height; y += 4)
2303 {
2304 for(int x = 0; x < external.width; x += 4)
2305 {
2306 Color<byte> c[4];
2307
2308 c[0] = source->c0;
2309 c[1] = source->c1;
2310
2311 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2312 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2313 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2314 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2315
2316 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2317 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2318 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2319 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2320
2321 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2322 {
2323 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2324 {
2325 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2326 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2327
2328 dest[(x + i) + (y + j) * internal.width] = color;
2329 }
2330 }
2331
2332 source++;
2333 }
2334 }
2335
2336 (byte*&)destSlice += internal.sliceB;
2337 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002338
2339 external.unlockRect();
2340 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002341 }
2342
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002343 void Surface::decodeDXT5(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002344 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002345 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2346 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002347
2348 for(int z = 0; z < external.depth; z++)
2349 {
2350 unsigned int *dest = destSlice;
2351
2352 for(int y = 0; y < external.height; y += 4)
2353 {
2354 for(int x = 0; x < external.width; x += 4)
2355 {
2356 Color<byte> c[4];
2357
2358 c[0] = source->c0;
2359 c[1] = source->c1;
2360
2361 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2362 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2363 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2364 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2365
2366 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2367 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2368 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2369 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2370
2371 byte a[8];
2372
2373 a[0] = source->a0;
2374 a[1] = source->a1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002375
John Bauman89401822014-05-06 15:04:28 -04002376 if(a[0] > a[1])
2377 {
2378 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2379 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2380 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2381 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2382 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2383 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2384 }
2385 else
2386 {
2387 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2388 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2389 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2390 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2391 a[6] = 0;
2392 a[7] = 0xFF;
2393 }
2394
2395 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2396 {
2397 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2398 {
2399 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2400 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002401
John Bauman89401822014-05-06 15:04:28 -04002402 dest[(x + i) + (y + j) * internal.width] = color;
2403 }
2404 }
2405
2406 source++;
2407 }
2408 }
2409
2410 (byte*&)destSlice += internal.sliceB;
2411 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002412
2413 external.unlockRect();
2414 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002415 }
Nicolas Capens22658242014-11-29 00:31:41 -05002416#endif
John Bauman89401822014-05-06 15:04:28 -04002417
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002418 void Surface::decodeATI1(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002419 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002420 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2421 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002422
2423 for(int z = 0; z < external.depth; z++)
2424 {
2425 byte *dest = destSlice;
2426
2427 for(int y = 0; y < external.height; y += 4)
2428 {
2429 for(int x = 0; x < external.width; x += 4)
2430 {
2431 byte r[8];
2432
2433 r[0] = source->r0;
2434 r[1] = source->r1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002435
John Bauman89401822014-05-06 15:04:28 -04002436 if(r[0] > r[1])
2437 {
2438 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2439 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2440 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2441 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2442 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2443 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2444 }
2445 else
2446 {
2447 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2448 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2449 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2450 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2451 r[6] = 0;
2452 r[7] = 0xFF;
2453 }
2454
2455 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2456 {
2457 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2458 {
2459 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2460 }
2461 }
2462
2463 source++;
2464 }
2465 }
2466
2467 destSlice += internal.sliceB;
2468 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002469
2470 external.unlockRect();
2471 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002472 }
2473
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002474 void Surface::decodeATI2(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002475 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002476 word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2477 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002478
2479 for(int z = 0; z < external.depth; z++)
2480 {
2481 word *dest = destSlice;
2482
2483 for(int y = 0; y < external.height; y += 4)
2484 {
2485 for(int x = 0; x < external.width; x += 4)
2486 {
2487 byte X[8];
2488
2489 X[0] = source->x0;
2490 X[1] = source->x1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002491
John Bauman89401822014-05-06 15:04:28 -04002492 if(X[0] > X[1])
2493 {
2494 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2495 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2496 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2497 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2498 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2499 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2500 }
2501 else
2502 {
2503 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2504 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2505 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2506 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2507 X[6] = 0;
2508 X[7] = 0xFF;
2509 }
2510
2511 byte Y[8];
2512
2513 Y[0] = source->y0;
2514 Y[1] = source->y1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002515
John Bauman89401822014-05-06 15:04:28 -04002516 if(Y[0] > Y[1])
2517 {
2518 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2519 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2520 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2521 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2522 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2523 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2524 }
2525 else
2526 {
2527 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2528 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2529 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2530 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2531 Y[6] = 0;
2532 Y[7] = 0xFF;
2533 }
2534
2535 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2536 {
2537 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2538 {
2539 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2540 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2541
2542 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2543 }
2544 }
2545
2546 source++;
2547 }
2548 }
2549
2550 (byte*&)destSlice += internal.sliceB;
2551 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002552
2553 external.unlockRect();
2554 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002555 }
Nicolas Capens22658242014-11-29 00:31:41 -05002556
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002557 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002558 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002559 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
Alexis Hetu0de50d42015-09-09 13:56:41 -04002560 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002561 external.unlockRect();
2562 internal.unlockRect();
Nicolas Capens22658242014-11-29 00:31:41 -05002563
Alexis Hetu0de50d42015-09-09 13:56:41 -04002564 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002565 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002566 static byte sRGBtoLinearTable[256];
2567 static bool sRGBtoLinearTableDirty = true;
2568 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002569 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002570 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002571 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002572 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002573 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002574 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002575 }
2576
Alexis Hetu0de50d42015-09-09 13:56:41 -04002577 // Perform sRGB conversion in place after decoding
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002578 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
Alexis Hetu0de50d42015-09-09 13:56:41 -04002579 for(int y = 0; y < internal.height; y++)
2580 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002581 byte *srcRow = src + y * internal.pitchB;
Alexis Hetu0de50d42015-09-09 13:56:41 -04002582 for(int x = 0; x < internal.width; x++)
2583 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002584 byte *srcPix = srcRow + x * internal.bytes;
Alexis Hetu0de50d42015-09-09 13:56:41 -04002585 for(int i = 0; i < 3; i++)
2586 {
2587 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2588 }
2589 }
2590 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002591 internal.unlockRect();
Nicolas Capens22658242014-11-29 00:31:41 -05002592 }
2593 }
John Bauman89401822014-05-06 15:04:28 -04002594
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002595 void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
Alexis Hetu460e41f2015-09-01 10:58:37 -04002596 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002597 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002598
Alexis Hetuf46493f2017-12-18 15:32:26 -05002599 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
2600 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
Alexis Hetu0de50d42015-09-09 13:56:41 -04002601 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002602 external.unlockRect();
Alexis Hetu0de50d42015-09-09 13:56:41 -04002603
Alexis Hetuf46493f2017-12-18 15:32:26 -05002604 // FIXME: We convert EAC data to float, until signed short internal formats are supported
2605 // This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats
2606 const float normalization = isSigned ? (1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f));
2607 for(int y = 0; y < internal.height; y++)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002608 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002609 byte* srcRow = src + y * internal.pitchB;
2610 for(int x = internal.width - 1; x >= 0; x--)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002611 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002612 int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes);
2613 float* dstPix = reinterpret_cast<float*>(srcPix);
2614 for(int c = nbChannels - 1; c >= 0; c--)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002615 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002616 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
Alexis Hetu0de50d42015-09-09 13:56:41 -04002617 }
2618 }
2619 }
Alexis Hetuf46493f2017-12-18 15:32:26 -05002620
2621 internal.unlockRect();
Alexis Hetu460e41f2015-09-01 10:58:37 -04002622 }
2623
// ASTC decoding entry point. The body is empty: none of the parameters are read and
// 'internal' is left untouched, so no ASTC data is actually decoded here.
void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
{
}
2627
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002628 unsigned int Surface::size(int width, int height, int depth, int border, int samples, Format format)
John Bauman89401822014-05-06 15:04:28 -04002629 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002630 width += 2 * border;
2631 height += 2 * border;
2632
Nicolas Capens00555c42015-07-21 15:15:30 -04002633 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002634 int width4 = align(width, 4);
2635 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002636
2637 switch(format)
2638 {
2639 #if S3TC_SUPPORT
2640 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002641 #endif
John Bauman89401822014-05-06 15:04:28 -04002642 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002643 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002644 case FORMAT_R11_EAC:
2645 case FORMAT_SIGNED_R11_EAC:
2646 case FORMAT_RGB8_ETC2:
2647 case FORMAT_SRGB8_ETC2:
2648 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2649 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002650 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002651 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002652 case FORMAT_DXT3:
2653 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002654 #endif
John Bauman89401822014-05-06 15:04:28 -04002655 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002656 case FORMAT_RG11_EAC:
2657 case FORMAT_SIGNED_RG11_EAC:
2658 case FORMAT_RGBA8_ETC2_EAC:
2659 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2660 case FORMAT_RGBA_ASTC_4x4_KHR:
2661 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002662 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002663 case FORMAT_RGBA_ASTC_5x4_KHR:
2664 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2665 return align(width, 5) * height4 * depth;
2666 case FORMAT_RGBA_ASTC_5x5_KHR:
2667 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2668 return align(width, 5) * align(height, 5) * depth;
2669 case FORMAT_RGBA_ASTC_6x5_KHR:
2670 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2671 return align(width, 6) * align(height, 5) * depth;
2672 case FORMAT_RGBA_ASTC_6x6_KHR:
2673 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2674 return align(width, 6) * align(height, 6) * depth;
2675 case FORMAT_RGBA_ASTC_8x5_KHR:
2676 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2677 return align(width, 8) * align(height, 5) * depth;
2678 case FORMAT_RGBA_ASTC_8x6_KHR:
2679 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2680 return align(width, 8) * align(height, 6) * depth;
2681 case FORMAT_RGBA_ASTC_8x8_KHR:
2682 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2683 return align(width, 8) * align(height, 8) * depth;
2684 case FORMAT_RGBA_ASTC_10x5_KHR:
2685 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2686 return align(width, 10) * align(height, 5) * depth;
2687 case FORMAT_RGBA_ASTC_10x6_KHR:
2688 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2689 return align(width, 10) * align(height, 6) * depth;
2690 case FORMAT_RGBA_ASTC_10x8_KHR:
2691 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2692 return align(width, 10) * align(height, 8) * depth;
2693 case FORMAT_RGBA_ASTC_10x10_KHR:
2694 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2695 return align(width, 10) * align(height, 10) * depth;
2696 case FORMAT_RGBA_ASTC_12x10_KHR:
2697 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2698 return align(width, 12) * align(height, 10) * depth;
2699 case FORMAT_RGBA_ASTC_12x12_KHR:
2700 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2701 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002702 case FORMAT_YV12_BT601:
2703 case FORMAT_YV12_BT709:
2704 case FORMAT_YV12_JFIF:
2705 {
2706 unsigned int YStride = align(width, 16);
2707 unsigned int YSize = YStride * height;
2708 unsigned int CStride = align(YStride / 2, 16);
Nicolas Capens0bac2852016-05-07 06:09:58 -04002709 unsigned int CSize = CStride * height / 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002710
2711 return YSize + 2 * CSize;
2712 }
John Bauman89401822014-05-06 15:04:28 -04002713 default:
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002714 return bytes(format) * width * height * depth * samples;
John Bauman89401822014-05-06 15:04:28 -04002715 }
John Bauman89401822014-05-06 15:04:28 -04002716 }
2717
2718 bool Surface::isStencil(Format format)
2719 {
2720 switch(format)
2721 {
2722 case FORMAT_D32:
2723 case FORMAT_D16:
2724 case FORMAT_D24X8:
2725 case FORMAT_D32F:
2726 case FORMAT_D32F_COMPLEMENTARY:
2727 case FORMAT_D32F_LOCKABLE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002728 case FORMAT_D32F_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002729 return false;
2730 case FORMAT_D24S8:
2731 case FORMAT_D24FS8:
2732 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002733 case FORMAT_DF24S8:
2734 case FORMAT_DF16S8:
2735 case FORMAT_D32FS8_TEXTURE:
2736 case FORMAT_D32FS8_SHADOW:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002737 case FORMAT_D32FS8:
2738 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002739 case FORMAT_INTZ:
2740 return true;
2741 default:
2742 return false;
2743 }
2744 }
2745
2746 bool Surface::isDepth(Format format)
2747 {
2748 switch(format)
2749 {
2750 case FORMAT_D32:
2751 case FORMAT_D16:
2752 case FORMAT_D24X8:
2753 case FORMAT_D24S8:
2754 case FORMAT_D24FS8:
2755 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002756 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002757 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002758 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002759 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002760 case FORMAT_DF24S8:
2761 case FORMAT_DF16S8:
2762 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002763 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002764 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002765 case FORMAT_INTZ:
2766 return true;
2767 case FORMAT_S8:
2768 return false;
2769 default:
2770 return false;
2771 }
2772 }
2773
Alexis Hetub9dda642016-10-06 11:25:32 -04002774 bool Surface::hasQuadLayout(Format format)
2775 {
2776 switch(format)
2777 {
2778 case FORMAT_D32:
2779 case FORMAT_D16:
2780 case FORMAT_D24X8:
2781 case FORMAT_D24S8:
2782 case FORMAT_D24FS8:
2783 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002784 case FORMAT_D32FS8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002785 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002786 case FORMAT_D32FS8_COMPLEMENTARY:
Alexis Hetub9dda642016-10-06 11:25:32 -04002787 case FORMAT_DF24S8:
2788 case FORMAT_DF16S8:
2789 case FORMAT_INTZ:
2790 case FORMAT_S8:
2791 case FORMAT_A8G8R8B8Q:
2792 case FORMAT_X8G8R8B8Q:
2793 return true;
2794 case FORMAT_D32F_LOCKABLE:
2795 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002796 case FORMAT_D32F_SHADOW:
Alexis Hetub9dda642016-10-06 11:25:32 -04002797 case FORMAT_D32FS8_SHADOW:
2798 default:
2799 break;
2800 }
2801
2802 return false;
2803 }
2804
John Bauman89401822014-05-06 15:04:28 -04002805 bool Surface::isPalette(Format format)
2806 {
2807 switch(format)
2808 {
2809 case FORMAT_P8:
2810 case FORMAT_A8P8:
2811 return true;
2812 default:
2813 return false;
2814 }
2815 }
2816
2817 bool Surface::isFloatFormat(Format format)
2818 {
2819 switch(format)
2820 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002821 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002822 case FORMAT_R8G8B8:
2823 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002824 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002825 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002826 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002827 case FORMAT_A8R8G8B8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002828 case FORMAT_SRGB8_X8:
2829 case FORMAT_SRGB8_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002830 case FORMAT_A8B8G8R8I:
2831 case FORMAT_R8UI:
2832 case FORMAT_G8R8UI:
2833 case FORMAT_X8B8G8R8UI:
2834 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002835 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002836 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002837 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002838 case FORMAT_A2B10G10R10:
Nicolas Capens5555af42017-12-14 13:14:03 -05002839 case FORMAT_A2B10G10R10UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04002840 case FORMAT_R8I_SNORM:
2841 case FORMAT_G8R8I_SNORM:
2842 case FORMAT_X8B8G8R8I_SNORM:
2843 case FORMAT_A8B8G8R8I_SNORM:
2844 case FORMAT_R16I:
2845 case FORMAT_R16UI:
2846 case FORMAT_G16R16I:
2847 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002848 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002849 case FORMAT_X16B16G16R16I:
2850 case FORMAT_X16B16G16R16UI:
2851 case FORMAT_A16B16G16R16I:
2852 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002853 case FORMAT_A16B16G16R16:
2854 case FORMAT_V8U8:
2855 case FORMAT_Q8W8V8U8:
2856 case FORMAT_X8L8V8U8:
2857 case FORMAT_V16U16:
2858 case FORMAT_A16W16V16U16:
2859 case FORMAT_Q16W16V16U16:
2860 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002861 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002862 case FORMAT_R8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002863 case FORMAT_S8:
John Bauman89401822014-05-06 15:04:28 -04002864 case FORMAT_L8:
2865 case FORMAT_L16:
2866 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002867 case FORMAT_YV12_BT601:
2868 case FORMAT_YV12_BT709:
2869 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002870 case FORMAT_R32I:
2871 case FORMAT_R32UI:
2872 case FORMAT_G32R32I:
2873 case FORMAT_G32R32UI:
2874 case FORMAT_X32B32G32R32I:
2875 case FORMAT_X32B32G32R32UI:
2876 case FORMAT_A32B32G32R32I:
2877 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002878 return false;
Nicolas Capens400667e2017-03-29 14:40:14 -04002879 case FORMAT_R16F:
2880 case FORMAT_G16R16F:
2881 case FORMAT_B16G16R16F:
2882 case FORMAT_A16B16G16R16F:
John Bauman89401822014-05-06 15:04:28 -04002883 case FORMAT_R32F:
2884 case FORMAT_G32R32F:
Nicolas Capensc018e082016-12-13 10:19:33 -05002885 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002886 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002887 case FORMAT_A32B32G32R32F:
2888 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002889 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002890 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002891 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002892 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002893 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002894 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002895 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002896 case FORMAT_L16F:
2897 case FORMAT_A16L16F:
2898 case FORMAT_L32F:
2899 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002900 return true;
2901 default:
2902 ASSERT(false);
2903 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002904
John Bauman89401822014-05-06 15:04:28 -04002905 return false;
2906 }
2907
2908 bool Surface::isUnsignedComponent(Format format, int component)
2909 {
2910 switch(format)
2911 {
2912 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002913 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002914 case FORMAT_R8G8B8:
2915 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002916 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002917 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002918 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002919 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002920 case FORMAT_SRGB8_X8:
2921 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002922 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002923 case FORMAT_A2B10G10R10:
Nicolas Capens5555af42017-12-14 13:14:03 -05002924 case FORMAT_A2B10G10R10UI:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002925 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002926 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002927 case FORMAT_G16R16UI:
2928 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002929 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002930 case FORMAT_A16B16G16R16UI:
2931 case FORMAT_R32UI:
2932 case FORMAT_G32R32UI:
2933 case FORMAT_X32B32G32R32UI:
2934 case FORMAT_A32B32G32R32UI:
2935 case FORMAT_R8UI:
2936 case FORMAT_G8R8UI:
2937 case FORMAT_X8B8G8R8UI:
2938 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002939 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002940 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002941 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002942 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002943 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002944 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002945 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002946 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002947 case FORMAT_A8:
2948 case FORMAT_R8:
2949 case FORMAT_L8:
2950 case FORMAT_L16:
2951 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002952 case FORMAT_YV12_BT601:
2953 case FORMAT_YV12_BT709:
2954 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002955 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002956 case FORMAT_A8B8G8R8I:
2957 case FORMAT_A16B16G16R16I:
2958 case FORMAT_A32B32G32R32I:
2959 case FORMAT_A8B8G8R8I_SNORM:
2960 case FORMAT_Q8W8V8U8:
2961 case FORMAT_Q16W16V16U16:
2962 case FORMAT_A32B32G32R32F:
2963 return false;
2964 case FORMAT_R32F:
2965 case FORMAT_R8I:
2966 case FORMAT_R16I:
2967 case FORMAT_R32I:
2968 case FORMAT_R8I_SNORM:
2969 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002970 case FORMAT_V8U8:
2971 case FORMAT_X8L8V8U8:
2972 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002973 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002974 case FORMAT_G8R8I:
2975 case FORMAT_G16R16I:
2976 case FORMAT_G32R32I:
2977 case FORMAT_G8R8I_SNORM:
2978 return component >= 2;
2979 case FORMAT_A16W16V16U16:
Nicolas Capens2e363b02016-12-14 10:32:36 -05002980 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002981 case FORMAT_X32B32G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002982 case FORMAT_X8B8G8R8I:
2983 case FORMAT_X16B16G16R16I:
2984 case FORMAT_X32B32G32R32I:
2985 case FORMAT_X8B8G8R8I_SNORM:
2986 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002987 default:
2988 ASSERT(false);
2989 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002990
John Bauman89401822014-05-06 15:04:28 -04002991 return false;
2992 }
2993
2994 bool Surface::isSRGBreadable(Format format)
2995 {
2996 // Keep in sync with Capabilities::isSRGBreadable
2997 switch(format)
2998 {
2999 case FORMAT_L8:
3000 case FORMAT_A8L8:
3001 case FORMAT_R8G8B8:
3002 case FORMAT_A8R8G8B8:
3003 case FORMAT_X8R8G8B8:
3004 case FORMAT_A8B8G8R8:
3005 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04003006 case FORMAT_SRGB8_X8:
3007 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04003008 case FORMAT_R5G6B5:
3009 case FORMAT_X1R5G5B5:
3010 case FORMAT_A1R5G5B5:
3011 case FORMAT_A4R4G4B4:
3012 #if S3TC_SUPPORT
3013 case FORMAT_DXT1:
3014 case FORMAT_DXT3:
3015 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003016 #endif
John Bauman89401822014-05-06 15:04:28 -04003017 case FORMAT_ATI1:
3018 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04003019 return true;
3020 default:
3021 return false;
3022 }
John Bauman89401822014-05-06 15:04:28 -04003023 }
3024
3025 bool Surface::isSRGBwritable(Format format)
3026 {
3027 // Keep in sync with Capabilities::isSRGBwritable
3028 switch(format)
3029 {
3030 case FORMAT_NULL:
3031 case FORMAT_A8R8G8B8:
3032 case FORMAT_X8R8G8B8:
3033 case FORMAT_A8B8G8R8:
3034 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04003035 case FORMAT_SRGB8_X8:
3036 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04003037 case FORMAT_R5G6B5:
3038 return true;
3039 default:
3040 return false;
3041 }
3042 }
3043
Nicolas Capens5555af42017-12-14 13:14:03 -05003044 bool Surface::isSRGBformat(Format format)
3045 {
3046 switch(format)
3047 {
3048 case FORMAT_SRGB8_X8:
3049 case FORMAT_SRGB8_A8:
3050 return true;
3051 default:
3052 return false;
3053 }
3054 }
3055
John Bauman89401822014-05-06 15:04:28 -04003056 bool Surface::isCompressed(Format format)
3057 {
3058 switch(format)
3059 {
3060 #if S3TC_SUPPORT
3061 case FORMAT_DXT1:
3062 case FORMAT_DXT3:
3063 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003064 #endif
John Bauman89401822014-05-06 15:04:28 -04003065 case FORMAT_ATI1:
3066 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05003067 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003068 case FORMAT_R11_EAC:
3069 case FORMAT_SIGNED_R11_EAC:
3070 case FORMAT_RG11_EAC:
3071 case FORMAT_SIGNED_RG11_EAC:
3072 case FORMAT_RGB8_ETC2:
3073 case FORMAT_SRGB8_ETC2:
3074 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3075 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3076 case FORMAT_RGBA8_ETC2_EAC:
3077 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3078 case FORMAT_RGBA_ASTC_4x4_KHR:
3079 case FORMAT_RGBA_ASTC_5x4_KHR:
3080 case FORMAT_RGBA_ASTC_5x5_KHR:
3081 case FORMAT_RGBA_ASTC_6x5_KHR:
3082 case FORMAT_RGBA_ASTC_6x6_KHR:
3083 case FORMAT_RGBA_ASTC_8x5_KHR:
3084 case FORMAT_RGBA_ASTC_8x6_KHR:
3085 case FORMAT_RGBA_ASTC_8x8_KHR:
3086 case FORMAT_RGBA_ASTC_10x5_KHR:
3087 case FORMAT_RGBA_ASTC_10x6_KHR:
3088 case FORMAT_RGBA_ASTC_10x8_KHR:
3089 case FORMAT_RGBA_ASTC_10x10_KHR:
3090 case FORMAT_RGBA_ASTC_12x10_KHR:
3091 case FORMAT_RGBA_ASTC_12x12_KHR:
3092 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3093 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3094 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3095 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3096 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3097 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3098 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3099 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3100 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3101 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3102 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3103 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3104 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3105 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04003106 return true;
John Bauman89401822014-05-06 15:04:28 -04003107 default:
3108 return false;
3109 }
3110 }
3111
Nicolas Capens492887a2017-03-27 14:50:51 -04003112 bool Surface::isSignedNonNormalizedInteger(Format format)
Alexis Hetu43577b82015-10-21 15:32:16 -04003113 {
3114 switch(format)
3115 {
3116 case FORMAT_A8B8G8R8I:
3117 case FORMAT_X8B8G8R8I:
3118 case FORMAT_G8R8I:
3119 case FORMAT_R8I:
Alexis Hetu43577b82015-10-21 15:32:16 -04003120 case FORMAT_A16B16G16R16I:
3121 case FORMAT_X16B16G16R16I:
3122 case FORMAT_G16R16I:
3123 case FORMAT_R16I:
Alexis Hetu91dd1c42017-07-18 13:03:42 -04003124 case FORMAT_A32B32G32R32I:
3125 case FORMAT_X32B32G32R32I:
3126 case FORMAT_G32R32I:
3127 case FORMAT_R32I:
Nicolas Capens492887a2017-03-27 14:50:51 -04003128 return true;
3129 default:
3130 return false;
3131 }
3132 }
3133
3134 bool Surface::isUnsignedNonNormalizedInteger(Format format)
3135 {
3136 switch(format)
3137 {
Alexis Hetu91dd1c42017-07-18 13:03:42 -04003138 case FORMAT_A8B8G8R8UI:
3139 case FORMAT_X8B8G8R8UI:
3140 case FORMAT_G8R8UI:
3141 case FORMAT_R8UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04003142 case FORMAT_A16B16G16R16UI:
3143 case FORMAT_X16B16G16R16UI:
3144 case FORMAT_G16R16UI:
3145 case FORMAT_R16UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04003146 case FORMAT_A32B32G32R32UI:
3147 case FORMAT_X32B32G32R32UI:
3148 case FORMAT_G32R32UI:
3149 case FORMAT_R32UI:
3150 return true;
3151 default:
3152 return false;
3153 }
3154 }
3155
Nicolas Capens492887a2017-03-27 14:50:51 -04003156 bool Surface::isNonNormalizedInteger(Format format)
3157 {
3158 return isSignedNonNormalizedInteger(format) ||
3159 isUnsignedNonNormalizedInteger(format);
3160 }
3161
3162 bool Surface::isNormalizedInteger(Format format)
3163 {
3164 return !isFloatFormat(format) &&
3165 !isNonNormalizedInteger(format) &&
3166 !isCompressed(format) &&
3167 !isDepth(format) &&
3168 !isStencil(format);
3169 }
3170
John Bauman89401822014-05-06 15:04:28 -04003171 int Surface::componentCount(Format format)
3172 {
3173 switch(format)
3174 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003175 case FORMAT_R5G6B5: return 3;
3176 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003177 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003178 case FORMAT_X8B8G8R8: return 3;
3179 case FORMAT_A8R8G8B8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04003180 case FORMAT_SRGB8_X8: return 3;
3181 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003182 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003183 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003184 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003185 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003186 case FORMAT_R8I_SNORM: return 1;
3187 case FORMAT_G8R8I_SNORM: return 2;
3188 case FORMAT_X8B8G8R8I_SNORM:return 3;
3189 case FORMAT_A8B8G8R8I_SNORM:return 4;
3190 case FORMAT_R8UI: return 1;
3191 case FORMAT_G8R8UI: return 2;
3192 case FORMAT_X8B8G8R8UI: return 3;
3193 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05003194 case FORMAT_A2B10G10R10: return 4;
Nicolas Capens5555af42017-12-14 13:14:03 -05003195 case FORMAT_A2B10G10R10UI: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003196 case FORMAT_G16R16I: return 2;
3197 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003198 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003199 case FORMAT_G32R32I: return 2;
3200 case FORMAT_G32R32UI: return 2;
3201 case FORMAT_X16B16G16R16I: return 3;
3202 case FORMAT_X16B16G16R16UI: return 3;
3203 case FORMAT_A16B16G16R16I: return 4;
3204 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003205 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003206 case FORMAT_X32B32G32R32I: return 3;
3207 case FORMAT_X32B32G32R32UI: return 3;
3208 case FORMAT_A32B32G32R32I: return 4;
3209 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003210 case FORMAT_V8U8: return 2;
3211 case FORMAT_Q8W8V8U8: return 4;
3212 case FORMAT_X8L8V8U8: return 3;
3213 case FORMAT_V16U16: return 2;
3214 case FORMAT_A16W16V16U16: return 4;
3215 case FORMAT_Q16W16V16U16: return 4;
3216 case FORMAT_R32F: return 1;
3217 case FORMAT_G32R32F: return 2;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003218 case FORMAT_X32B32G32R32F: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003219 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003220 case FORMAT_D32F: return 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003221 case FORMAT_D32FS8: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003222 case FORMAT_D32F_LOCKABLE: return 1;
3223 case FORMAT_D32FS8_TEXTURE: return 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003224 case FORMAT_D32F_SHADOW: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003225 case FORMAT_D32FS8_SHADOW: return 1;
3226 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003227 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003228 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003229 case FORMAT_R16I: return 1;
3230 case FORMAT_R16UI: return 1;
3231 case FORMAT_R32I: return 1;
3232 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003233 case FORMAT_L8: return 1;
3234 case FORMAT_L16: return 1;
3235 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003236 case FORMAT_YV12_BT601: return 3;
3237 case FORMAT_YV12_BT709: return 3;
3238 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04003239 default:
3240 ASSERT(false);
3241 }
3242
3243 return 1;
3244 }
3245
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003246 void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format)
John Bauman89401822014-05-06 15:04:28 -04003247 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04003248 // Render targets require 2x2 quads
3249 int width2 = (width + 1) & ~1;
3250 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04003251
Nicolas Capens6ea71872015-06-26 13:00:48 -04003252 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
Nicolas Capens48ef1252016-11-07 15:30:33 -05003253 // and stencil operations also read 8 bytes per four 8-bit stencil values,
Nicolas Capens6ea71872015-06-26 13:00:48 -04003254 // so we have to allocate 4 extra bytes to avoid buffer overruns.
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003255 return allocate(size(width2, height2, depth, border, samples, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04003256 }
3257
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003258 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003259 {
3260 while((size_t)buffer & 0x1 && bytes >= 1)
3261 {
3262 *(char*)buffer = (char)pattern;
3263 (char*&)buffer += 1;
3264 bytes -= 1;
3265 }
3266
3267 while((size_t)buffer & 0x3 && bytes >= 2)
3268 {
3269 *(short*)buffer = (short)pattern;
3270 (short*&)buffer += 1;
3271 bytes -= 2;
3272 }
3273
Nicolas Capens47dc8672017-04-25 12:54:39 -04003274 #if defined(__i386__) || defined(__x86_64__)
3275 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04003276 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003277 while((size_t)buffer & 0xF && bytes >= 4)
3278 {
3279 *(int*)buffer = pattern;
3280 (int*&)buffer += 1;
3281 bytes -= 4;
3282 }
3283
3284 __m128 quad = _mm_set_ps1((float&)pattern);
3285
3286 float *pointer = (float*)buffer;
3287 int qxwords = bytes / 64;
3288 bytes -= qxwords * 64;
3289
3290 while(qxwords--)
3291 {
3292 _mm_stream_ps(pointer + 0, quad);
3293 _mm_stream_ps(pointer + 4, quad);
3294 _mm_stream_ps(pointer + 8, quad);
3295 _mm_stream_ps(pointer + 12, quad);
3296
3297 pointer += 16;
3298 }
3299
3300 buffer = pointer;
John Bauman89401822014-05-06 15:04:28 -04003301 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003302 #endif
John Bauman89401822014-05-06 15:04:28 -04003303
3304 while(bytes >= 4)
3305 {
3306 *(int*)buffer = (int)pattern;
3307 (int*&)buffer += 1;
3308 bytes -= 4;
3309 }
3310
3311 while(bytes >= 2)
3312 {
3313 *(short*)buffer = (short)pattern;
3314 (short*&)buffer += 1;
3315 bytes -= 2;
3316 }
3317
3318 while(bytes >= 1)
3319 {
3320 *(char*)buffer = (char)pattern;
3321 (char*&)buffer += 1;
3322 bytes -= 1;
3323 }
3324 }
3325
Nicolas Capensbf7a8142017-05-19 10:57:28 -04003326 void Surface::sync()
3327 {
3328 resource->lock(EXCLUSIVE);
3329 resource->unlock();
3330 }
3331
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003332 bool Surface::isEntire(const Rect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003333 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003334 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3335 }
John Bauman89401822014-05-06 15:04:28 -04003336
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003337 Rect Surface::getRect() const
Alexis Hetu75b650f2015-11-19 17:40:15 -05003338 {
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003339 return Rect(0, 0, internal.width, internal.height);
John Bauman89401822014-05-06 15:04:28 -04003340 }
3341
Nicolas Capensc39901e2016-03-21 16:37:44 -04003342 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003343 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003344 if(width == 0 || height == 0) return;
3345
John Bauman89401822014-05-06 15:04:28 -04003346 // Not overlapping
3347 if(x0 > internal.width) return;
3348 if(y0 > internal.height) return;
3349 if(x0 + width < 0) return;
3350 if(y0 + height < 0) return;
3351
3352 // Clip against dimensions
3353 if(x0 < 0) {width += x0; x0 = 0;}
3354 if(x0 + width > internal.width) width = internal.width - x0;
3355 if(y0 < 0) {height += y0; y0 = 0;}
3356 if(y0 + height > internal.height) height = internal.height - y0;
3357
3358 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3359 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3360
John Bauman89401822014-05-06 15:04:28 -04003361 int x1 = x0 + width;
3362 int y1 = y0 + height;
3363
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003364 if(!hasQuadLayout(internal.format))
John Bauman89401822014-05-06 15:04:28 -04003365 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003366 float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC);
John Bauman89401822014-05-06 15:04:28 -04003367
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003368 for(int z = 0; z < internal.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003369 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003370 float *row = target;
John Bauman89401822014-05-06 15:04:28 -04003371 for(int y = y0; y < y1; y++)
3372 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003373 memfill4(row, (int&)depth, width * sizeof(float));
3374 row += internal.pitchP;
John Bauman89401822014-05-06 15:04:28 -04003375 }
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003376 target += internal.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003377 }
3378
3379 unlockInternal();
3380 }
3381 else // Quad layout
3382 {
3383 if(complementaryDepthBuffer)
3384 {
3385 depth = 1 - depth;
3386 }
3387
3388 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3389
Alexis Hetu358a1442015-12-03 14:23:10 -05003390 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3391 int oddX1 = (x1 & ~1) * 2;
3392 int evenX0 = ((x0 + 1) & ~1) * 2;
3393 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3394
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003395 for(int z = 0; z < internal.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003396 {
3397 for(int y = y0; y < y1; y++)
3398 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003399 float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003400
John Bauman89401822014-05-06 15:04:28 -04003401 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3402 {
3403 if((x0 & 1) != 0)
3404 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003405 target[oddX0 + 0] = depth;
3406 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003407 }
3408
Alexis Hetu358a1442015-12-03 14:23:10 -05003409 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003410 // {
3411 // target[x2 + 0] = depth;
3412 // target[x2 + 1] = depth;
3413 // target[x2 + 2] = depth;
3414 // target[x2 + 3] = depth;
3415 // }
3416
3417 // __asm
3418 // {
3419 // movss xmm0, depth
3420 // shufps xmm0, xmm0, 0x00
3421 //
3422 // mov eax, x0
3423 // add eax, 1
3424 // and eax, 0xFFFFFFFE
3425 // cmp eax, x1
3426 // jge qEnd
3427 //
3428 // mov edi, target
3429 //
3430 // qLoop:
3431 // movntps [edi+8*eax], xmm0
3432 //
3433 // add eax, 2
3434 // cmp eax, x1
3435 // jl qLoop
3436 // qEnd:
3437 // }
3438
Alexis Hetu358a1442015-12-03 14:23:10 -05003439 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003440
3441 if((x1 & 1) != 0)
3442 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003443 target[oddX1 + 0] = depth;
3444 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003445 }
3446
3447 y++;
3448 }
3449 else
3450 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003451 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003452 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003453 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003454 }
3455 }
3456 }
3457
3458 buffer += internal.sliceP;
3459 }
3460
3461 unlockInternal();
3462 }
3463 }
3464
Nicolas Capensc39901e2016-03-21 16:37:44 -04003465 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003466 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003467 if(mask == 0 || width == 0 || height == 0) return;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003468
John Bauman89401822014-05-06 15:04:28 -04003469 // Not overlapping
3470 if(x0 > internal.width) return;
3471 if(y0 > internal.height) return;
3472 if(x0 + width < 0) return;
3473 if(y0 + height < 0) return;
3474
3475 // Clip against dimensions
3476 if(x0 < 0) {width += x0; x0 = 0;}
3477 if(x0 + width > internal.width) width = internal.width - x0;
3478 if(y0 < 0) {height += y0; y0 = 0;}
3479 if(y0 + height > internal.height) height = internal.height - y0;
3480
John Bauman89401822014-05-06 15:04:28 -04003481 int x1 = x0 + width;
3482 int y1 = y0 + height;
3483
Alexis Hetu358a1442015-12-03 14:23:10 -05003484 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3485 int oddX1 = (x1 & ~1) * 2;
3486 int evenX0 = ((x0 + 1) & ~1) * 2;
3487 int evenBytes = oddX1 - evenX0;
3488
John Bauman89401822014-05-06 15:04:28 -04003489 unsigned char maskedS = s & mask;
3490 unsigned char invMask = ~mask;
3491 unsigned int fill = maskedS;
Tom Anderson69bc6e82017-03-20 11:54:29 -07003492 fill = fill | (fill << 8) | (fill << 16) | (fill << 24);
John Bauman89401822014-05-06 15:04:28 -04003493
Alexis Hetua52dfbd2016-10-05 17:03:30 -04003494 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003495
3496 // Stencil buffers are assumed to use quad layout
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003497 for(int z = 0; z < stencil.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003498 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003499 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003500 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003501 char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003502
3503 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003504 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003505 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003506 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003507 target[oddX0 + 0] = fill;
3508 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003509 }
3510
Alexis Hetu358a1442015-12-03 14:23:10 -05003511 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003512
3513 if((x1 & 1) != 0)
3514 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003515 target[oddX1 + 0] = fill;
3516 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003517 }
3518
3519 y++;
3520 }
3521 else
3522 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003523 for(int x = x0; x < x1; x++)
Alexis Hetu2b052f82015-11-25 13:57:28 -05003524 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003525 int i = (x & ~1) * 2 + (x & 1);
Alexis Hetu358a1442015-12-03 14:23:10 -05003526 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003527 }
John Bauman89401822014-05-06 15:04:28 -04003528 }
3529 }
3530
Alexis Hetu2b052f82015-11-25 13:57:28 -05003531 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003532 }
John Bauman89401822014-05-06 15:04:28 -04003533
Alexis Hetu2b052f82015-11-25 13:57:28 -05003534 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003535 }
3536
3537 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3538 {
3539 unsigned char *row;
3540 Buffer *buffer;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003541
John Bauman89401822014-05-06 15:04:28 -04003542 if(internal.dirty)
3543 {
3544 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3545 buffer = &internal;
3546 }
3547 else
3548 {
3549 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3550 buffer = &external;
3551 }
3552
3553 if(buffer->bytes <= 4)
3554 {
3555 int c;
3556 buffer->write(&c, color);
3557
3558 if(buffer->bytes <= 1) c = (c << 8) | c;
3559 if(buffer->bytes <= 2) c = (c << 16) | c;
3560
3561 for(int y = 0; y < height; y++)
3562 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003563 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003564
3565 row += buffer->pitchB;
3566 }
3567 }
3568 else // Generic
3569 {
3570 for(int y = 0; y < height; y++)
3571 {
3572 unsigned char *element = row;
3573
3574 for(int x = 0; x < width; x++)
3575 {
3576 buffer->write(element, color);
3577
3578 element += buffer->bytes;
3579 }
3580
3581 row += buffer->pitchB;
3582 }
3583 }
3584
3585 if(buffer == &internal)
3586 {
3587 unlockInternal();
3588 }
3589 else
3590 {
3591 unlockExternal();
3592 }
3593 }
3594
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003595 void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003596 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003597 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003598
Alexis Hetu43577b82015-10-21 15:32:16 -04003599 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003600
Alexis Hetu43577b82015-10-21 15:32:16 -04003601 if(!filter)
3602 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003603 color = source->internal.read((int)srcX, (int)srcY, 0);
Alexis Hetu43577b82015-10-21 15:32:16 -04003604 }
3605 else // Bilinear filtering
3606 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003607 color = source->internal.sample(srcX, srcY, 0);
Alexis Hetu43577b82015-10-21 15:32:16 -04003608 }
John Bauman89401822014-05-06 15:04:28 -04003609
3610 internal.write(x, y, color);
3611 }
3612
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003613 void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
Alexis Hetu43577b82015-10-21 15:32:16 -04003614 {
3615 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3616
3617 sw::Color<float> color;
3618
3619 if(!filter)
3620 {
3621 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3622 }
3623 else // Bilinear filtering
3624 {
3625 color = source->internal.sample(srcX, srcY, srcZ);
3626 }
3627
3628 internal.write(x, y, z, color);
3629 }
3630
Alexis Hetua76a1bf2016-11-29 17:17:26 -05003631 void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge)
3632 {
3633 Surface *dst = this;
3634
3635 // Figure out if the edges to be copied in reverse order respectively from one another
3636 // The copy should be reversed whenever the same edges are contiguous or if we're
3637 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
3638 //
3639 // | +y |
3640 // | -x | +z | +x | -z |
3641 // | -y |
3642
3643 bool reverse = (srcEdge == dstEdge) ||
3644 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
3645 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
3646 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
3647 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
3648
3649 int srcBytes = src->bytes(src->Surface::getInternalFormat());
3650 int srcPitch = src->getInternalPitchB();
3651 int dstBytes = dst->bytes(dst->Surface::getInternalFormat());
3652 int dstPitch = dst->getInternalPitchB();
3653
3654 int srcW = src->getWidth();
3655 int srcH = src->getHeight();
3656 int dstW = dst->getWidth();
3657 int dstH = dst->getHeight();
3658
3659 ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes);
3660
3661 // Src is expressed in the regular [0, width-1], [0, height-1] space
3662 int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch;
3663 int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0));
3664
3665 // Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space
3666 int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1);
3667 int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta);
3668
3669 char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart;
3670 char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart;
3671
3672 for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta)
3673 {
3674 memcpy(dstBuf, srcBuf, srcBytes);
3675 }
3676
3677 if(dstEdge == LEFT || dstEdge == RIGHT)
3678 {
3679 // TOP and BOTTOM are already set, let's average out the corners
3680 int x0 = (dstEdge == RIGHT) ? dstW : -1;
3681 int y0 = -1;
3682 int x1 = (dstEdge == RIGHT) ? dstW - 1 : 0;
3683 int y1 = 0;
3684 dst->computeCubeCorner(x0, y0, x1, y1);
3685 y0 = dstH;
3686 y1 = dstH - 1;
3687 dst->computeCubeCorner(x0, y0, x1, y1);
3688 }
3689
3690 src->unlockInternal();
3691 dst->unlockInternal();
3692 }
3693
3694 void Surface::computeCubeCorner(int x0, int y0, int x1, int y1)
3695 {
3696 ASSERT(internal.lock != LOCK_UNLOCKED);
3697
3698 sw::Color<float> color = internal.read(x0, y1);
3699 color += internal.read(x1, y0);
3700 color += internal.read(x1, y1);
3701 color *= (1.0f / 3.0f);
3702
3703 internal.write(x0, y0, color);
3704 }
3705
John Bauman89401822014-05-06 15:04:28 -04003706 bool Surface::hasStencil() const
3707 {
3708 return isStencil(external.format);
3709 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04003710
John Bauman89401822014-05-06 15:04:28 -04003711 bool Surface::hasDepth() const
3712 {
3713 return isDepth(external.format);
3714 }
3715
3716 bool Surface::hasPalette() const
3717 {
3718 return isPalette(external.format);
3719 }
3720
3721 bool Surface::isRenderTarget() const
3722 {
3723 return renderTarget;
3724 }
3725
Nicolas Capens73e18c12017-11-28 13:31:35 -05003726 bool Surface::hasDirtyContents() const
John Bauman89401822014-05-06 15:04:28 -04003727 {
Nicolas Capens73e18c12017-11-28 13:31:35 -05003728 return dirtyContents;
John Bauman89401822014-05-06 15:04:28 -04003729 }
3730
Nicolas Capens73e18c12017-11-28 13:31:35 -05003731 void Surface::markContentsClean()
John Bauman89401822014-05-06 15:04:28 -04003732 {
Nicolas Capens73e18c12017-11-28 13:31:35 -05003733 dirtyContents = false;
John Bauman89401822014-05-06 15:04:28 -04003734 }
3735
3736 Resource *Surface::getResource()
3737 {
3738 return resource;
3739 }
3740
3741 bool Surface::identicalFormats() const
3742 {
John Bauman66b8ab22014-05-06 15:57:45 -04003743 return external.format == internal.format &&
3744 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003745 external.height == internal.height &&
3746 external.depth == internal.depth &&
3747 external.pitchB == internal.pitchB &&
Alexis Hetu9c6d5222016-11-29 17:02:14 -05003748 external.sliceB == internal.sliceB &&
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003749 external.border == internal.border &&
3750 external.samples == internal.samples;
John Bauman89401822014-05-06 15:04:28 -04003751 }
3752
3753 Format Surface::selectInternalFormat(Format format) const
3754 {
3755 switch(format)
3756 {
3757 case FORMAT_NULL:
3758 return FORMAT_NULL;
3759 case FORMAT_P8:
3760 case FORMAT_A8P8:
3761 case FORMAT_A4R4G4B4:
3762 case FORMAT_A1R5G5B5:
3763 case FORMAT_A8R3G3B2:
3764 return FORMAT_A8R8G8B8;
3765 case FORMAT_A8:
3766 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003767 case FORMAT_R8I:
3768 return FORMAT_R8I;
3769 case FORMAT_R8UI:
3770 return FORMAT_R8UI;
3771 case FORMAT_R8I_SNORM:
3772 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003773 case FORMAT_R8:
3774 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003775 case FORMAT_R16I:
3776 return FORMAT_R16I;
3777 case FORMAT_R16UI:
3778 return FORMAT_R16UI;
3779 case FORMAT_R32I:
3780 return FORMAT_R32I;
3781 case FORMAT_R32UI:
3782 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003783 case FORMAT_X16B16G16R16I:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003784 return FORMAT_X16B16G16R16I;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003785 case FORMAT_A16B16G16R16I:
3786 return FORMAT_A16B16G16R16I;
3787 case FORMAT_X16B16G16R16UI:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003788 return FORMAT_X16B16G16R16UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003789 case FORMAT_A16B16G16R16UI:
3790 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003791 case FORMAT_A2R10G10B10:
3792 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003793 case FORMAT_A16B16G16R16:
3794 return FORMAT_A16B16G16R16;
Nicolas Capens5555af42017-12-14 13:14:03 -05003795 case FORMAT_A2B10G10R10UI:
3796 return FORMAT_A16B16G16R16UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003797 case FORMAT_X32B32G32R32I:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003798 return FORMAT_X32B32G32R32I;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003799 case FORMAT_A32B32G32R32I:
3800 return FORMAT_A32B32G32R32I;
3801 case FORMAT_X32B32G32R32UI:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003802 return FORMAT_X32B32G32R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003803 case FORMAT_A32B32G32R32UI:
3804 return FORMAT_A32B32G32R32UI;
3805 case FORMAT_G8R8I:
3806 return FORMAT_G8R8I;
3807 case FORMAT_G8R8UI:
3808 return FORMAT_G8R8UI;
3809 case FORMAT_G8R8I_SNORM:
3810 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003811 case FORMAT_G8R8:
3812 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003813 case FORMAT_G16R16I:
3814 return FORMAT_G16R16I;
3815 case FORMAT_G16R16UI:
3816 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003817 case FORMAT_G16R16:
3818 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003819 case FORMAT_G32R32I:
3820 return FORMAT_G32R32I;
3821 case FORMAT_G32R32UI:
3822 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003823 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003824 if(lockable || !quadLayoutEnabled)
3825 {
3826 return FORMAT_A8R8G8B8;
3827 }
3828 else
3829 {
3830 return FORMAT_A8G8R8B8Q;
3831 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003832 case FORMAT_A8B8G8R8I:
3833 return FORMAT_A8B8G8R8I;
3834 case FORMAT_A8B8G8R8UI:
3835 return FORMAT_A8B8G8R8UI;
3836 case FORMAT_A8B8G8R8I_SNORM:
3837 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003838 case FORMAT_R5G5B5A1:
3839 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003840 case FORMAT_A8B8G8R8:
3841 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003842 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003843 return FORMAT_R5G6B5;
3844 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003845 case FORMAT_R8G8B8:
3846 case FORMAT_X4R4G4B4:
3847 case FORMAT_X1R5G5B5:
3848 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003849 if(lockable || !quadLayoutEnabled)
3850 {
3851 return FORMAT_X8R8G8B8;
3852 }
3853 else
3854 {
3855 return FORMAT_X8G8R8B8Q;
3856 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003857 case FORMAT_X8B8G8R8I:
3858 return FORMAT_X8B8G8R8I;
3859 case FORMAT_X8B8G8R8UI:
3860 return FORMAT_X8B8G8R8UI;
3861 case FORMAT_X8B8G8R8I_SNORM:
3862 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003863 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003864 case FORMAT_X8B8G8R8:
3865 return FORMAT_X8B8G8R8;
Alexis Hetu049a1872016-04-25 16:59:58 -04003866 case FORMAT_SRGB8_X8:
3867 return FORMAT_SRGB8_X8;
3868 case FORMAT_SRGB8_A8:
3869 return FORMAT_SRGB8_A8;
John Bauman89401822014-05-06 15:04:28 -04003870 // Compressed formats
3871 #if S3TC_SUPPORT
3872 case FORMAT_DXT1:
3873 case FORMAT_DXT3:
3874 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003875 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003876 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3877 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3878 case FORMAT_RGBA8_ETC2_EAC:
3879 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3880 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3881 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3882 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3883 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3884 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3885 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3886 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3887 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3888 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3889 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3890 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3891 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3892 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3893 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3894 return FORMAT_A8R8G8B8;
3895 case FORMAT_RGBA_ASTC_4x4_KHR:
3896 case FORMAT_RGBA_ASTC_5x4_KHR:
3897 case FORMAT_RGBA_ASTC_5x5_KHR:
3898 case FORMAT_RGBA_ASTC_6x5_KHR:
3899 case FORMAT_RGBA_ASTC_6x6_KHR:
3900 case FORMAT_RGBA_ASTC_8x5_KHR:
3901 case FORMAT_RGBA_ASTC_8x6_KHR:
3902 case FORMAT_RGBA_ASTC_8x8_KHR:
3903 case FORMAT_RGBA_ASTC_10x5_KHR:
3904 case FORMAT_RGBA_ASTC_10x6_KHR:
3905 case FORMAT_RGBA_ASTC_10x8_KHR:
3906 case FORMAT_RGBA_ASTC_10x10_KHR:
3907 case FORMAT_RGBA_ASTC_12x10_KHR:
3908 case FORMAT_RGBA_ASTC_12x12_KHR:
3909 // ASTC supports HDR, so a floating point format is required to represent it properly
3910 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003911 case FORMAT_ATI1:
3912 return FORMAT_R8;
Alexis Hetuf46493f2017-12-18 15:32:26 -05003913 case FORMAT_R11_EAC:
Alexis Hetu0de50d42015-09-09 13:56:41 -04003914 case FORMAT_SIGNED_R11_EAC:
3915 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003916 case FORMAT_ATI2:
3917 return FORMAT_G8R8;
Alexis Hetuf46493f2017-12-18 15:32:26 -05003918 case FORMAT_RG11_EAC:
Alexis Hetu0de50d42015-09-09 13:56:41 -04003919 case FORMAT_SIGNED_RG11_EAC:
3920 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003921 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003922 case FORMAT_RGB8_ETC2:
3923 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003924 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003925 // Bumpmap formats
3926 case FORMAT_V8U8: return FORMAT_V8U8;
3927 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3928 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3929 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3930 case FORMAT_V16U16: return FORMAT_V16U16;
3931 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3932 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3933 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003934 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003935 case FORMAT_R16F: return FORMAT_R32F;
3936 case FORMAT_G16R16F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003937 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003938 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003939 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003940 case FORMAT_R32F: return FORMAT_R32F;
3941 case FORMAT_G32R32F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003942 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3943 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003944 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3945 // Luminance formats
3946 case FORMAT_L8: return FORMAT_L8;
3947 case FORMAT_A4L4: return FORMAT_A8L8;
3948 case FORMAT_L16: return FORMAT_L16;
3949 case FORMAT_A8L8: return FORMAT_A8L8;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003950 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003951 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003952 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003953 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003954 // Depth/stencil formats
3955 case FORMAT_D16:
3956 case FORMAT_D32:
3957 case FORMAT_D24X8:
John Bauman89401822014-05-06 15:04:28 -04003958 if(hasParent) // Texture
3959 {
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003960 return FORMAT_D32F_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003961 }
3962 else if(complementaryDepthBuffer)
3963 {
3964 return FORMAT_D32F_COMPLEMENTARY;
3965 }
3966 else
3967 {
3968 return FORMAT_D32F;
3969 }
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003970 case FORMAT_D24S8:
3971 case FORMAT_D24FS8:
3972 if(hasParent) // Texture
3973 {
3974 return FORMAT_D32FS8_SHADOW;
3975 }
3976 else if(complementaryDepthBuffer)
3977 {
3978 return FORMAT_D32FS8_COMPLEMENTARY;
3979 }
3980 else
3981 {
3982 return FORMAT_D32FS8;
3983 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003984 case FORMAT_D32F: return FORMAT_D32F;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003985 case FORMAT_D32FS8: return FORMAT_D32FS8;
John Bauman66b8ab22014-05-06 15:57:45 -04003986 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3987 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3988 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3989 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3990 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003991 case FORMAT_S8: return FORMAT_S8;
3992 // YUV formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003993 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3994 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3995 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003996 default:
3997 ASSERT(false);
3998 }
3999
4000 return FORMAT_NULL;
4001 }
4002
4003 void Surface::setTexturePalette(unsigned int *palette)
4004 {
4005 Surface::palette = palette;
4006 Surface::paletteID++;
4007 }
4008
4009 void Surface::resolve()
4010 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004011 if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
John Bauman89401822014-05-06 15:04:28 -04004012 {
4013 return;
4014 }
4015
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004016 ASSERT(internal.depth == 1); // Unimplemented
4017
John Bauman89401822014-05-06 15:04:28 -04004018 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
4019
John Bauman89401822014-05-06 15:04:28 -04004020 int width = internal.width;
4021 int height = internal.height;
4022 int pitch = internal.pitchB;
4023 int slice = internal.sliceB;
4024
4025 unsigned char *source0 = (unsigned char*)source;
4026 unsigned char *source1 = source0 + slice;
4027 unsigned char *source2 = source1 + slice;
4028 unsigned char *source3 = source2 + slice;
4029 unsigned char *source4 = source3 + slice;
4030 unsigned char *source5 = source4 + slice;
4031 unsigned char *source6 = source5 + slice;
4032 unsigned char *source7 = source6 + slice;
4033 unsigned char *source8 = source7 + slice;
4034 unsigned char *source9 = source8 + slice;
4035 unsigned char *sourceA = source9 + slice;
4036 unsigned char *sourceB = sourceA + slice;
4037 unsigned char *sourceC = sourceB + slice;
4038 unsigned char *sourceD = sourceC + slice;
4039 unsigned char *sourceE = sourceD + slice;
4040 unsigned char *sourceF = sourceE + slice;
4041
Alexis Hetu049a1872016-04-25 16:59:58 -04004042 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
4043 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
4044 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
John Bauman89401822014-05-06 15:04:28 -04004045 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004046 #if defined(__i386__) || defined(__x86_64__)
4047 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004048 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004049 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004050 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004051 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004052 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004053 for(int x = 0; x < width; x += 4)
4054 {
4055 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4056 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004057
Nicolas Capens47dc8672017-04-25 12:54:39 -04004058 c0 = _mm_avg_epu8(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004059
Nicolas Capens47dc8672017-04-25 12:54:39 -04004060 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4061 }
4062
4063 source0 += pitch;
4064 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004065 }
John Bauman89401822014-05-06 15:04:28 -04004066 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004067 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004068 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004069 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004070 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004071 for(int x = 0; x < width; x += 4)
4072 {
4073 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4074 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4075 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4076 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004077
Nicolas Capens47dc8672017-04-25 12:54:39 -04004078 c0 = _mm_avg_epu8(c0, c1);
4079 c2 = _mm_avg_epu8(c2, c3);
4080 c0 = _mm_avg_epu8(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004081
Nicolas Capens47dc8672017-04-25 12:54:39 -04004082 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4083 }
4084
4085 source0 += pitch;
4086 source1 += pitch;
4087 source2 += pitch;
4088 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004089 }
John Bauman89401822014-05-06 15:04:28 -04004090 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004091 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004092 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004093 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004094 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004095 for(int x = 0; x < width; x += 4)
4096 {
4097 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4098 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4099 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4100 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4101 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4102 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4103 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4104 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004105
Nicolas Capens47dc8672017-04-25 12:54:39 -04004106 c0 = _mm_avg_epu8(c0, c1);
4107 c2 = _mm_avg_epu8(c2, c3);
4108 c4 = _mm_avg_epu8(c4, c5);
4109 c6 = _mm_avg_epu8(c6, c7);
4110 c0 = _mm_avg_epu8(c0, c2);
4111 c4 = _mm_avg_epu8(c4, c6);
4112 c0 = _mm_avg_epu8(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004113
Nicolas Capens47dc8672017-04-25 12:54:39 -04004114 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4115 }
4116
4117 source0 += pitch;
4118 source1 += pitch;
4119 source2 += pitch;
4120 source3 += pitch;
4121 source4 += pitch;
4122 source5 += pitch;
4123 source6 += pitch;
4124 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004125 }
John Bauman89401822014-05-06 15:04:28 -04004126 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004127 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004128 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004129 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004130 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004131 for(int x = 0; x < width; x += 4)
4132 {
4133 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4134 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4135 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4136 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4137 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4138 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4139 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4140 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4141 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4142 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4143 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4144 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4145 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4146 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4147 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4148 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004149
Nicolas Capens47dc8672017-04-25 12:54:39 -04004150 c0 = _mm_avg_epu8(c0, c1);
4151 c2 = _mm_avg_epu8(c2, c3);
4152 c4 = _mm_avg_epu8(c4, c5);
4153 c6 = _mm_avg_epu8(c6, c7);
4154 c8 = _mm_avg_epu8(c8, c9);
4155 cA = _mm_avg_epu8(cA, cB);
4156 cC = _mm_avg_epu8(cC, cD);
4157 cE = _mm_avg_epu8(cE, cF);
4158 c0 = _mm_avg_epu8(c0, c2);
4159 c4 = _mm_avg_epu8(c4, c6);
4160 c8 = _mm_avg_epu8(c8, cA);
4161 cC = _mm_avg_epu8(cC, cE);
4162 c0 = _mm_avg_epu8(c0, c4);
4163 c8 = _mm_avg_epu8(c8, cC);
4164 c0 = _mm_avg_epu8(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004165
Nicolas Capens47dc8672017-04-25 12:54:39 -04004166 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4167 }
4168
4169 source0 += pitch;
4170 source1 += pitch;
4171 source2 += pitch;
4172 source3 += pitch;
4173 source4 += pitch;
4174 source5 += pitch;
4175 source6 += pitch;
4176 source7 += pitch;
4177 source8 += pitch;
4178 source9 += pitch;
4179 sourceA += pitch;
4180 sourceB += pitch;
4181 sourceC += pitch;
4182 sourceD += pitch;
4183 sourceE += pitch;
4184 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004185 }
John Bauman89401822014-05-06 15:04:28 -04004186 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004187 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004188 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004189 else
4190 #endif
John Bauman89401822014-05-06 15:04:28 -04004191 {
4192 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
4193
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004194 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004195 {
4196 for(int y = 0; y < height; y++)
4197 {
4198 for(int x = 0; x < width; x++)
4199 {
4200 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4201 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4202
4203 c0 = AVERAGE(c0, c1);
4204
4205 *(unsigned int*)(source0 + 4 * x) = c0;
4206 }
4207
4208 source0 += pitch;
4209 source1 += pitch;
4210 }
4211 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004212 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004213 {
4214 for(int y = 0; y < height; y++)
4215 {
4216 for(int x = 0; x < width; x++)
4217 {
4218 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4219 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4220 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4221 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4222
4223 c0 = AVERAGE(c0, c1);
4224 c2 = AVERAGE(c2, c3);
4225 c0 = AVERAGE(c0, c2);
4226
4227 *(unsigned int*)(source0 + 4 * x) = c0;
4228 }
4229
4230 source0 += pitch;
4231 source1 += pitch;
4232 source2 += pitch;
4233 source3 += pitch;
4234 }
4235 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004236 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004237 {
4238 for(int y = 0; y < height; y++)
4239 {
4240 for(int x = 0; x < width; x++)
4241 {
4242 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4243 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4244 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4245 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4246 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4247 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4248 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4249 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4250
4251 c0 = AVERAGE(c0, c1);
4252 c2 = AVERAGE(c2, c3);
4253 c4 = AVERAGE(c4, c5);
4254 c6 = AVERAGE(c6, c7);
4255 c0 = AVERAGE(c0, c2);
4256 c4 = AVERAGE(c4, c6);
4257 c0 = AVERAGE(c0, c4);
4258
4259 *(unsigned int*)(source0 + 4 * x) = c0;
4260 }
4261
4262 source0 += pitch;
4263 source1 += pitch;
4264 source2 += pitch;
4265 source3 += pitch;
4266 source4 += pitch;
4267 source5 += pitch;
4268 source6 += pitch;
4269 source7 += pitch;
4270 }
4271 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004272 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004273 {
4274 for(int y = 0; y < height; y++)
4275 {
4276 for(int x = 0; x < width; x++)
4277 {
4278 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4279 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4280 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4281 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4282 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4283 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4284 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4285 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4286 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4287 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4288 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4289 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4290 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4291 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4292 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4293 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4294
4295 c0 = AVERAGE(c0, c1);
4296 c2 = AVERAGE(c2, c3);
4297 c4 = AVERAGE(c4, c5);
4298 c6 = AVERAGE(c6, c7);
4299 c8 = AVERAGE(c8, c9);
4300 cA = AVERAGE(cA, cB);
4301 cC = AVERAGE(cC, cD);
4302 cE = AVERAGE(cE, cF);
4303 c0 = AVERAGE(c0, c2);
4304 c4 = AVERAGE(c4, c6);
4305 c8 = AVERAGE(c8, cA);
4306 cC = AVERAGE(cC, cE);
4307 c0 = AVERAGE(c0, c4);
4308 c8 = AVERAGE(c8, cC);
4309 c0 = AVERAGE(c0, c8);
4310
4311 *(unsigned int*)(source0 + 4 * x) = c0;
4312 }
4313
4314 source0 += pitch;
4315 source1 += pitch;
4316 source2 += pitch;
4317 source3 += pitch;
4318 source4 += pitch;
4319 source5 += pitch;
4320 source6 += pitch;
4321 source7 += pitch;
4322 source8 += pitch;
4323 source9 += pitch;
4324 sourceA += pitch;
4325 sourceB += pitch;
4326 sourceC += pitch;
4327 sourceD += pitch;
4328 sourceE += pitch;
4329 sourceF += pitch;
4330 }
4331 }
4332 else ASSERT(false);
4333
4334 #undef AVERAGE
4335 }
4336 }
4337 else if(internal.format == FORMAT_G16R16)
4338 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004339
4340 #if defined(__i386__) || defined(__x86_64__)
4341 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004342 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004343 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004344 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004345 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004346 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004347 for(int x = 0; x < width; x += 4)
4348 {
4349 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4350 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004351
Nicolas Capens47dc8672017-04-25 12:54:39 -04004352 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004353
Nicolas Capens47dc8672017-04-25 12:54:39 -04004354 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4355 }
4356
4357 source0 += pitch;
4358 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004359 }
John Bauman89401822014-05-06 15:04:28 -04004360 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004361 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004362 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004363 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004364 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004365 for(int x = 0; x < width; x += 4)
4366 {
4367 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4368 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4369 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4370 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004371
Nicolas Capens47dc8672017-04-25 12:54:39 -04004372 c0 = _mm_avg_epu16(c0, c1);
4373 c2 = _mm_avg_epu16(c2, c3);
4374 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004375
Nicolas Capens47dc8672017-04-25 12:54:39 -04004376 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4377 }
4378
4379 source0 += pitch;
4380 source1 += pitch;
4381 source2 += pitch;
4382 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004383 }
John Bauman89401822014-05-06 15:04:28 -04004384 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004385 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004386 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004387 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004388 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004389 for(int x = 0; x < width; x += 4)
4390 {
4391 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4392 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4393 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4394 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4395 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4396 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4397 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4398 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004399
Nicolas Capens47dc8672017-04-25 12:54:39 -04004400 c0 = _mm_avg_epu16(c0, c1);
4401 c2 = _mm_avg_epu16(c2, c3);
4402 c4 = _mm_avg_epu16(c4, c5);
4403 c6 = _mm_avg_epu16(c6, c7);
4404 c0 = _mm_avg_epu16(c0, c2);
4405 c4 = _mm_avg_epu16(c4, c6);
4406 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004407
Nicolas Capens47dc8672017-04-25 12:54:39 -04004408 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4409 }
4410
4411 source0 += pitch;
4412 source1 += pitch;
4413 source2 += pitch;
4414 source3 += pitch;
4415 source4 += pitch;
4416 source5 += pitch;
4417 source6 += pitch;
4418 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004419 }
John Bauman89401822014-05-06 15:04:28 -04004420 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004421 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004422 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004423 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004424 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004425 for(int x = 0; x < width; x += 4)
4426 {
4427 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4428 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4429 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4430 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4431 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4432 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4433 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4434 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4435 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4436 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4437 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4438 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4439 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4440 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4441 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4442 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004443
Nicolas Capens47dc8672017-04-25 12:54:39 -04004444 c0 = _mm_avg_epu16(c0, c1);
4445 c2 = _mm_avg_epu16(c2, c3);
4446 c4 = _mm_avg_epu16(c4, c5);
4447 c6 = _mm_avg_epu16(c6, c7);
4448 c8 = _mm_avg_epu16(c8, c9);
4449 cA = _mm_avg_epu16(cA, cB);
4450 cC = _mm_avg_epu16(cC, cD);
4451 cE = _mm_avg_epu16(cE, cF);
4452 c0 = _mm_avg_epu16(c0, c2);
4453 c4 = _mm_avg_epu16(c4, c6);
4454 c8 = _mm_avg_epu16(c8, cA);
4455 cC = _mm_avg_epu16(cC, cE);
4456 c0 = _mm_avg_epu16(c0, c4);
4457 c8 = _mm_avg_epu16(c8, cC);
4458 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004459
Nicolas Capens47dc8672017-04-25 12:54:39 -04004460 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4461 }
4462
4463 source0 += pitch;
4464 source1 += pitch;
4465 source2 += pitch;
4466 source3 += pitch;
4467 source4 += pitch;
4468 source5 += pitch;
4469 source6 += pitch;
4470 source7 += pitch;
4471 source8 += pitch;
4472 source9 += pitch;
4473 sourceA += pitch;
4474 sourceB += pitch;
4475 sourceC += pitch;
4476 sourceD += pitch;
4477 sourceE += pitch;
4478 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004479 }
John Bauman89401822014-05-06 15:04:28 -04004480 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004481 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004482 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004483 else
4484 #endif
John Bauman89401822014-05-06 15:04:28 -04004485 {
4486 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4487
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004488 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004489 {
4490 for(int y = 0; y < height; y++)
4491 {
4492 for(int x = 0; x < width; x++)
4493 {
4494 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4495 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4496
4497 c0 = AVERAGE(c0, c1);
4498
4499 *(unsigned int*)(source0 + 4 * x) = c0;
4500 }
4501
4502 source0 += pitch;
4503 source1 += pitch;
4504 }
4505 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004506 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004507 {
4508 for(int y = 0; y < height; y++)
4509 {
4510 for(int x = 0; x < width; x++)
4511 {
4512 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4513 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4514 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4515 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4516
4517 c0 = AVERAGE(c0, c1);
4518 c2 = AVERAGE(c2, c3);
4519 c0 = AVERAGE(c0, c2);
4520
4521 *(unsigned int*)(source0 + 4 * x) = c0;
4522 }
4523
4524 source0 += pitch;
4525 source1 += pitch;
4526 source2 += pitch;
4527 source3 += pitch;
4528 }
4529 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004530 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004531 {
4532 for(int y = 0; y < height; y++)
4533 {
4534 for(int x = 0; x < width; x++)
4535 {
4536 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4537 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4538 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4539 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4540 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4541 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4542 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4543 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4544
4545 c0 = AVERAGE(c0, c1);
4546 c2 = AVERAGE(c2, c3);
4547 c4 = AVERAGE(c4, c5);
4548 c6 = AVERAGE(c6, c7);
4549 c0 = AVERAGE(c0, c2);
4550 c4 = AVERAGE(c4, c6);
4551 c0 = AVERAGE(c0, c4);
4552
4553 *(unsigned int*)(source0 + 4 * x) = c0;
4554 }
4555
4556 source0 += pitch;
4557 source1 += pitch;
4558 source2 += pitch;
4559 source3 += pitch;
4560 source4 += pitch;
4561 source5 += pitch;
4562 source6 += pitch;
4563 source7 += pitch;
4564 }
4565 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004566 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004567 {
4568 for(int y = 0; y < height; y++)
4569 {
4570 for(int x = 0; x < width; x++)
4571 {
4572 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4573 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4574 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4575 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4576 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4577 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4578 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4579 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4580 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4581 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4582 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4583 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4584 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4585 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4586 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4587 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4588
4589 c0 = AVERAGE(c0, c1);
4590 c2 = AVERAGE(c2, c3);
4591 c4 = AVERAGE(c4, c5);
4592 c6 = AVERAGE(c6, c7);
4593 c8 = AVERAGE(c8, c9);
4594 cA = AVERAGE(cA, cB);
4595 cC = AVERAGE(cC, cD);
4596 cE = AVERAGE(cE, cF);
4597 c0 = AVERAGE(c0, c2);
4598 c4 = AVERAGE(c4, c6);
4599 c8 = AVERAGE(c8, cA);
4600 cC = AVERAGE(cC, cE);
4601 c0 = AVERAGE(c0, c4);
4602 c8 = AVERAGE(c8, cC);
4603 c0 = AVERAGE(c0, c8);
4604
4605 *(unsigned int*)(source0 + 4 * x) = c0;
4606 }
4607
4608 source0 += pitch;
4609 source1 += pitch;
4610 source2 += pitch;
4611 source3 += pitch;
4612 source4 += pitch;
4613 source5 += pitch;
4614 source6 += pitch;
4615 source7 += pitch;
4616 source8 += pitch;
4617 source9 += pitch;
4618 sourceA += pitch;
4619 sourceB += pitch;
4620 sourceC += pitch;
4621 sourceD += pitch;
4622 sourceE += pitch;
4623 sourceF += pitch;
4624 }
4625 }
4626 else ASSERT(false);
4627
4628 #undef AVERAGE
4629 }
4630 }
4631 else if(internal.format == FORMAT_A16B16G16R16)
4632 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004633 #if defined(__i386__) || defined(__x86_64__)
4634 if(CPUID::supportsSSE2() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004635 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004636 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004637 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004638 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004639 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004640 for(int x = 0; x < width; x += 2)
4641 {
4642 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4643 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004644
Nicolas Capens47dc8672017-04-25 12:54:39 -04004645 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004646
Nicolas Capens47dc8672017-04-25 12:54:39 -04004647 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4648 }
4649
4650 source0 += pitch;
4651 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004652 }
John Bauman89401822014-05-06 15:04:28 -04004653 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004654 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004655 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004656 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004657 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004658 for(int x = 0; x < width; x += 2)
4659 {
4660 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4661 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4662 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4663 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004664
Nicolas Capens47dc8672017-04-25 12:54:39 -04004665 c0 = _mm_avg_epu16(c0, c1);
4666 c2 = _mm_avg_epu16(c2, c3);
4667 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004668
Nicolas Capens47dc8672017-04-25 12:54:39 -04004669 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4670 }
4671
4672 source0 += pitch;
4673 source1 += pitch;
4674 source2 += pitch;
4675 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004676 }
John Bauman89401822014-05-06 15:04:28 -04004677 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004678 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004679 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004680 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004681 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004682 for(int x = 0; x < width; x += 2)
4683 {
4684 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4685 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4686 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4687 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4688 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4689 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4690 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4691 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004692
Nicolas Capens47dc8672017-04-25 12:54:39 -04004693 c0 = _mm_avg_epu16(c0, c1);
4694 c2 = _mm_avg_epu16(c2, c3);
4695 c4 = _mm_avg_epu16(c4, c5);
4696 c6 = _mm_avg_epu16(c6, c7);
4697 c0 = _mm_avg_epu16(c0, c2);
4698 c4 = _mm_avg_epu16(c4, c6);
4699 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004700
Nicolas Capens47dc8672017-04-25 12:54:39 -04004701 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4702 }
4703
4704 source0 += pitch;
4705 source1 += pitch;
4706 source2 += pitch;
4707 source3 += pitch;
4708 source4 += pitch;
4709 source5 += pitch;
4710 source6 += pitch;
4711 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004712 }
John Bauman89401822014-05-06 15:04:28 -04004713 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004714 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004715 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004716 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004717 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004718 for(int x = 0; x < width; x += 2)
4719 {
4720 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4721 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4722 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4723 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4724 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4725 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4726 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4727 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4728 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4729 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4730 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4731 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4732 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4733 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4734 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4735 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04004736
Nicolas Capens47dc8672017-04-25 12:54:39 -04004737 c0 = _mm_avg_epu16(c0, c1);
4738 c2 = _mm_avg_epu16(c2, c3);
4739 c4 = _mm_avg_epu16(c4, c5);
4740 c6 = _mm_avg_epu16(c6, c7);
4741 c8 = _mm_avg_epu16(c8, c9);
4742 cA = _mm_avg_epu16(cA, cB);
4743 cC = _mm_avg_epu16(cC, cD);
4744 cE = _mm_avg_epu16(cE, cF);
4745 c0 = _mm_avg_epu16(c0, c2);
4746 c4 = _mm_avg_epu16(c4, c6);
4747 c8 = _mm_avg_epu16(c8, cA);
4748 cC = _mm_avg_epu16(cC, cE);
4749 c0 = _mm_avg_epu16(c0, c4);
4750 c8 = _mm_avg_epu16(c8, cC);
4751 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004752
Nicolas Capens47dc8672017-04-25 12:54:39 -04004753 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4754 }
4755
4756 source0 += pitch;
4757 source1 += pitch;
4758 source2 += pitch;
4759 source3 += pitch;
4760 source4 += pitch;
4761 source5 += pitch;
4762 source6 += pitch;
4763 source7 += pitch;
4764 source8 += pitch;
4765 source9 += pitch;
4766 sourceA += pitch;
4767 sourceB += pitch;
4768 sourceC += pitch;
4769 sourceD += pitch;
4770 sourceE += pitch;
4771 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004772 }
John Bauman89401822014-05-06 15:04:28 -04004773 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004774 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004775 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004776 else
4777 #endif
John Bauman89401822014-05-06 15:04:28 -04004778 {
// AVERAGE(x, y): scalar fallback for averaging 16-bit channels two at a time.
// x and y are 32-bit words holding two packed unsigned 16-bit lanes; each lane is
// averaged independently with halves rounded up, (a + b + 1) >> 1, which is the same
// rounding _mm_avg_epu16 applies in the SSE2 path of this format branch.
//   (x) & (y)                       - bits set in both operands (contribute fully)
//   (((x) ^ (y)) >> 1) & 0x7FFF7FFF - half of the differing bits; the mask clears the
//                                     bit shifted out of the upper lane into bit 15
//   ((x) ^ (y)) & 0x00010001        - adds 1 to a lane whose sum is odd (round up)
// Each lane result is at most 0xFFFF, so no carry crosses the lane boundary.
// Note: both arguments are expanded several times; pass side-effect-free expressions.
4779 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4780
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004781 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004782 {
4783 for(int y = 0; y < height; y++)
4784 {
4785 for(int x = 0; x < 2 * width; x++)
4786 {
4787 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4788 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4789
4790 c0 = AVERAGE(c0, c1);
4791
4792 *(unsigned int*)(source0 + 4 * x) = c0;
4793 }
4794
4795 source0 += pitch;
4796 source1 += pitch;
4797 }
4798 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004799 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004800 {
4801 for(int y = 0; y < height; y++)
4802 {
4803 for(int x = 0; x < 2 * width; x++)
4804 {
4805 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4806 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4807 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4808 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4809
4810 c0 = AVERAGE(c0, c1);
4811 c2 = AVERAGE(c2, c3);
4812 c0 = AVERAGE(c0, c2);
4813
4814 *(unsigned int*)(source0 + 4 * x) = c0;
4815 }
4816
4817 source0 += pitch;
4818 source1 += pitch;
4819 source2 += pitch;
4820 source3 += pitch;
4821 }
4822 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004823 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004824 {
4825 for(int y = 0; y < height; y++)
4826 {
4827 for(int x = 0; x < 2 * width; x++)
4828 {
4829 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4830 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4831 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4832 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4833 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4834 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4835 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4836 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4837
4838 c0 = AVERAGE(c0, c1);
4839 c2 = AVERAGE(c2, c3);
4840 c4 = AVERAGE(c4, c5);
4841 c6 = AVERAGE(c6, c7);
4842 c0 = AVERAGE(c0, c2);
4843 c4 = AVERAGE(c4, c6);
4844 c0 = AVERAGE(c0, c4);
4845
4846 *(unsigned int*)(source0 + 4 * x) = c0;
4847 }
4848
4849 source0 += pitch;
4850 source1 += pitch;
4851 source2 += pitch;
4852 source3 += pitch;
4853 source4 += pitch;
4854 source5 += pitch;
4855 source6 += pitch;
4856 source7 += pitch;
4857 }
4858 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004859 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004860 {
4861 for(int y = 0; y < height; y++)
4862 {
4863 for(int x = 0; x < 2 * width; x++)
4864 {
4865 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4866 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4867 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4868 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4869 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4870 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4871 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4872 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4873 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4874 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4875 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4876 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4877 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4878 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4879 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4880 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4881
4882 c0 = AVERAGE(c0, c1);
4883 c2 = AVERAGE(c2, c3);
4884 c4 = AVERAGE(c4, c5);
4885 c6 = AVERAGE(c6, c7);
4886 c8 = AVERAGE(c8, c9);
4887 cA = AVERAGE(cA, cB);
4888 cC = AVERAGE(cC, cD);
4889 cE = AVERAGE(cE, cF);
4890 c0 = AVERAGE(c0, c2);
4891 c4 = AVERAGE(c4, c6);
4892 c8 = AVERAGE(c8, cA);
4893 cC = AVERAGE(cC, cE);
4894 c0 = AVERAGE(c0, c4);
4895 c8 = AVERAGE(c8, cC);
4896 c0 = AVERAGE(c0, c8);
4897
4898 *(unsigned int*)(source0 + 4 * x) = c0;
4899 }
4900
4901 source0 += pitch;
4902 source1 += pitch;
4903 source2 += pitch;
4904 source3 += pitch;
4905 source4 += pitch;
4906 source5 += pitch;
4907 source6 += pitch;
4908 source7 += pitch;
4909 source8 += pitch;
4910 source9 += pitch;
4911 sourceA += pitch;
4912 sourceB += pitch;
4913 sourceC += pitch;
4914 sourceD += pitch;
4915 sourceE += pitch;
4916 sourceF += pitch;
4917 }
4918 }
4919 else ASSERT(false);
4920
4921 #undef AVERAGE
4922 }
4923 }
4924 else if(internal.format == FORMAT_R32F)
4925 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004926 #if defined(__i386__) || defined(__x86_64__)
4927 if(CPUID::supportsSSE() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004928 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004929 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004930 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004931 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004932 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004933 for(int x = 0; x < width; x += 4)
4934 {
4935 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4936 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004937
Nicolas Capens47dc8672017-04-25 12:54:39 -04004938 c0 = _mm_add_ps(c0, c1);
4939 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004940
Nicolas Capens47dc8672017-04-25 12:54:39 -04004941 _mm_store_ps((float*)(source0 + 4 * x), c0);
4942 }
4943
4944 source0 += pitch;
4945 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004946 }
John Bauman89401822014-05-06 15:04:28 -04004947 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004948 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004949 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004950 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004951 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004952 for(int x = 0; x < width; x += 4)
4953 {
4954 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4955 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4956 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4957 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004958
Nicolas Capens47dc8672017-04-25 12:54:39 -04004959 c0 = _mm_add_ps(c0, c1);
4960 c2 = _mm_add_ps(c2, c3);
4961 c0 = _mm_add_ps(c0, c2);
4962 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004963
Nicolas Capens47dc8672017-04-25 12:54:39 -04004964 _mm_store_ps((float*)(source0 + 4 * x), c0);
4965 }
4966
4967 source0 += pitch;
4968 source1 += pitch;
4969 source2 += pitch;
4970 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004971 }
John Bauman89401822014-05-06 15:04:28 -04004972 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004973 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004974 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004975 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004976 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004977 for(int x = 0; x < width; x += 4)
4978 {
4979 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4980 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4981 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4982 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4983 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4984 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4985 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4986 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004987
Nicolas Capens47dc8672017-04-25 12:54:39 -04004988 c0 = _mm_add_ps(c0, c1);
4989 c2 = _mm_add_ps(c2, c3);
4990 c4 = _mm_add_ps(c4, c5);
4991 c6 = _mm_add_ps(c6, c7);
4992 c0 = _mm_add_ps(c0, c2);
4993 c4 = _mm_add_ps(c4, c6);
4994 c0 = _mm_add_ps(c0, c4);
4995 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004996
Nicolas Capens47dc8672017-04-25 12:54:39 -04004997 _mm_store_ps((float*)(source0 + 4 * x), c0);
4998 }
4999
5000 source0 += pitch;
5001 source1 += pitch;
5002 source2 += pitch;
5003 source3 += pitch;
5004 source4 += pitch;
5005 source5 += pitch;
5006 source6 += pitch;
5007 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005008 }
John Bauman89401822014-05-06 15:04:28 -04005009 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005010 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005011 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005012 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005013 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005014 for(int x = 0; x < width; x += 4)
5015 {
5016 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
5017 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
5018 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
5019 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
5020 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
5021 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
5022 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
5023 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
5024 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
5025 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
5026 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
5027 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
5028 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
5029 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
5030 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
5031 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04005032
Nicolas Capens47dc8672017-04-25 12:54:39 -04005033 c0 = _mm_add_ps(c0, c1);
5034 c2 = _mm_add_ps(c2, c3);
5035 c4 = _mm_add_ps(c4, c5);
5036 c6 = _mm_add_ps(c6, c7);
5037 c8 = _mm_add_ps(c8, c9);
5038 cA = _mm_add_ps(cA, cB);
5039 cC = _mm_add_ps(cC, cD);
5040 cE = _mm_add_ps(cE, cF);
5041 c0 = _mm_add_ps(c0, c2);
5042 c4 = _mm_add_ps(c4, c6);
5043 c8 = _mm_add_ps(c8, cA);
5044 cC = _mm_add_ps(cC, cE);
5045 c0 = _mm_add_ps(c0, c4);
5046 c8 = _mm_add_ps(c8, cC);
5047 c0 = _mm_add_ps(c0, c8);
5048 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005049
Nicolas Capens47dc8672017-04-25 12:54:39 -04005050 _mm_store_ps((float*)(source0 + 4 * x), c0);
5051 }
5052
5053 source0 += pitch;
5054 source1 += pitch;
5055 source2 += pitch;
5056 source3 += pitch;
5057 source4 += pitch;
5058 source5 += pitch;
5059 source6 += pitch;
5060 source7 += pitch;
5061 source8 += pitch;
5062 source9 += pitch;
5063 sourceA += pitch;
5064 sourceB += pitch;
5065 sourceC += pitch;
5066 sourceD += pitch;
5067 sourceE += pitch;
5068 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005069 }
John Bauman89401822014-05-06 15:04:28 -04005070 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005071 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005072 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005073 else
5074 #endif
John Bauman89401822014-05-06 15:04:28 -04005075 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005076 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005077 {
5078 for(int y = 0; y < height; y++)
5079 {
5080 for(int x = 0; x < width; x++)
5081 {
5082 float c0 = *(float*)(source0 + 4 * x);
5083 float c1 = *(float*)(source1 + 4 * x);
5084
5085 c0 = c0 + c1;
5086 c0 *= 1.0f / 2.0f;
5087
5088 *(float*)(source0 + 4 * x) = c0;
5089 }
5090
5091 source0 += pitch;
5092 source1 += pitch;
5093 }
5094 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005095 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005096 {
5097 for(int y = 0; y < height; y++)
5098 {
5099 for(int x = 0; x < width; x++)
5100 {
5101 float c0 = *(float*)(source0 + 4 * x);
5102 float c1 = *(float*)(source1 + 4 * x);
5103 float c2 = *(float*)(source2 + 4 * x);
5104 float c3 = *(float*)(source3 + 4 * x);
5105
5106 c0 = c0 + c1;
5107 c2 = c2 + c3;
5108 c0 = c0 + c2;
5109 c0 *= 1.0f / 4.0f;
5110
5111 *(float*)(source0 + 4 * x) = c0;
5112 }
5113
5114 source0 += pitch;
5115 source1 += pitch;
5116 source2 += pitch;
5117 source3 += pitch;
5118 }
5119 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005120 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005121 {
5122 for(int y = 0; y < height; y++)
5123 {
5124 for(int x = 0; x < width; x++)
5125 {
5126 float c0 = *(float*)(source0 + 4 * x);
5127 float c1 = *(float*)(source1 + 4 * x);
5128 float c2 = *(float*)(source2 + 4 * x);
5129 float c3 = *(float*)(source3 + 4 * x);
5130 float c4 = *(float*)(source4 + 4 * x);
5131 float c5 = *(float*)(source5 + 4 * x);
5132 float c6 = *(float*)(source6 + 4 * x);
5133 float c7 = *(float*)(source7 + 4 * x);
5134
5135 c0 = c0 + c1;
5136 c2 = c2 + c3;
5137 c4 = c4 + c5;
5138 c6 = c6 + c7;
5139 c0 = c0 + c2;
5140 c4 = c4 + c6;
5141 c0 = c0 + c4;
5142 c0 *= 1.0f / 8.0f;
5143
5144 *(float*)(source0 + 4 * x) = c0;
5145 }
5146
5147 source0 += pitch;
5148 source1 += pitch;
5149 source2 += pitch;
5150 source3 += pitch;
5151 source4 += pitch;
5152 source5 += pitch;
5153 source6 += pitch;
5154 source7 += pitch;
5155 }
5156 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005157 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005158 {
5159 for(int y = 0; y < height; y++)
5160 {
5161 for(int x = 0; x < width; x++)
5162 {
5163 float c0 = *(float*)(source0 + 4 * x);
5164 float c1 = *(float*)(source1 + 4 * x);
5165 float c2 = *(float*)(source2 + 4 * x);
5166 float c3 = *(float*)(source3 + 4 * x);
5167 float c4 = *(float*)(source4 + 4 * x);
5168 float c5 = *(float*)(source5 + 4 * x);
5169 float c6 = *(float*)(source6 + 4 * x);
5170 float c7 = *(float*)(source7 + 4 * x);
5171 float c8 = *(float*)(source8 + 4 * x);
5172 float c9 = *(float*)(source9 + 4 * x);
5173 float cA = *(float*)(sourceA + 4 * x);
5174 float cB = *(float*)(sourceB + 4 * x);
5175 float cC = *(float*)(sourceC + 4 * x);
5176 float cD = *(float*)(sourceD + 4 * x);
5177 float cE = *(float*)(sourceE + 4 * x);
5178 float cF = *(float*)(sourceF + 4 * x);
5179
5180 c0 = c0 + c1;
5181 c2 = c2 + c3;
5182 c4 = c4 + c5;
5183 c6 = c6 + c7;
5184 c8 = c8 + c9;
5185 cA = cA + cB;
5186 cC = cC + cD;
5187 cE = cE + cF;
5188 c0 = c0 + c2;
5189 c4 = c4 + c6;
5190 c8 = c8 + cA;
5191 cC = cC + cE;
5192 c0 = c0 + c4;
5193 c8 = c8 + cC;
5194 c0 = c0 + c8;
5195 c0 *= 1.0f / 16.0f;
5196
5197 *(float*)(source0 + 4 * x) = c0;
5198 }
5199
5200 source0 += pitch;
5201 source1 += pitch;
5202 source2 += pitch;
5203 source3 += pitch;
5204 source4 += pitch;
5205 source5 += pitch;
5206 source6 += pitch;
5207 source7 += pitch;
5208 source8 += pitch;
5209 source9 += pitch;
5210 sourceA += pitch;
5211 sourceB += pitch;
5212 sourceC += pitch;
5213 sourceD += pitch;
5214 sourceE += pitch;
5215 sourceF += pitch;
5216 }
5217 }
5218 else ASSERT(false);
5219 }
5220 }
5221 else if(internal.format == FORMAT_G32R32F)
5222 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005223 #if defined(__i386__) || defined(__x86_64__)
5224 if(CPUID::supportsSSE() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04005225 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005226 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005227 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005228 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005229 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005230 for(int x = 0; x < width; x += 2)
5231 {
5232 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5233 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005234
Nicolas Capens47dc8672017-04-25 12:54:39 -04005235 c0 = _mm_add_ps(c0, c1);
5236 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005237
Nicolas Capens47dc8672017-04-25 12:54:39 -04005238 _mm_store_ps((float*)(source0 + 8 * x), c0);
5239 }
5240
5241 source0 += pitch;
5242 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005243 }
John Bauman89401822014-05-06 15:04:28 -04005244 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005245 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005246 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005247 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005248 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005249 for(int x = 0; x < width; x += 2)
5250 {
5251 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5252 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5253 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5254 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005255
Nicolas Capens47dc8672017-04-25 12:54:39 -04005256 c0 = _mm_add_ps(c0, c1);
5257 c2 = _mm_add_ps(c2, c3);
5258 c0 = _mm_add_ps(c0, c2);
5259 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005260
Nicolas Capens47dc8672017-04-25 12:54:39 -04005261 _mm_store_ps((float*)(source0 + 8 * x), c0);
5262 }
5263
5264 source0 += pitch;
5265 source1 += pitch;
5266 source2 += pitch;
5267 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005268 }
John Bauman89401822014-05-06 15:04:28 -04005269 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005270 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005271 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005272 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005273 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005274 for(int x = 0; x < width; x += 2)
5275 {
5276 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5277 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5278 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5279 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5280 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5281 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5282 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5283 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005284
Nicolas Capens47dc8672017-04-25 12:54:39 -04005285 c0 = _mm_add_ps(c0, c1);
5286 c2 = _mm_add_ps(c2, c3);
5287 c4 = _mm_add_ps(c4, c5);
5288 c6 = _mm_add_ps(c6, c7);
5289 c0 = _mm_add_ps(c0, c2);
5290 c4 = _mm_add_ps(c4, c6);
5291 c0 = _mm_add_ps(c0, c4);
5292 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005293
Nicolas Capens47dc8672017-04-25 12:54:39 -04005294 _mm_store_ps((float*)(source0 + 8 * x), c0);
5295 }
5296
5297 source0 += pitch;
5298 source1 += pitch;
5299 source2 += pitch;
5300 source3 += pitch;
5301 source4 += pitch;
5302 source5 += pitch;
5303 source6 += pitch;
5304 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005305 }
John Bauman89401822014-05-06 15:04:28 -04005306 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005307 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005308 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005309 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005310 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005311 for(int x = 0; x < width; x += 2)
5312 {
5313 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5314 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5315 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5316 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5317 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5318 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5319 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5320 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5321 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5322 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5323 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5324 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5325 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5326 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5327 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5328 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04005329
Nicolas Capens47dc8672017-04-25 12:54:39 -04005330 c0 = _mm_add_ps(c0, c1);
5331 c2 = _mm_add_ps(c2, c3);
5332 c4 = _mm_add_ps(c4, c5);
5333 c6 = _mm_add_ps(c6, c7);
5334 c8 = _mm_add_ps(c8, c9);
5335 cA = _mm_add_ps(cA, cB);
5336 cC = _mm_add_ps(cC, cD);
5337 cE = _mm_add_ps(cE, cF);
5338 c0 = _mm_add_ps(c0, c2);
5339 c4 = _mm_add_ps(c4, c6);
5340 c8 = _mm_add_ps(c8, cA);
5341 cC = _mm_add_ps(cC, cE);
5342 c0 = _mm_add_ps(c0, c4);
5343 c8 = _mm_add_ps(c8, cC);
5344 c0 = _mm_add_ps(c0, c8);
5345 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005346
Nicolas Capens47dc8672017-04-25 12:54:39 -04005347 _mm_store_ps((float*)(source0 + 8 * x), c0);
5348 }
5349
5350 source0 += pitch;
5351 source1 += pitch;
5352 source2 += pitch;
5353 source3 += pitch;
5354 source4 += pitch;
5355 source5 += pitch;
5356 source6 += pitch;
5357 source7 += pitch;
5358 source8 += pitch;
5359 source9 += pitch;
5360 sourceA += pitch;
5361 sourceB += pitch;
5362 sourceC += pitch;
5363 sourceD += pitch;
5364 sourceE += pitch;
5365 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005366 }
John Bauman89401822014-05-06 15:04:28 -04005367 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005368 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005369 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005370 else
5371 #endif
John Bauman89401822014-05-06 15:04:28 -04005372 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005373 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005374 {
5375 for(int y = 0; y < height; y++)
5376 {
5377 for(int x = 0; x < 2 * width; x++)
5378 {
5379 float c0 = *(float*)(source0 + 4 * x);
5380 float c1 = *(float*)(source1 + 4 * x);
5381
5382 c0 = c0 + c1;
5383 c0 *= 1.0f / 2.0f;
5384
5385 *(float*)(source0 + 4 * x) = c0;
5386 }
5387
5388 source0 += pitch;
5389 source1 += pitch;
5390 }
5391 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005392 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005393 {
5394 for(int y = 0; y < height; y++)
5395 {
5396 for(int x = 0; x < 2 * width; x++)
5397 {
5398 float c0 = *(float*)(source0 + 4 * x);
5399 float c1 = *(float*)(source1 + 4 * x);
5400 float c2 = *(float*)(source2 + 4 * x);
5401 float c3 = *(float*)(source3 + 4 * x);
5402
5403 c0 = c0 + c1;
5404 c2 = c2 + c3;
5405 c0 = c0 + c2;
5406 c0 *= 1.0f / 4.0f;
5407
5408 *(float*)(source0 + 4 * x) = c0;
5409 }
5410
5411 source0 += pitch;
5412 source1 += pitch;
5413 source2 += pitch;
5414 source3 += pitch;
5415 }
5416 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005417 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005418 {
5419 for(int y = 0; y < height; y++)
5420 {
5421 for(int x = 0; x < 2 * width; x++)
5422 {
5423 float c0 = *(float*)(source0 + 4 * x);
5424 float c1 = *(float*)(source1 + 4 * x);
5425 float c2 = *(float*)(source2 + 4 * x);
5426 float c3 = *(float*)(source3 + 4 * x);
5427 float c4 = *(float*)(source4 + 4 * x);
5428 float c5 = *(float*)(source5 + 4 * x);
5429 float c6 = *(float*)(source6 + 4 * x);
5430 float c7 = *(float*)(source7 + 4 * x);
5431
5432 c0 = c0 + c1;
5433 c2 = c2 + c3;
5434 c4 = c4 + c5;
5435 c6 = c6 + c7;
5436 c0 = c0 + c2;
5437 c4 = c4 + c6;
5438 c0 = c0 + c4;
5439 c0 *= 1.0f / 8.0f;
5440
5441 *(float*)(source0 + 4 * x) = c0;
5442 }
5443
5444 source0 += pitch;
5445 source1 += pitch;
5446 source2 += pitch;
5447 source3 += pitch;
5448 source4 += pitch;
5449 source5 += pitch;
5450 source6 += pitch;
5451 source7 += pitch;
5452 }
5453 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005454 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005455 {
5456 for(int y = 0; y < height; y++)
5457 {
5458 for(int x = 0; x < 2 * width; x++)
5459 {
5460 float c0 = *(float*)(source0 + 4 * x);
5461 float c1 = *(float*)(source1 + 4 * x);
5462 float c2 = *(float*)(source2 + 4 * x);
5463 float c3 = *(float*)(source3 + 4 * x);
5464 float c4 = *(float*)(source4 + 4 * x);
5465 float c5 = *(float*)(source5 + 4 * x);
5466 float c6 = *(float*)(source6 + 4 * x);
5467 float c7 = *(float*)(source7 + 4 * x);
5468 float c8 = *(float*)(source8 + 4 * x);
5469 float c9 = *(float*)(source9 + 4 * x);
5470 float cA = *(float*)(sourceA + 4 * x);
5471 float cB = *(float*)(sourceB + 4 * x);
5472 float cC = *(float*)(sourceC + 4 * x);
5473 float cD = *(float*)(sourceD + 4 * x);
5474 float cE = *(float*)(sourceE + 4 * x);
5475 float cF = *(float*)(sourceF + 4 * x);
5476
5477 c0 = c0 + c1;
5478 c2 = c2 + c3;
5479 c4 = c4 + c5;
5480 c6 = c6 + c7;
5481 c8 = c8 + c9;
5482 cA = cA + cB;
5483 cC = cC + cD;
5484 cE = cE + cF;
5485 c0 = c0 + c2;
5486 c4 = c4 + c6;
5487 c8 = c8 + cA;
5488 cC = cC + cE;
5489 c0 = c0 + c4;
5490 c8 = c8 + cC;
5491 c0 = c0 + c8;
5492 c0 *= 1.0f / 16.0f;
5493
5494 *(float*)(source0 + 4 * x) = c0;
5495 }
5496
5497 source0 += pitch;
5498 source1 += pitch;
5499 source2 += pitch;
5500 source3 += pitch;
5501 source4 += pitch;
5502 source5 += pitch;
5503 source6 += pitch;
5504 source7 += pitch;
5505 source8 += pitch;
5506 source9 += pitch;
5507 sourceA += pitch;
5508 sourceB += pitch;
5509 sourceC += pitch;
5510 sourceD += pitch;
5511 sourceE += pitch;
5512 sourceF += pitch;
5513 }
5514 }
5515 else ASSERT(false);
5516 }
5517 }
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04005518 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F)
John Bauman89401822014-05-06 15:04:28 -04005519 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005520 #if defined(__i386__) || defined(__x86_64__)
5521 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04005522 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005523 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005524 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005525 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005526 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005527 for(int x = 0; x < width; x++)
5528 {
5529 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5530 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005531
Nicolas Capens47dc8672017-04-25 12:54:39 -04005532 c0 = _mm_add_ps(c0, c1);
5533 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005534
Nicolas Capens47dc8672017-04-25 12:54:39 -04005535 _mm_store_ps((float*)(source0 + 16 * x), c0);
5536 }
5537
5538 source0 += pitch;
5539 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005540 }
John Bauman89401822014-05-06 15:04:28 -04005541 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005542 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005543 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005544 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005545 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005546 for(int x = 0; x < width; x++)
5547 {
5548 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5549 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5550 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5551 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005552
Nicolas Capens47dc8672017-04-25 12:54:39 -04005553 c0 = _mm_add_ps(c0, c1);
5554 c2 = _mm_add_ps(c2, c3);
5555 c0 = _mm_add_ps(c0, c2);
5556 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005557
Nicolas Capens47dc8672017-04-25 12:54:39 -04005558 _mm_store_ps((float*)(source0 + 16 * x), c0);
5559 }
5560
5561 source0 += pitch;
5562 source1 += pitch;
5563 source2 += pitch;
5564 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005565 }
John Bauman89401822014-05-06 15:04:28 -04005566 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005567 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005568 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005569 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005570 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005571 for(int x = 0; x < width; x++)
5572 {
5573 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5574 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5575 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5576 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5577 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5578 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5579 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5580 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005581
Nicolas Capens47dc8672017-04-25 12:54:39 -04005582 c0 = _mm_add_ps(c0, c1);
5583 c2 = _mm_add_ps(c2, c3);
5584 c4 = _mm_add_ps(c4, c5);
5585 c6 = _mm_add_ps(c6, c7);
5586 c0 = _mm_add_ps(c0, c2);
5587 c4 = _mm_add_ps(c4, c6);
5588 c0 = _mm_add_ps(c0, c4);
5589 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005590
Nicolas Capens47dc8672017-04-25 12:54:39 -04005591 _mm_store_ps((float*)(source0 + 16 * x), c0);
5592 }
5593
5594 source0 += pitch;
5595 source1 += pitch;
5596 source2 += pitch;
5597 source3 += pitch;
5598 source4 += pitch;
5599 source5 += pitch;
5600 source6 += pitch;
5601 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005602 }
John Bauman89401822014-05-06 15:04:28 -04005603 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005604 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005605 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005606 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005607 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005608 for(int x = 0; x < width; x++)
5609 {
5610 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5611 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5612 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5613 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5614 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5615 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5616 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5617 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5618 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5619 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5620 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5621 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5622 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5623 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5624 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5625 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
John Bauman89401822014-05-06 15:04:28 -04005626
Nicolas Capens47dc8672017-04-25 12:54:39 -04005627 c0 = _mm_add_ps(c0, c1);
5628 c2 = _mm_add_ps(c2, c3);
5629 c4 = _mm_add_ps(c4, c5);
5630 c6 = _mm_add_ps(c6, c7);
5631 c8 = _mm_add_ps(c8, c9);
5632 cA = _mm_add_ps(cA, cB);
5633 cC = _mm_add_ps(cC, cD);
5634 cE = _mm_add_ps(cE, cF);
5635 c0 = _mm_add_ps(c0, c2);
5636 c4 = _mm_add_ps(c4, c6);
5637 c8 = _mm_add_ps(c8, cA);
5638 cC = _mm_add_ps(cC, cE);
5639 c0 = _mm_add_ps(c0, c4);
5640 c8 = _mm_add_ps(c8, cC);
5641 c0 = _mm_add_ps(c0, c8);
5642 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005643
Nicolas Capens47dc8672017-04-25 12:54:39 -04005644 _mm_store_ps((float*)(source0 + 16 * x), c0);
5645 }
5646
5647 source0 += pitch;
5648 source1 += pitch;
5649 source2 += pitch;
5650 source3 += pitch;
5651 source4 += pitch;
5652 source5 += pitch;
5653 source6 += pitch;
5654 source7 += pitch;
5655 source8 += pitch;
5656 source9 += pitch;
5657 sourceA += pitch;
5658 sourceB += pitch;
5659 sourceC += pitch;
5660 sourceD += pitch;
5661 sourceE += pitch;
5662 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005663 }
John Bauman89401822014-05-06 15:04:28 -04005664 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005665 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005666 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005667 else
5668 #endif
John Bauman89401822014-05-06 15:04:28 -04005669 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005670 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005671 {
5672 for(int y = 0; y < height; y++)
5673 {
5674 for(int x = 0; x < 4 * width; x++)
5675 {
5676 float c0 = *(float*)(source0 + 4 * x);
5677 float c1 = *(float*)(source1 + 4 * x);
5678
5679 c0 = c0 + c1;
5680 c0 *= 1.0f / 2.0f;
5681
5682 *(float*)(source0 + 4 * x) = c0;
5683 }
5684
5685 source0 += pitch;
5686 source1 += pitch;
5687 }
5688 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005689 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005690 {
5691 for(int y = 0; y < height; y++)
5692 {
5693 for(int x = 0; x < 4 * width; x++)
5694 {
5695 float c0 = *(float*)(source0 + 4 * x);
5696 float c1 = *(float*)(source1 + 4 * x);
5697 float c2 = *(float*)(source2 + 4 * x);
5698 float c3 = *(float*)(source3 + 4 * x);
5699
5700 c0 = c0 + c1;
5701 c2 = c2 + c3;
5702 c0 = c0 + c2;
5703 c0 *= 1.0f / 4.0f;
5704
5705 *(float*)(source0 + 4 * x) = c0;
5706 }
5707
5708 source0 += pitch;
5709 source1 += pitch;
5710 source2 += pitch;
5711 source3 += pitch;
5712 }
5713 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005714 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005715 {
5716 for(int y = 0; y < height; y++)
5717 {
5718 for(int x = 0; x < 4 * width; x++)
5719 {
5720 float c0 = *(float*)(source0 + 4 * x);
5721 float c1 = *(float*)(source1 + 4 * x);
5722 float c2 = *(float*)(source2 + 4 * x);
5723 float c3 = *(float*)(source3 + 4 * x);
5724 float c4 = *(float*)(source4 + 4 * x);
5725 float c5 = *(float*)(source5 + 4 * x);
5726 float c6 = *(float*)(source6 + 4 * x);
5727 float c7 = *(float*)(source7 + 4 * x);
5728
5729 c0 = c0 + c1;
5730 c2 = c2 + c3;
5731 c4 = c4 + c5;
5732 c6 = c6 + c7;
5733 c0 = c0 + c2;
5734 c4 = c4 + c6;
5735 c0 = c0 + c4;
5736 c0 *= 1.0f / 8.0f;
5737
5738 *(float*)(source0 + 4 * x) = c0;
5739 }
5740
5741 source0 += pitch;
5742 source1 += pitch;
5743 source2 += pitch;
5744 source3 += pitch;
5745 source4 += pitch;
5746 source5 += pitch;
5747 source6 += pitch;
5748 source7 += pitch;
5749 }
5750 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005751 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005752 {
5753 for(int y = 0; y < height; y++)
5754 {
5755 for(int x = 0; x < 4 * width; x++)
5756 {
5757 float c0 = *(float*)(source0 + 4 * x);
5758 float c1 = *(float*)(source1 + 4 * x);
5759 float c2 = *(float*)(source2 + 4 * x);
5760 float c3 = *(float*)(source3 + 4 * x);
5761 float c4 = *(float*)(source4 + 4 * x);
5762 float c5 = *(float*)(source5 + 4 * x);
5763 float c6 = *(float*)(source6 + 4 * x);
5764 float c7 = *(float*)(source7 + 4 * x);
5765 float c8 = *(float*)(source8 + 4 * x);
5766 float c9 = *(float*)(source9 + 4 * x);
5767 float cA = *(float*)(sourceA + 4 * x);
5768 float cB = *(float*)(sourceB + 4 * x);
5769 float cC = *(float*)(sourceC + 4 * x);
5770 float cD = *(float*)(sourceD + 4 * x);
5771 float cE = *(float*)(sourceE + 4 * x);
5772 float cF = *(float*)(sourceF + 4 * x);
5773
5774 c0 = c0 + c1;
5775 c2 = c2 + c3;
5776 c4 = c4 + c5;
5777 c6 = c6 + c7;
5778 c8 = c8 + c9;
5779 cA = cA + cB;
5780 cC = cC + cD;
5781 cE = cE + cF;
5782 c0 = c0 + c2;
5783 c4 = c4 + c6;
5784 c8 = c8 + cA;
5785 cC = cC + cE;
5786 c0 = c0 + c4;
5787 c8 = c8 + cC;
5788 c0 = c0 + c8;
5789 c0 *= 1.0f / 16.0f;
5790
5791 *(float*)(source0 + 4 * x) = c0;
5792 }
5793
5794 source0 += pitch;
5795 source1 += pitch;
5796 source2 += pitch;
5797 source3 += pitch;
5798 source4 += pitch;
5799 source5 += pitch;
5800 source6 += pitch;
5801 source7 += pitch;
5802 source8 += pitch;
5803 source9 += pitch;
5804 sourceA += pitch;
5805 sourceB += pitch;
5806 sourceC += pitch;
5807 sourceD += pitch;
5808 sourceE += pitch;
5809 sourceF += pitch;
5810 }
5811 }
5812 else ASSERT(false);
5813 }
5814 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005815 else if(internal.format == FORMAT_R5G6B5)
5816 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005817 #if defined(__i386__) || defined(__x86_64__)
5818 if(CPUID::supportsSSE2() && (width % 8) == 0)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005819 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005820 if(internal.samples == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005821 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005822 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005823 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005824 for(int x = 0; x < width; x += 8)
5825 {
5826 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5827 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005828
Nicolas Capens47dc8672017-04-25 12:54:39 -04005829 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5830 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5831 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5832 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5833 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5834 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005835
Nicolas Capens47dc8672017-04-25 12:54:39 -04005836 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5837 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5838 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5839 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5840 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005841
Nicolas Capens47dc8672017-04-25 12:54:39 -04005842 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5843 }
5844
5845 source0 += pitch;
5846 source1 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005847 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005848 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005849 else if(internal.samples == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005850 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005851 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005852 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005853 for(int x = 0; x < width; x += 8)
5854 {
5855 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5856 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5857 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5858 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005859
Nicolas Capens47dc8672017-04-25 12:54:39 -04005860 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5861 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5862 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5863 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5864 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5865 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5866 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5867 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5868 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5869 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005870
Nicolas Capens47dc8672017-04-25 12:54:39 -04005871 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5872 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5873 c0 = _mm_avg_epu8(c0, c2);
5874 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5875 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5876 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5877 c1 = _mm_avg_epu16(c1, c3);
5878 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5879 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005880
Nicolas Capens47dc8672017-04-25 12:54:39 -04005881 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5882 }
5883
5884 source0 += pitch;
5885 source1 += pitch;
5886 source2 += pitch;
5887 source3 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005888 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005889 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005890 else if(internal.samples == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005891 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005892 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005893 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005894 for(int x = 0; x < width; x += 8)
5895 {
5896 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5897 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5898 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5899 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5900 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5901 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5902 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5903 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005904
Nicolas Capens47dc8672017-04-25 12:54:39 -04005905 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5906 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5907 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5908 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5909 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5910 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5911 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5912 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5913 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5914 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5915 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5916 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5917 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5918 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5919 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5920 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5921 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5922 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005923
Nicolas Capens47dc8672017-04-25 12:54:39 -04005924 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5925 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5926 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5927 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5928 c0 = _mm_avg_epu8(c0, c2);
5929 c4 = _mm_avg_epu8(c4, c6);
5930 c0 = _mm_avg_epu8(c0, c4);
5931 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5932 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5933 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5934 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5935 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5936 c1 = _mm_avg_epu16(c1, c3);
5937 c5 = _mm_avg_epu16(c5, c7);
5938 c1 = _mm_avg_epu16(c1, c5);
5939 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5940 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005941
Nicolas Capens47dc8672017-04-25 12:54:39 -04005942 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5943 }
5944
5945 source0 += pitch;
5946 source1 += pitch;
5947 source2 += pitch;
5948 source3 += pitch;
5949 source4 += pitch;
5950 source5 += pitch;
5951 source6 += pitch;
5952 source7 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005953 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005954 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005955 else if(internal.samples == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005956 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005957 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005958 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005959 for(int x = 0; x < width; x += 8)
5960 {
5961 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5962 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5963 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5964 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5965 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5966 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5967 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5968 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5969 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5970 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5971 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5972 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5973 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5974 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5975 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5976 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005977
Nicolas Capens47dc8672017-04-25 12:54:39 -04005978 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5979 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5980 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5981 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5982 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5983 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5984 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5985 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5986 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5987 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5988 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5989 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5990 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5991 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5992 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5993 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5994 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5995 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5996 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5997 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5998 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5999 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
6000 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
6001 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
6002 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
6003 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
6004 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
6005 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
6006 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
6007 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
6008 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
6009 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
6010 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
6011 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04006012
Nicolas Capens47dc8672017-04-25 12:54:39 -04006013 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
6014 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
6015 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
6016 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
6017 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
6018 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
6019 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
6020 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
6021 c0 = _mm_avg_epu8(c0, c2);
6022 c4 = _mm_avg_epu8(c4, c6);
6023 c8 = _mm_avg_epu8(c8, cA);
6024 cC = _mm_avg_epu8(cC, cE);
6025 c0 = _mm_avg_epu8(c0, c4);
6026 c8 = _mm_avg_epu8(c8, cC);
6027 c0 = _mm_avg_epu8(c0, c8);
6028 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
6029 c1 = _mm_avg_epu16(c0__g_, c1__g_);
6030 c3 = _mm_avg_epu16(c2__g_, c3__g_);
6031 c5 = _mm_avg_epu16(c4__g_, c5__g_);
6032 c7 = _mm_avg_epu16(c6__g_, c7__g_);
6033 c9 = _mm_avg_epu16(c8__g_, c9__g_);
6034 cB = _mm_avg_epu16(cA__g_, cB__g_);
6035 cD = _mm_avg_epu16(cC__g_, cD__g_);
6036 cF = _mm_avg_epu16(cE__g_, cF__g_);
6037 c1 = _mm_avg_epu8(c1, c3);
6038 c5 = _mm_avg_epu8(c5, c7);
6039 c9 = _mm_avg_epu8(c9, cB);
6040 cD = _mm_avg_epu8(cD, cF);
6041 c1 = _mm_avg_epu8(c1, c5);
6042 c9 = _mm_avg_epu8(c9, cD);
6043 c1 = _mm_avg_epu8(c1, c9);
6044 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
6045 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04006046
Nicolas Capens47dc8672017-04-25 12:54:39 -04006047 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
6048 }
6049
6050 source0 += pitch;
6051 source1 += pitch;
6052 source2 += pitch;
6053 source3 += pitch;
6054 source4 += pitch;
6055 source5 += pitch;
6056 source6 += pitch;
6057 source7 += pitch;
6058 source8 += pitch;
6059 source9 += pitch;
6060 sourceA += pitch;
6061 sourceB += pitch;
6062 sourceC += pitch;
6063 sourceD += pitch;
6064 sourceE += pitch;
6065 sourceF += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04006066 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04006067 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04006068 else ASSERT(false);
Nicolas Capens0e12a922015-09-04 09:18:15 -04006069 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04006070 else
6071 #endif
Nicolas Capens0e12a922015-09-04 09:18:15 -04006072 {
6073 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
6074
Nicolas Capensbfa23b32017-12-11 10:06:37 -05006075 if(internal.samples == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04006076 {
6077 for(int y = 0; y < height; y++)
6078 {
6079 for(int x = 0; x < width; x++)
6080 {
6081 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6082 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6083
6084 c0 = AVERAGE(c0, c1);
6085
6086 *(unsigned short*)(source0 + 2 * x) = c0;
6087 }
6088
6089 source0 += pitch;
6090 source1 += pitch;
6091 }
6092 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05006093 else if(internal.samples == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04006094 {
6095 for(int y = 0; y < height; y++)
6096 {
6097 for(int x = 0; x < width; x++)
6098 {
6099 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6100 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6101 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6102 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6103
6104 c0 = AVERAGE(c0, c1);
6105 c2 = AVERAGE(c2, c3);
6106 c0 = AVERAGE(c0, c2);
6107
6108 *(unsigned short*)(source0 + 2 * x) = c0;
6109 }
6110
6111 source0 += pitch;
6112 source1 += pitch;
6113 source2 += pitch;
6114 source3 += pitch;
6115 }
6116 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05006117 else if(internal.samples == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04006118 {
6119 for(int y = 0; y < height; y++)
6120 {
6121 for(int x = 0; x < width; x++)
6122 {
6123 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6124 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6125 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6126 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6127 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6128 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6129 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6130 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6131
6132 c0 = AVERAGE(c0, c1);
6133 c2 = AVERAGE(c2, c3);
6134 c4 = AVERAGE(c4, c5);
6135 c6 = AVERAGE(c6, c7);
6136 c0 = AVERAGE(c0, c2);
6137 c4 = AVERAGE(c4, c6);
6138 c0 = AVERAGE(c0, c4);
6139
6140 *(unsigned short*)(source0 + 2 * x) = c0;
6141 }
6142
6143 source0 += pitch;
6144 source1 += pitch;
6145 source2 += pitch;
6146 source3 += pitch;
6147 source4 += pitch;
6148 source5 += pitch;
6149 source6 += pitch;
6150 source7 += pitch;
6151 }
6152 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05006153 else if(internal.samples == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04006154 {
6155 for(int y = 0; y < height; y++)
6156 {
6157 for(int x = 0; x < width; x++)
6158 {
6159 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6160 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6161 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6162 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6163 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6164 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6165 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6166 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6167 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
6168 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
6169 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
6170 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
6171 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
6172 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
6173 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
6174 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
6175
6176 c0 = AVERAGE(c0, c1);
6177 c2 = AVERAGE(c2, c3);
6178 c4 = AVERAGE(c4, c5);
6179 c6 = AVERAGE(c6, c7);
6180 c8 = AVERAGE(c8, c9);
6181 cA = AVERAGE(cA, cB);
6182 cC = AVERAGE(cC, cD);
6183 cE = AVERAGE(cE, cF);
6184 c0 = AVERAGE(c0, c2);
6185 c4 = AVERAGE(c4, c6);
6186 c8 = AVERAGE(c8, cA);
6187 cC = AVERAGE(cC, cE);
6188 c0 = AVERAGE(c0, c4);
6189 c8 = AVERAGE(c8, cC);
6190 c0 = AVERAGE(c0, c8);
6191
6192 *(unsigned short*)(source0 + 2 * x) = c0;
6193 }
6194
6195 source0 += pitch;
6196 source1 += pitch;
6197 source2 += pitch;
6198 source3 += pitch;
6199 source4 += pitch;
6200 source5 += pitch;
6201 source6 += pitch;
6202 source7 += pitch;
6203 source8 += pitch;
6204 source9 += pitch;
6205 sourceA += pitch;
6206 sourceB += pitch;
6207 sourceC += pitch;
6208 sourceD += pitch;
6209 sourceE += pitch;
6210 sourceF += pitch;
6211 }
6212 }
6213 else ASSERT(false);
6214
6215 #undef AVERAGE
6216 }
6217 }
John Bauman89401822014-05-06 15:04:28 -04006218 else
6219 {
6220 // UNIMPLEMENTED();
6221 }
6222 }
6223}