blob: e732fb818b9f57314d7db50ef94947152ebef471 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15#include "Surface.hpp"
16
17#include "Color.hpp"
18#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040019#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040020#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040021#include "Common/Half.hpp"
22#include "Common/Memory.hpp"
23#include "Common/CPUID.hpp"
24#include "Common/Resource.hpp"
25#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040026#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040027
Nicolas Capens47dc8672017-04-25 12:54:39 -040028#if defined(__i386__) || defined(__x86_64__)
29 #include <xmmintrin.h>
30 #include <emmintrin.h>
31#endif
John Bauman89401822014-05-06 15:04:28 -040032
33#undef min
34#undef max
35
36namespace sw
37{
38 extern bool quadLayoutEnabled;
39 extern bool complementaryDepthBuffer;
40 extern TranscendentalPrecision logPrecision;
41
42 unsigned int *Surface::palette = 0;
43 unsigned int Surface::paletteID = 0;
44
45 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
46 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -050047 byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
John Bauman89401822014-05-06 15:04:28 -040048
Nicolas Capensbfa23b32017-12-11 10:06:37 -050049 for(int i = 0; i < samples; i++)
50 {
51 write(element, color);
52 element += sliceB;
53 }
John Bauman89401822014-05-06 15:04:28 -040054 }
55
56 void Surface::Buffer::write(int x, int y, const Color<float> &color)
57 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -050058 byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;
John Bauman89401822014-05-06 15:04:28 -040059
Nicolas Capensbfa23b32017-12-11 10:06:37 -050060 for(int i = 0; i < samples; i++)
61 {
62 write(element, color);
63 element += sliceB;
64 }
John Bauman89401822014-05-06 15:04:28 -040065 }
66
67 inline void Surface::Buffer::write(void *element, const Color<float> &color)
68 {
69 switch(format)
70 {
71 case FORMAT_A8:
72 *(unsigned char*)element = unorm<8>(color.a);
73 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040074 case FORMAT_R8I_SNORM:
75 *(char*)element = snorm<8>(color.r);
76 break;
John Bauman89401822014-05-06 15:04:28 -040077 case FORMAT_R8:
78 *(unsigned char*)element = unorm<8>(color.r);
79 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040080 case FORMAT_R8I:
81 *(char*)element = scast<8>(color.r);
82 break;
83 case FORMAT_R8UI:
84 *(unsigned char*)element = ucast<8>(color.r);
85 break;
86 case FORMAT_R16I:
87 *(short*)element = scast<16>(color.r);
88 break;
89 case FORMAT_R16UI:
90 *(unsigned short*)element = ucast<16>(color.r);
91 break;
92 case FORMAT_R32I:
93 *(int*)element = static_cast<int>(color.r);
94 break;
95 case FORMAT_R32UI:
96 *(unsigned int*)element = static_cast<unsigned int>(color.r);
97 break;
John Bauman89401822014-05-06 15:04:28 -040098 case FORMAT_R3G3B2:
99 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
100 break;
101 case FORMAT_A8R3G3B2:
102 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
103 break;
104 case FORMAT_X4R4G4B4:
105 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
106 break;
107 case FORMAT_A4R4G4B4:
108 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
109 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400110 case FORMAT_R4G4B4A4:
111 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
112 break;
John Bauman89401822014-05-06 15:04:28 -0400113 case FORMAT_R5G6B5:
114 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
115 break;
116 case FORMAT_A1R5G5B5:
117 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
118 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400119 case FORMAT_R5G5B5A1:
120 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
121 break;
John Bauman89401822014-05-06 15:04:28 -0400122 case FORMAT_X1R5G5B5:
123 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
124 break;
125 case FORMAT_A8R8G8B8:
126 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
127 break;
128 case FORMAT_X8R8G8B8:
129 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
130 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400131 case FORMAT_A8B8G8R8I_SNORM:
132 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
133 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
134 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
135 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
136 break;
John Bauman89401822014-05-06 15:04:28 -0400137 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400138 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400139 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
140 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400141 case FORMAT_A8B8G8R8I:
142 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
143 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
144 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
145 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
146 break;
147 case FORMAT_A8B8G8R8UI:
148 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
149 break;
150 case FORMAT_X8B8G8R8I_SNORM:
151 *(unsigned int*)element = 0x7F000000 |
152 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
153 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
154 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
155 break;
John Bauman89401822014-05-06 15:04:28 -0400156 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400157 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400158 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
159 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400160 case FORMAT_X8B8G8R8I:
161 *(unsigned int*)element = 0x7F000000 |
162 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
163 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
164 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
165 case FORMAT_X8B8G8R8UI:
166 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
167 break;
John Bauman89401822014-05-06 15:04:28 -0400168 case FORMAT_A2R10G10B10:
169 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
170 break;
171 case FORMAT_A2B10G10R10:
172 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
173 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400174 case FORMAT_G8R8I_SNORM:
175 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
176 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
177 break;
John Bauman89401822014-05-06 15:04:28 -0400178 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400179 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
180 break;
181 case FORMAT_G8R8I:
182 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
183 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
184 break;
185 case FORMAT_G8R8UI:
186 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400187 break;
188 case FORMAT_G16R16:
189 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
190 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400191 case FORMAT_G16R16I:
192 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
193 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
194 break;
195 case FORMAT_G16R16UI:
196 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
197 break;
198 case FORMAT_G32R32I:
199 case FORMAT_G32R32UI:
200 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
201 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
202 break;
John Bauman89401822014-05-06 15:04:28 -0400203 case FORMAT_A16B16G16R16:
204 ((unsigned short*)element)[0] = unorm<16>(color.r);
205 ((unsigned short*)element)[1] = unorm<16>(color.g);
206 ((unsigned short*)element)[2] = unorm<16>(color.b);
207 ((unsigned short*)element)[3] = unorm<16>(color.a);
208 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400209 case FORMAT_A16B16G16R16I:
210 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
211 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
212 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
213 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
214 break;
215 case FORMAT_A16B16G16R16UI:
216 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
217 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
218 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
219 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
220 break;
221 case FORMAT_X16B16G16R16I:
222 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
223 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
224 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
225 break;
226 case FORMAT_X16B16G16R16UI:
227 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
228 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
229 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
230 break;
231 case FORMAT_A32B32G32R32I:
232 case FORMAT_A32B32G32R32UI:
233 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
234 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
235 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
236 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
237 break;
238 case FORMAT_X32B32G32R32I:
239 case FORMAT_X32B32G32R32UI:
240 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
241 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
242 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
243 break;
John Bauman89401822014-05-06 15:04:28 -0400244 case FORMAT_V8U8:
245 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
246 break;
247 case FORMAT_L6V5U5:
248 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
249 break;
250 case FORMAT_Q8W8V8U8:
251 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
252 break;
253 case FORMAT_X8L8V8U8:
254 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
255 break;
256 case FORMAT_V16U16:
257 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
258 break;
259 case FORMAT_A2W10V10U10:
260 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
261 break;
262 case FORMAT_A16W16V16U16:
263 ((unsigned short*)element)[0] = snorm<16>(color.r);
264 ((unsigned short*)element)[1] = snorm<16>(color.g);
265 ((unsigned short*)element)[2] = snorm<16>(color.b);
266 ((unsigned short*)element)[3] = unorm<16>(color.a);
267 break;
268 case FORMAT_Q16W16V16U16:
269 ((unsigned short*)element)[0] = snorm<16>(color.r);
270 ((unsigned short*)element)[1] = snorm<16>(color.g);
271 ((unsigned short*)element)[2] = snorm<16>(color.b);
272 ((unsigned short*)element)[3] = snorm<16>(color.a);
273 break;
274 case FORMAT_R8G8B8:
275 ((unsigned char*)element)[0] = unorm<8>(color.b);
276 ((unsigned char*)element)[1] = unorm<8>(color.g);
277 ((unsigned char*)element)[2] = unorm<8>(color.r);
278 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400279 case FORMAT_B8G8R8:
280 ((unsigned char*)element)[0] = unorm<8>(color.r);
281 ((unsigned char*)element)[1] = unorm<8>(color.g);
282 ((unsigned char*)element)[2] = unorm<8>(color.b);
283 break;
John Bauman89401822014-05-06 15:04:28 -0400284 case FORMAT_R16F:
285 *(half*)element = (half)color.r;
286 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400287 case FORMAT_A16F:
288 *(half*)element = (half)color.a;
289 break;
John Bauman89401822014-05-06 15:04:28 -0400290 case FORMAT_G16R16F:
291 ((half*)element)[0] = (half)color.r;
292 ((half*)element)[1] = (half)color.g;
293 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400294 case FORMAT_B16G16R16F:
295 ((half*)element)[0] = (half)color.r;
296 ((half*)element)[1] = (half)color.g;
297 ((half*)element)[2] = (half)color.b;
298 break;
John Bauman89401822014-05-06 15:04:28 -0400299 case FORMAT_A16B16G16R16F:
300 ((half*)element)[0] = (half)color.r;
301 ((half*)element)[1] = (half)color.g;
302 ((half*)element)[2] = (half)color.b;
303 ((half*)element)[3] = (half)color.a;
304 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400305 case FORMAT_A32F:
306 *(float*)element = color.a;
307 break;
John Bauman89401822014-05-06 15:04:28 -0400308 case FORMAT_R32F:
309 *(float*)element = color.r;
310 break;
311 case FORMAT_G32R32F:
312 ((float*)element)[0] = color.r;
313 ((float*)element)[1] = color.g;
314 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400315 case FORMAT_X32B32G32R32F:
316 ((float*)element)[3] = 1.0f;
Nicolas Capens80594422015-06-09 16:42:56 -0400317 case FORMAT_B32G32R32F:
318 ((float*)element)[0] = color.r;
319 ((float*)element)[1] = color.g;
320 ((float*)element)[2] = color.b;
321 break;
John Bauman89401822014-05-06 15:04:28 -0400322 case FORMAT_A32B32G32R32F:
323 ((float*)element)[0] = color.r;
324 ((float*)element)[1] = color.g;
325 ((float*)element)[2] = color.b;
326 ((float*)element)[3] = color.a;
327 break;
328 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500329 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -0400330 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400331 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500332 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -0400333 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400334 *((float*)element) = color.r;
335 break;
336 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500337 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -0400338 *((float*)element) = 1 - color.r;
339 break;
340 case FORMAT_S8:
341 *((unsigned char*)element) = unorm<8>(color.r);
342 break;
343 case FORMAT_L8:
344 *(unsigned char*)element = unorm<8>(color.r);
345 break;
346 case FORMAT_A4L4:
347 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
348 break;
349 case FORMAT_L16:
350 *(unsigned short*)element = unorm<16>(color.r);
351 break;
352 case FORMAT_A8L8:
353 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
354 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400355 case FORMAT_L16F:
356 *(half*)element = (half)color.r;
357 break;
358 case FORMAT_A16L16F:
359 ((half*)element)[0] = (half)color.r;
360 ((half*)element)[1] = (half)color.a;
361 break;
362 case FORMAT_L32F:
363 *(float*)element = color.r;
364 break;
365 case FORMAT_A32L32F:
366 ((float*)element)[0] = color.r;
367 ((float*)element)[1] = color.a;
368 break;
John Bauman89401822014-05-06 15:04:28 -0400369 default:
370 ASSERT(false);
371 }
372 }
373
374 Color<float> Surface::Buffer::read(int x, int y, int z) const
375 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -0500376 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
John Bauman89401822014-05-06 15:04:28 -0400377
378 return read(element);
379 }
380
381 Color<float> Surface::Buffer::read(int x, int y) const
382 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -0500383 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
John Bauman89401822014-05-06 15:04:28 -0400384
385 return read(element);
386 }
387
388 inline Color<float> Surface::Buffer::read(void *element) const
389 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400390 float r = 0.0f;
391 float g = 0.0f;
392 float b = 0.0f;
393 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400394
395 switch(format)
396 {
397 case FORMAT_P8:
398 {
399 ASSERT(palette);
400
401 unsigned int abgr = palette[*(unsigned char*)element];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400402
John Bauman89401822014-05-06 15:04:28 -0400403 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
404 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
405 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
406 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
407 }
408 break;
409 case FORMAT_A8P8:
410 {
411 ASSERT(palette);
412
413 unsigned int bgr = palette[((unsigned char*)element)[0]];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400414
John Bauman89401822014-05-06 15:04:28 -0400415 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
416 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
417 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
418 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
419 }
420 break;
421 case FORMAT_A8:
422 r = 0;
423 g = 0;
424 b = 0;
425 a = *(unsigned char*)element * (1.0f / 0xFF);
426 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400427 case FORMAT_R8I_SNORM:
428 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
429 break;
John Bauman89401822014-05-06 15:04:28 -0400430 case FORMAT_R8:
431 r = *(unsigned char*)element * (1.0f / 0xFF);
432 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400433 case FORMAT_R8I:
434 r = *(signed char*)element;
435 break;
436 case FORMAT_R8UI:
437 r = *(unsigned char*)element;
438 break;
John Bauman89401822014-05-06 15:04:28 -0400439 case FORMAT_R3G3B2:
440 {
441 unsigned char rgb = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400442
John Bauman89401822014-05-06 15:04:28 -0400443 r = (rgb & 0xE0) * (1.0f / 0xE0);
444 g = (rgb & 0x1C) * (1.0f / 0x1C);
445 b = (rgb & 0x03) * (1.0f / 0x03);
446 }
447 break;
448 case FORMAT_A8R3G3B2:
449 {
450 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400451
John Bauman89401822014-05-06 15:04:28 -0400452 a = (argb & 0xFF00) * (1.0f / 0xFF00);
453 r = (argb & 0x00E0) * (1.0f / 0x00E0);
454 g = (argb & 0x001C) * (1.0f / 0x001C);
455 b = (argb & 0x0003) * (1.0f / 0x0003);
456 }
457 break;
458 case FORMAT_X4R4G4B4:
459 {
460 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400461
John Bauman89401822014-05-06 15:04:28 -0400462 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
463 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
464 b = (rgb & 0x000F) * (1.0f / 0x000F);
465 }
466 break;
467 case FORMAT_A4R4G4B4:
468 {
469 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400470
John Bauman89401822014-05-06 15:04:28 -0400471 a = (argb & 0xF000) * (1.0f / 0xF000);
472 r = (argb & 0x0F00) * (1.0f / 0x0F00);
473 g = (argb & 0x00F0) * (1.0f / 0x00F0);
474 b = (argb & 0x000F) * (1.0f / 0x000F);
475 }
476 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400477 case FORMAT_R4G4B4A4:
478 {
479 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400480
Nicolas Capens80594422015-06-09 16:42:56 -0400481 r = (rgba & 0xF000) * (1.0f / 0xF000);
482 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
483 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
484 a = (rgba & 0x000F) * (1.0f / 0x000F);
485 }
486 break;
John Bauman89401822014-05-06 15:04:28 -0400487 case FORMAT_R5G6B5:
488 {
489 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400490
John Bauman89401822014-05-06 15:04:28 -0400491 r = (rgb & 0xF800) * (1.0f / 0xF800);
492 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
493 b = (rgb & 0x001F) * (1.0f / 0x001F);
494 }
495 break;
496 case FORMAT_A1R5G5B5:
497 {
498 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400499
John Bauman89401822014-05-06 15:04:28 -0400500 a = (argb & 0x8000) * (1.0f / 0x8000);
501 r = (argb & 0x7C00) * (1.0f / 0x7C00);
502 g = (argb & 0x03E0) * (1.0f / 0x03E0);
503 b = (argb & 0x001F) * (1.0f / 0x001F);
504 }
505 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400506 case FORMAT_R5G5B5A1:
507 {
508 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400509
Nicolas Capens80594422015-06-09 16:42:56 -0400510 r = (rgba & 0xF800) * (1.0f / 0xF800);
511 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
512 b = (rgba & 0x003E) * (1.0f / 0x003E);
513 a = (rgba & 0x0001) * (1.0f / 0x0001);
514 }
515 break;
John Bauman89401822014-05-06 15:04:28 -0400516 case FORMAT_X1R5G5B5:
517 {
518 unsigned short xrgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400519
John Bauman89401822014-05-06 15:04:28 -0400520 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
521 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
522 b = (xrgb & 0x001F) * (1.0f / 0x001F);
523 }
524 break;
525 case FORMAT_A8R8G8B8:
526 {
527 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400528
John Bauman89401822014-05-06 15:04:28 -0400529 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
530 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
531 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
532 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
533 }
534 break;
535 case FORMAT_X8R8G8B8:
536 {
537 unsigned int xrgb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400538
John Bauman89401822014-05-06 15:04:28 -0400539 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
540 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
541 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
542 }
543 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400544 case FORMAT_A8B8G8R8I_SNORM:
545 {
546 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400547
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400548 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
549 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
550 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
551 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
552 }
553 break;
John Bauman89401822014-05-06 15:04:28 -0400554 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400555 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400556 {
557 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400558
John Bauman89401822014-05-06 15:04:28 -0400559 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
560 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
561 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
562 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
563 }
564 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400565 case FORMAT_A8B8G8R8I:
566 {
567 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400568
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400569 r = abgr[0];
570 g = abgr[1];
571 b = abgr[2];
572 a = abgr[3];
573 }
574 break;
575 case FORMAT_A8B8G8R8UI:
576 {
577 unsigned char* abgr = (unsigned char*)element;
578
579 r = abgr[0];
580 g = abgr[1];
581 b = abgr[2];
582 a = abgr[3];
583 }
584 break;
585 case FORMAT_X8B8G8R8I_SNORM:
586 {
587 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400588
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400589 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
590 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
591 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
592 }
593 break;
John Bauman89401822014-05-06 15:04:28 -0400594 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400595 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400596 {
597 unsigned int xbgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400598
John Bauman89401822014-05-06 15:04:28 -0400599 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
600 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
601 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
602 }
603 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400604 case FORMAT_X8B8G8R8I:
605 {
606 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400607
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400608 r = bgr[0];
609 g = bgr[1];
610 b = bgr[2];
611 }
612 break;
613 case FORMAT_X8B8G8R8UI:
614 {
615 unsigned char* bgr = (unsigned char*)element;
616
617 r = bgr[0];
618 g = bgr[1];
619 b = bgr[2];
620 }
621 break;
622 case FORMAT_G8R8I_SNORM:
623 {
624 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400625
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400626 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
627 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
628 }
629 break;
John Bauman89401822014-05-06 15:04:28 -0400630 case FORMAT_G8R8:
631 {
632 unsigned short gr = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400633
John Bauman89401822014-05-06 15:04:28 -0400634 g = (gr & 0xFF00) * (1.0f / 0xFF00);
635 r = (gr & 0x00FF) * (1.0f / 0x00FF);
636 }
637 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400638 case FORMAT_G8R8I:
639 {
640 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400641
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400642 r = gr[0];
643 g = gr[1];
644 }
645 break;
646 case FORMAT_G8R8UI:
647 {
648 unsigned char* gr = (unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400649
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400650 r = gr[0];
651 g = gr[1];
652 }
653 break;
654 case FORMAT_R16I:
655 r = *((short*)element);
656 break;
657 case FORMAT_R16UI:
658 r = *((unsigned short*)element);
659 break;
660 case FORMAT_G16R16I:
661 {
662 short* gr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400663
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400664 r = gr[0];
665 g = gr[1];
666 }
667 break;
John Bauman89401822014-05-06 15:04:28 -0400668 case FORMAT_G16R16:
669 {
670 unsigned int gr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400671
John Bauman89401822014-05-06 15:04:28 -0400672 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
673 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
674 }
675 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400676 case FORMAT_G16R16UI:
677 {
678 unsigned short* gr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400679
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400680 r = gr[0];
681 g = gr[1];
682 }
683 break;
John Bauman89401822014-05-06 15:04:28 -0400684 case FORMAT_A2R10G10B10:
685 {
686 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400687
John Bauman89401822014-05-06 15:04:28 -0400688 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
689 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
690 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
691 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
692 }
693 break;
694 case FORMAT_A2B10G10R10:
695 {
696 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400697
John Bauman89401822014-05-06 15:04:28 -0400698 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
699 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
700 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
701 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
702 }
703 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400704 case FORMAT_A16B16G16R16I:
705 {
706 short* abgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400707
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400708 r = abgr[0];
709 g = abgr[1];
710 b = abgr[2];
711 a = abgr[3];
712 }
713 break;
John Bauman89401822014-05-06 15:04:28 -0400714 case FORMAT_A16B16G16R16:
715 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
716 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
717 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
718 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
719 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400720 case FORMAT_A16B16G16R16UI:
721 {
722 unsigned short* abgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400723
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400724 r = abgr[0];
725 g = abgr[1];
726 b = abgr[2];
727 a = abgr[3];
728 }
729 break;
730 case FORMAT_X16B16G16R16I:
731 {
732 short* bgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400733
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400734 r = bgr[0];
735 g = bgr[1];
736 b = bgr[2];
737 }
738 break;
739 case FORMAT_X16B16G16R16UI:
740 {
741 unsigned short* bgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400742
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400743 r = bgr[0];
744 g = bgr[1];
745 b = bgr[2];
746 }
747 break;
748 case FORMAT_A32B32G32R32I:
749 {
750 int* abgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400751
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400752 r = static_cast<float>(abgr[0]);
753 g = static_cast<float>(abgr[1]);
754 b = static_cast<float>(abgr[2]);
755 a = static_cast<float>(abgr[3]);
756 }
757 break;
758 case FORMAT_A32B32G32R32UI:
759 {
760 unsigned int* abgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400761
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400762 r = static_cast<float>(abgr[0]);
763 g = static_cast<float>(abgr[1]);
764 b = static_cast<float>(abgr[2]);
765 a = static_cast<float>(abgr[3]);
766 }
767 break;
768 case FORMAT_X32B32G32R32I:
769 {
770 int* bgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400771
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400772 r = static_cast<float>(bgr[0]);
773 g = static_cast<float>(bgr[1]);
774 b = static_cast<float>(bgr[2]);
775 }
776 break;
777 case FORMAT_X32B32G32R32UI:
778 {
779 unsigned int* bgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400780
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400781 r = static_cast<float>(bgr[0]);
782 g = static_cast<float>(bgr[1]);
783 b = static_cast<float>(bgr[2]);
784 }
785 break;
786 case FORMAT_G32R32I:
787 {
788 int* gr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400789
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400790 r = static_cast<float>(gr[0]);
791 g = static_cast<float>(gr[1]);
792 }
793 break;
794 case FORMAT_G32R32UI:
795 {
796 unsigned int* gr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400797
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400798 r = static_cast<float>(gr[0]);
799 g = static_cast<float>(gr[1]);
800 }
801 break;
802 case FORMAT_R32I:
803 r = static_cast<float>(*((int*)element));
804 break;
805 case FORMAT_R32UI:
806 r = static_cast<float>(*((unsigned int*)element));
807 break;
John Bauman89401822014-05-06 15:04:28 -0400808 case FORMAT_V8U8:
809 {
810 unsigned short vu = *(unsigned short*)element;
811
812 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
813 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
814 }
815 break;
816 case FORMAT_L6V5U5:
817 {
818 unsigned short lvu = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400819
John Bauman89401822014-05-06 15:04:28 -0400820 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
821 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
822 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
823 }
824 break;
825 case FORMAT_Q8W8V8U8:
826 {
827 unsigned int qwvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400828
John Bauman89401822014-05-06 15:04:28 -0400829 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
830 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
831 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
832 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
833 }
834 break;
835 case FORMAT_X8L8V8U8:
836 {
837 unsigned int xlvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400838
John Bauman89401822014-05-06 15:04:28 -0400839 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
840 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
841 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
842 }
843 break;
844 case FORMAT_R8G8B8:
845 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
846 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
847 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
848 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400849 case FORMAT_B8G8R8:
850 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
851 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
852 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
853 break;
John Bauman89401822014-05-06 15:04:28 -0400854 case FORMAT_V16U16:
855 {
856 unsigned int vu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400857
John Bauman89401822014-05-06 15:04:28 -0400858 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
859 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
860 }
861 break;
862 case FORMAT_A2W10V10U10:
863 {
864 unsigned int awvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400865
John Bauman89401822014-05-06 15:04:28 -0400866 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
867 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
868 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
869 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
870 }
871 break;
872 case FORMAT_A16W16V16U16:
873 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
874 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
875 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
876 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
877 break;
878 case FORMAT_Q16W16V16U16:
879 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
880 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
881 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
882 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
883 break;
884 case FORMAT_L8:
885 r =
886 g =
887 b = *(unsigned char*)element * (1.0f / 0xFF);
888 break;
889 case FORMAT_A4L4:
890 {
891 unsigned char al = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400892
John Bauman89401822014-05-06 15:04:28 -0400893 r =
894 g =
895 b = (al & 0x0F) * (1.0f / 0x0F);
896 a = (al & 0xF0) * (1.0f / 0xF0);
897 }
898 break;
899 case FORMAT_L16:
900 r =
901 g =
902 b = *(unsigned short*)element * (1.0f / 0xFFFF);
903 break;
904 case FORMAT_A8L8:
905 r =
906 g =
907 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
908 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
909 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400910 case FORMAT_L16F:
911 r =
912 g =
913 b = *(half*)element;
914 break;
915 case FORMAT_A16L16F:
916 r =
917 g =
918 b = ((half*)element)[0];
919 a = ((half*)element)[1];
920 break;
921 case FORMAT_L32F:
922 r =
923 g =
924 b = *(float*)element;
925 break;
926 case FORMAT_A32L32F:
927 r =
928 g =
929 b = ((float*)element)[0];
930 a = ((float*)element)[1];
931 break;
932 case FORMAT_A16F:
933 a = *(half*)element;
934 break;
John Bauman89401822014-05-06 15:04:28 -0400935 case FORMAT_R16F:
936 r = *(half*)element;
937 break;
938 case FORMAT_G16R16F:
939 r = ((half*)element)[0];
940 g = ((half*)element)[1];
941 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400942 case FORMAT_B16G16R16F:
943 r = ((half*)element)[0];
944 g = ((half*)element)[1];
945 b = ((half*)element)[2];
946 break;
John Bauman89401822014-05-06 15:04:28 -0400947 case FORMAT_A16B16G16R16F:
948 r = ((half*)element)[0];
949 g = ((half*)element)[1];
950 b = ((half*)element)[2];
951 a = ((half*)element)[3];
952 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400953 case FORMAT_A32F:
954 a = *(float*)element;
955 break;
John Bauman89401822014-05-06 15:04:28 -0400956 case FORMAT_R32F:
957 r = *(float*)element;
958 break;
959 case FORMAT_G32R32F:
960 r = ((float*)element)[0];
961 g = ((float*)element)[1];
962 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400963 case FORMAT_X32B32G32R32F:
Nicolas Capens80594422015-06-09 16:42:56 -0400964 case FORMAT_B32G32R32F:
965 r = ((float*)element)[0];
966 g = ((float*)element)[1];
967 b = ((float*)element)[2];
968 break;
John Bauman89401822014-05-06 15:04:28 -0400969 case FORMAT_A32B32G32R32F:
970 r = ((float*)element)[0];
971 g = ((float*)element)[1];
972 b = ((float*)element)[2];
973 a = ((float*)element)[3];
974 break;
975 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500976 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -0400977 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400978 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500979 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -0400980 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400981 r = *(float*)element;
982 g = r;
983 b = r;
984 a = r;
985 break;
986 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500987 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400988 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400989 g = r;
990 b = r;
991 a = r;
992 break;
993 case FORMAT_S8:
994 r = *(unsigned char*)element * (1.0f / 0xFF);
995 break;
996 default:
997 ASSERT(false);
998 }
999
1000 // if(sRGB)
1001 // {
1002 // r = sRGBtoLinear(r);
1003 // g = sRGBtoLinear(g);
1004 // b = sRGBtoLinear(b);
1005 // }
1006
1007 return Color<float>(r, g, b, a);
1008 }
1009
1010 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1011 {
1012 x -= 0.5f;
1013 y -= 0.5f;
1014 z -= 0.5f;
1015
1016 int x0 = clamp((int)x, 0, width - 1);
1017 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1018
1019 int y0 = clamp((int)y, 0, height - 1);
1020 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1021
1022 int z0 = clamp((int)z, 0, depth - 1);
1023 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1024
1025 Color<float> c000 = read(x0, y0, z0);
1026 Color<float> c100 = read(x1, y0, z0);
1027 Color<float> c010 = read(x0, y1, z0);
1028 Color<float> c110 = read(x1, y1, z0);
1029 Color<float> c001 = read(x0, y0, z1);
1030 Color<float> c101 = read(x1, y0, z1);
1031 Color<float> c011 = read(x0, y1, z1);
1032 Color<float> c111 = read(x1, y1, z1);
1033
1034 float fx = x - x0;
1035 float fy = y - y0;
1036 float fz = z - z0;
1037
1038 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1039 c100 *= fx * (1 - fy) * (1 - fz);
1040 c010 *= (1 - fx) * fy * (1 - fz);
1041 c110 *= fx * fy * (1 - fz);
1042 c001 *= (1 - fx) * (1 - fy) * fz;
1043 c101 *= fx * (1 - fy) * fz;
1044 c011 *= (1 - fx) * fy * fz;
1045 c111 *= fx * fy * fz;
1046
1047 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1048 }
1049
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001050 Color<float> Surface::Buffer::sample(float x, float y, int layer) const
John Bauman89401822014-05-06 15:04:28 -04001051 {
1052 x -= 0.5f;
1053 y -= 0.5f;
1054
1055 int x0 = clamp((int)x, 0, width - 1);
1056 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1057
1058 int y0 = clamp((int)y, 0, height - 1);
1059 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1060
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001061 Color<float> c00 = read(x0, y0, layer);
1062 Color<float> c10 = read(x1, y0, layer);
1063 Color<float> c01 = read(x0, y1, layer);
1064 Color<float> c11 = read(x1, y1, layer);
John Bauman89401822014-05-06 15:04:28 -04001065
1066 float fx = x - x0;
1067 float fy = y - y0;
1068
1069 c00 *= (1 - fx) * (1 - fy);
1070 c10 *= fx * (1 - fy);
1071 c01 *= (1 - fx) * fy;
1072 c11 *= fx * fy;
1073
1074 return c00 + c10 + c01 + c11;
1075 }
1076
John Bauman19bac1e2014-05-06 15:23:49 -04001077 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001078 {
1079 this->lock = lock;
1080
1081 switch(lock)
1082 {
1083 case LOCK_UNLOCKED:
1084 case LOCK_READONLY:
1085 break;
1086 case LOCK_WRITEONLY:
1087 case LOCK_READWRITE:
1088 case LOCK_DISCARD:
1089 dirty = true;
1090 break;
1091 default:
1092 ASSERT(false);
1093 }
1094
John Baumand4ae8632014-05-06 16:18:33 -04001095 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001096 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001097 x += border;
1098 y += border;
1099
John Baumand4ae8632014-05-06 16:18:33 -04001100 switch(format)
1101 {
1102 #if S3TC_SUPPORT
1103 case FORMAT_DXT1:
1104 #endif
1105 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001106 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001107 case FORMAT_R11_EAC:
1108 case FORMAT_SIGNED_R11_EAC:
1109 case FORMAT_RGB8_ETC2:
1110 case FORMAT_SRGB8_ETC2:
1111 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1112 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001113 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001114 case FORMAT_RG11_EAC:
1115 case FORMAT_SIGNED_RG11_EAC:
1116 case FORMAT_RGBA8_ETC2_EAC:
1117 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1118 case FORMAT_RGBA_ASTC_4x4_KHR:
1119 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1120 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1121 case FORMAT_RGBA_ASTC_5x4_KHR:
1122 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1123 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1124 case FORMAT_RGBA_ASTC_5x5_KHR:
1125 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1126 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1127 case FORMAT_RGBA_ASTC_6x5_KHR:
1128 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1129 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1130 case FORMAT_RGBA_ASTC_6x6_KHR:
1131 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1132 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1133 case FORMAT_RGBA_ASTC_8x5_KHR:
1134 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1135 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1136 case FORMAT_RGBA_ASTC_8x6_KHR:
1137 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1138 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1139 case FORMAT_RGBA_ASTC_8x8_KHR:
1140 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1141 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1142 case FORMAT_RGBA_ASTC_10x5_KHR:
1143 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1144 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1145 case FORMAT_RGBA_ASTC_10x6_KHR:
1146 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1147 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1148 case FORMAT_RGBA_ASTC_10x8_KHR:
1149 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1150 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1151 case FORMAT_RGBA_ASTC_10x10_KHR:
1152 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1153 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1154 case FORMAT_RGBA_ASTC_12x10_KHR:
1155 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1156 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1157 case FORMAT_RGBA_ASTC_12x12_KHR:
1158 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1159 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001160 #if S3TC_SUPPORT
1161 case FORMAT_DXT3:
1162 case FORMAT_DXT5:
1163 #endif
1164 case FORMAT_ATI2:
1165 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1166 default:
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001167 return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001168 }
John Bauman89401822014-05-06 15:04:28 -04001169 }
1170
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001171 return nullptr;
John Bauman89401822014-05-06 15:04:28 -04001172 }
1173
1174 void Surface::Buffer::unlockRect()
1175 {
1176 lock = LOCK_UNLOCKED;
1177 }
1178
Nicolas Capensf41f0332017-05-30 15:25:50 -04001179 class SurfaceImplementation : public Surface
1180 {
1181 public:
1182 SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1183 : Surface(width, height, depth, format, pixels, pitch, slice) {}
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001184 SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
1185 : Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}
Nicolas Capensf41f0332017-05-30 15:25:50 -04001186 ~SurfaceImplementation() override {};
1187
1188 void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
1189 {
1190 return Surface::lockInternal(x, y, z, lock, client);
1191 }
1192
1193 void unlockInternal() override
1194 {
1195 Surface::unlockInternal();
1196 }
1197 };
1198
1199 Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1200 {
1201 return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
1202 }
1203
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001204 Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
Nicolas Capensf41f0332017-05-30 15:25:50 -04001205 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001206 return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
Nicolas Capensf41f0332017-05-30 15:25:50 -04001207 }
1208
Nicolas Capens477314b2015-06-09 16:47:29 -04001209 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1210 {
1211 resource = new Resource(0);
1212 hasParent = false;
1213 ownExternal = false;
1214 depth = max(1, depth);
1215
1216 external.buffer = pixels;
1217 external.width = width;
1218 external.height = height;
1219 external.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001220 external.samples = 1;
Nicolas Capens477314b2015-06-09 16:47:29 -04001221 external.format = format;
1222 external.bytes = bytes(external.format);
1223 external.pitchB = pitch;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001224 external.pitchP = external.bytes ? pitch / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001225 external.sliceB = slice;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001226 external.sliceP = external.bytes ? slice / external.bytes : 0;
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001227 external.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001228 external.lock = LOCK_UNLOCKED;
1229 external.dirty = true;
1230
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001231 internal.buffer = nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001232 internal.width = width;
1233 internal.height = height;
1234 internal.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001235 internal.samples = 1;
Nicolas Capens477314b2015-06-09 16:47:29 -04001236 internal.format = selectInternalFormat(format);
1237 internal.bytes = bytes(internal.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001238 internal.pitchB = pitchB(internal.width, 0, internal.format, false);
1239 internal.pitchP = pitchP(internal.width, 0, internal.format, false);
1240 internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
1241 internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
1242 internal.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001243 internal.lock = LOCK_UNLOCKED;
1244 internal.dirty = false;
1245
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001246 stencil.buffer = nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001247 stencil.width = width;
1248 stencil.height = height;
1249 stencil.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001250 stencil.samples = 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001251 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
Nicolas Capens477314b2015-06-09 16:47:29 -04001252 stencil.bytes = bytes(stencil.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001253 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
1254 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
1255 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
1256 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
1257 stencil.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001258 stencil.lock = LOCK_UNLOCKED;
1259 stencil.dirty = false;
1260
Nicolas Capens73e18c12017-11-28 13:31:35 -05001261 dirtyContents = true;
Nicolas Capens477314b2015-06-09 16:47:29 -04001262 paletteUsed = 0;
1263 }
1264
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001265 Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001266 {
1267 resource = texture ? texture : new Resource(0);
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001268 hasParent = texture != nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001269 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001270 depth = max(1, depth);
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001271 samples = max(1, samples);
John Bauman89401822014-05-06 15:04:28 -04001272
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001273 external.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001274 external.width = width;
1275 external.height = height;
1276 external.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001277 external.samples = (short)samples;
John Bauman89401822014-05-06 15:04:28 -04001278 external.format = format;
1279 external.bytes = bytes(external.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001280 external.pitchB = pitchB(external.width, 0, external.format, renderTarget && !texture);
1281 external.pitchP = pitchP(external.width, 0, external.format, renderTarget && !texture);
1282 external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
1283 external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
1284 external.border = 0;
John Bauman89401822014-05-06 15:04:28 -04001285 external.lock = LOCK_UNLOCKED;
1286 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001287
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001288 internal.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001289 internal.width = width;
1290 internal.height = height;
1291 internal.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001292 internal.samples = (short)samples;
John Bauman89401822014-05-06 15:04:28 -04001293 internal.format = selectInternalFormat(format);
1294 internal.bytes = bytes(internal.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001295 internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1296 internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
1297 internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
1298 internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001299 internal.border = (short)border;
John Bauman89401822014-05-06 15:04:28 -04001300 internal.lock = LOCK_UNLOCKED;
1301 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001302
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001303 stencil.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001304 stencil.width = width;
1305 stencil.height = height;
1306 stencil.depth = depth;
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001307 stencil.samples = (short)samples;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001308 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
John Bauman89401822014-05-06 15:04:28 -04001309 stencil.bytes = bytes(stencil.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001310 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
1311 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
1312 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1313 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1314 stencil.border = 0;
John Bauman89401822014-05-06 15:04:28 -04001315 stencil.lock = LOCK_UNLOCKED;
1316 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001317
Nicolas Capens73e18c12017-11-28 13:31:35 -05001318 dirtyContents = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001319 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001320 }
1321
1322 Surface::~Surface()
1323 {
Nicolas Capensbf7a8142017-05-19 10:57:28 -04001324 // sync() must be called before this destructor to ensure all locks have been released.
1325 // We can't call it here because the parent resource may already have been destroyed.
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001326 ASSERT(isUnlocked());
John Bauman8a4f6fc2014-05-06 15:26:18 -04001327
John Bauman89401822014-05-06 15:04:28 -04001328 if(!hasParent)
1329 {
1330 resource->destruct();
1331 }
1332
Nicolas Capens477314b2015-06-09 16:47:29 -04001333 if(ownExternal)
1334 {
1335 deallocate(external.buffer);
1336 }
John Bauman89401822014-05-06 15:04:28 -04001337
1338 if(internal.buffer != external.buffer)
1339 {
1340 deallocate(internal.buffer);
1341 }
1342
1343 deallocate(stencil.buffer);
1344
1345 external.buffer = 0;
1346 internal.buffer = 0;
1347 stencil.buffer = 0;
1348 }
1349
John Bauman19bac1e2014-05-06 15:23:49 -04001350 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001351 {
1352 resource->lock(client);
1353
1354 if(!external.buffer)
1355 {
1356 if(internal.buffer && identicalFormats())
1357 {
1358 external.buffer = internal.buffer;
1359 }
1360 else
1361 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001362 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
John Bauman89401822014-05-06 15:04:28 -04001363 }
1364 }
1365
1366 if(internal.dirty)
1367 {
1368 if(lock != LOCK_DISCARD)
1369 {
1370 update(external, internal);
1371 }
John Bauman66b8ab22014-05-06 15:57:45 -04001372
1373 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001374 }
1375
1376 switch(lock)
1377 {
1378 case LOCK_READONLY:
1379 break;
1380 case LOCK_WRITEONLY:
1381 case LOCK_READWRITE:
1382 case LOCK_DISCARD:
Nicolas Capens73e18c12017-11-28 13:31:35 -05001383 dirtyContents = true;
John Bauman89401822014-05-06 15:04:28 -04001384 break;
1385 default:
1386 ASSERT(false);
1387 }
1388
John Bauman19bac1e2014-05-06 15:23:49 -04001389 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001390 }
1391
1392 void Surface::unlockExternal()
1393 {
John Bauman89401822014-05-06 15:04:28 -04001394 external.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001395
1396 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001397 }
1398
John Bauman19bac1e2014-05-06 15:23:49 -04001399 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001400 {
1401 if(lock != LOCK_UNLOCKED)
1402 {
1403 resource->lock(client);
1404 }
1405
1406 if(!internal.buffer)
1407 {
1408 if(external.buffer && identicalFormats())
1409 {
1410 internal.buffer = external.buffer;
1411 }
1412 else
1413 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001414 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
John Bauman89401822014-05-06 15:04:28 -04001415 }
1416 }
1417
1418 // FIXME: WHQL requires conversion to lower external precision and back
1419 if(logPrecision >= WHQL)
1420 {
1421 if(internal.dirty && renderTarget && internal.format != external.format)
1422 {
1423 if(lock != LOCK_DISCARD)
1424 {
1425 switch(external.format)
1426 {
1427 case FORMAT_R3G3B2:
1428 case FORMAT_A8R3G3B2:
1429 case FORMAT_A1R5G5B5:
1430 case FORMAT_A2R10G10B10:
1431 case FORMAT_A2B10G10R10:
1432 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1433 unlockExternal();
1434 break;
1435 default:
1436 // Difference passes WHQL
1437 break;
1438 }
1439 }
1440 }
1441 }
1442
John Bauman66b8ab22014-05-06 15:57:45 -04001443 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001444 {
1445 if(lock != LOCK_DISCARD)
1446 {
1447 update(internal, external);
1448 }
John Bauman89401822014-05-06 15:04:28 -04001449
John Bauman66b8ab22014-05-06 15:57:45 -04001450 external.dirty = false;
1451 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001452 }
1453
1454 switch(lock)
1455 {
1456 case LOCK_UNLOCKED:
1457 case LOCK_READONLY:
1458 break;
1459 case LOCK_WRITEONLY:
1460 case LOCK_READWRITE:
1461 case LOCK_DISCARD:
Nicolas Capens73e18c12017-11-28 13:31:35 -05001462 dirtyContents = true;
John Bauman89401822014-05-06 15:04:28 -04001463 break;
1464 default:
1465 ASSERT(false);
1466 }
1467
1468 if(lock == LOCK_READONLY && client == PUBLIC)
1469 {
1470 resolve();
1471 }
1472
John Bauman19bac1e2014-05-06 15:23:49 -04001473 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001474 }
1475
1476 void Surface::unlockInternal()
1477 {
John Bauman89401822014-05-06 15:04:28 -04001478 internal.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001479
1480 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001481 }
1482
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001483 void *Surface::lockStencil(int x, int y, int front, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001484 {
1485 resource->lock(client);
1486
1487 if(!stencil.buffer)
1488 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001489 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
John Bauman89401822014-05-06 15:04:28 -04001490 }
1491
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001492 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
John Bauman89401822014-05-06 15:04:28 -04001493 }
1494
1495 void Surface::unlockStencil()
1496 {
John Bauman89401822014-05-06 15:04:28 -04001497 stencil.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001498
1499 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001500 }
1501
1502 int Surface::bytes(Format format)
1503 {
1504 switch(format)
1505 {
1506 case FORMAT_NULL: return 0;
1507 case FORMAT_P8: return 1;
1508 case FORMAT_A8P8: return 2;
1509 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001510 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001511 case FORMAT_R8: return 1;
1512 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001513 case FORMAT_R16I: return 2;
1514 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001515 case FORMAT_A8R3G3B2: return 2;
1516 case FORMAT_R5G6B5: return 2;
1517 case FORMAT_A1R5G5B5: return 2;
1518 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001519 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001520 case FORMAT_X4R4G4B4: return 2;
1521 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001522 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001523 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001524 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001525 case FORMAT_R32I: return 4;
1526 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001527 case FORMAT_X8R8G8B8: return 4;
1528 // case FORMAT_X8G8R8B8Q: return 4;
1529 case FORMAT_A8R8G8B8: return 4;
1530 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001531 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001532 case FORMAT_X8B8G8R8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04001533 case FORMAT_SRGB8_X8: return 4;
1534 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001535 case FORMAT_A8B8G8R8I: return 4;
1536 case FORMAT_R8UI: return 1;
1537 case FORMAT_G8R8UI: return 2;
1538 case FORMAT_X8B8G8R8UI: return 4;
1539 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001540 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001541 case FORMAT_R8I_SNORM: return 1;
1542 case FORMAT_G8R8I_SNORM: return 2;
1543 case FORMAT_X8B8G8R8I_SNORM: return 4;
1544 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001545 case FORMAT_A2R10G10B10: return 4;
1546 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001547 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001548 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001549 case FORMAT_G16R16I: return 4;
1550 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001551 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001552 case FORMAT_G32R32I: return 8;
1553 case FORMAT_G32R32UI: return 8;
1554 case FORMAT_X16B16G16R16I: return 8;
1555 case FORMAT_X16B16G16R16UI: return 8;
1556 case FORMAT_A16B16G16R16I: return 8;
1557 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001558 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001559 case FORMAT_X32B32G32R32I: return 16;
1560 case FORMAT_X32B32G32R32UI: return 16;
1561 case FORMAT_A32B32G32R32I: return 16;
1562 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001563 // Compressed formats
1564 #if S3TC_SUPPORT
1565 case FORMAT_DXT1: return 2; // Column of four pixels
1566 case FORMAT_DXT3: return 4; // Column of four pixels
1567 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001568 #endif
John Bauman89401822014-05-06 15:04:28 -04001569 case FORMAT_ATI1: return 2; // Column of four pixels
1570 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001571 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001572 case FORMAT_R11_EAC: return 2;
1573 case FORMAT_SIGNED_R11_EAC: return 2;
1574 case FORMAT_RG11_EAC: return 4;
1575 case FORMAT_SIGNED_RG11_EAC: return 4;
1576 case FORMAT_RGB8_ETC2: return 2;
1577 case FORMAT_SRGB8_ETC2: return 2;
1578 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1579 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1580 case FORMAT_RGBA8_ETC2_EAC: return 4;
1581 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1582 case FORMAT_RGBA_ASTC_4x4_KHR:
1583 case FORMAT_RGBA_ASTC_5x4_KHR:
1584 case FORMAT_RGBA_ASTC_5x5_KHR:
1585 case FORMAT_RGBA_ASTC_6x5_KHR:
1586 case FORMAT_RGBA_ASTC_6x6_KHR:
1587 case FORMAT_RGBA_ASTC_8x5_KHR:
1588 case FORMAT_RGBA_ASTC_8x6_KHR:
1589 case FORMAT_RGBA_ASTC_8x8_KHR:
1590 case FORMAT_RGBA_ASTC_10x5_KHR:
1591 case FORMAT_RGBA_ASTC_10x6_KHR:
1592 case FORMAT_RGBA_ASTC_10x8_KHR:
1593 case FORMAT_RGBA_ASTC_10x10_KHR:
1594 case FORMAT_RGBA_ASTC_12x10_KHR:
1595 case FORMAT_RGBA_ASTC_12x12_KHR:
1596 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1597 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1598 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1599 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1600 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1601 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1602 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1603 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1604 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1605 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1606 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1607 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1608 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1609 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001610 // Bumpmap formats
1611 case FORMAT_V8U8: return 2;
1612 case FORMAT_L6V5U5: return 2;
1613 case FORMAT_Q8W8V8U8: return 4;
1614 case FORMAT_X8L8V8U8: return 4;
1615 case FORMAT_A2W10V10U10: return 4;
1616 case FORMAT_V16U16: return 4;
1617 case FORMAT_A16W16V16U16: return 8;
1618 case FORMAT_Q16W16V16U16: return 8;
1619 // Luminance formats
1620 case FORMAT_L8: return 1;
1621 case FORMAT_A4L4: return 1;
1622 case FORMAT_L16: return 2;
1623 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001624 case FORMAT_L16F: return 2;
1625 case FORMAT_A16L16F: return 4;
1626 case FORMAT_L32F: return 4;
1627 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001628 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001629 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001630 case FORMAT_R16F: return 2;
1631 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001632 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001633 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001634 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001635 case FORMAT_R32F: return 4;
1636 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001637 case FORMAT_B32G32R32F: return 12;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001638 case FORMAT_X32B32G32R32F: return 16;
John Bauman89401822014-05-06 15:04:28 -04001639 case FORMAT_A32B32G32R32F: return 16;
1640 // Depth/stencil formats
1641 case FORMAT_D16: return 2;
1642 case FORMAT_D32: return 4;
1643 case FORMAT_D24X8: return 4;
1644 case FORMAT_D24S8: return 4;
1645 case FORMAT_D24FS8: return 4;
1646 case FORMAT_D32F: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001647 case FORMAT_D32FS8: return 4;
John Bauman89401822014-05-06 15:04:28 -04001648 case FORMAT_D32F_COMPLEMENTARY: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001649 case FORMAT_D32FS8_COMPLEMENTARY: return 4;
John Bauman89401822014-05-06 15:04:28 -04001650 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001651 case FORMAT_D32FS8_TEXTURE: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001652 case FORMAT_D32F_SHADOW: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001653 case FORMAT_D32FS8_SHADOW: return 4;
1654 case FORMAT_DF24S8: return 4;
1655 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001656 case FORMAT_INTZ: return 4;
1657 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001658 case FORMAT_YV12_BT601: return 1; // Y plane only
1659 case FORMAT_YV12_BT709: return 1; // Y plane only
1660 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001661 default:
1662 ASSERT(false);
1663 }
1664
1665 return 0;
1666 }
1667
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001668 int Surface::pitchB(int width, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001669 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001670 width += 2 * border;
1671
John Bauman89401822014-05-06 15:04:28 -04001672 if(target || isDepth(format) || isStencil(format))
1673 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001674 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001675 }
1676
1677 switch(format)
1678 {
1679 #if S3TC_SUPPORT
1680 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001681 #endif
1682 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001683 case FORMAT_R11_EAC:
1684 case FORMAT_SIGNED_R11_EAC:
1685 case FORMAT_RGB8_ETC2:
1686 case FORMAT_SRGB8_ETC2:
1687 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1688 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001689 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001690 case FORMAT_RG11_EAC:
1691 case FORMAT_SIGNED_RG11_EAC:
1692 case FORMAT_RGBA8_ETC2_EAC:
1693 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1694 case FORMAT_RGBA_ASTC_4x4_KHR:
1695 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1696 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1697 case FORMAT_RGBA_ASTC_5x4_KHR:
1698 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1699 case FORMAT_RGBA_ASTC_5x5_KHR:
1700 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1701 return 16 * ((width + 4) / 5);
1702 case FORMAT_RGBA_ASTC_6x5_KHR:
1703 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1704 case FORMAT_RGBA_ASTC_6x6_KHR:
1705 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1706 return 16 * ((width + 5) / 6);
1707 case FORMAT_RGBA_ASTC_8x5_KHR:
1708 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1709 case FORMAT_RGBA_ASTC_8x6_KHR:
1710 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1711 case FORMAT_RGBA_ASTC_8x8_KHR:
1712 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1713 return 16 * ((width + 7) / 8);
1714 case FORMAT_RGBA_ASTC_10x5_KHR:
1715 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1716 case FORMAT_RGBA_ASTC_10x6_KHR:
1717 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1718 case FORMAT_RGBA_ASTC_10x8_KHR:
1719 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1720 case FORMAT_RGBA_ASTC_10x10_KHR:
1721 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1722 return 16 * ((width + 9) / 10);
1723 case FORMAT_RGBA_ASTC_12x10_KHR:
1724 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1725 case FORMAT_RGBA_ASTC_12x12_KHR:
1726 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1727 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001728 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001729 case FORMAT_DXT3:
1730 case FORMAT_DXT5:
1731 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001732 #endif
John Bauman89401822014-05-06 15:04:28 -04001733 case FORMAT_ATI1:
1734 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1735 case FORMAT_ATI2:
1736 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001737 case FORMAT_YV12_BT601:
1738 case FORMAT_YV12_BT709:
1739 case FORMAT_YV12_JFIF:
1740 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001741 default:
1742 return bytes(format) * width;
1743 }
1744 }
1745
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001746 int Surface::pitchP(int width, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001747 {
1748 int B = bytes(format);
1749
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001750 return B > 0 ? pitchB(width, border, format, target) / B : 0;
John Bauman89401822014-05-06 15:04:28 -04001751 }
1752
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001753 int Surface::sliceB(int width, int height, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001754 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001755 height += 2 * border;
1756
John Bauman89401822014-05-06 15:04:28 -04001757 if(target || isDepth(format) || isStencil(format))
1758 {
1759 height = ((height + 1) & ~1);
1760 }
1761
1762 switch(format)
1763 {
1764 #if S3TC_SUPPORT
1765 case FORMAT_DXT1:
1766 case FORMAT_DXT3:
1767 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001768 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001769 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001770 case FORMAT_R11_EAC:
1771 case FORMAT_SIGNED_R11_EAC:
1772 case FORMAT_RG11_EAC:
1773 case FORMAT_SIGNED_RG11_EAC:
1774 case FORMAT_RGB8_ETC2:
1775 case FORMAT_SRGB8_ETC2:
1776 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1777 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1778 case FORMAT_RGBA8_ETC2_EAC:
1779 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1780 case FORMAT_RGBA_ASTC_4x4_KHR:
1781 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1782 case FORMAT_RGBA_ASTC_5x4_KHR:
1783 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001784 return pitchB(width, border, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001785 case FORMAT_RGBA_ASTC_5x5_KHR:
1786 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1787 case FORMAT_RGBA_ASTC_6x5_KHR:
1788 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1789 case FORMAT_RGBA_ASTC_8x5_KHR:
1790 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1791 case FORMAT_RGBA_ASTC_10x5_KHR:
1792 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001793 return pitchB(width, border, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001794 case FORMAT_RGBA_ASTC_6x6_KHR:
1795 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1796 case FORMAT_RGBA_ASTC_8x6_KHR:
1797 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1798 case FORMAT_RGBA_ASTC_10x6_KHR:
1799 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001800 return pitchB(width, border, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001801 case FORMAT_RGBA_ASTC_8x8_KHR:
1802 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1803 case FORMAT_RGBA_ASTC_10x8_KHR:
1804 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001805 return pitchB(width, border, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001806 case FORMAT_RGBA_ASTC_10x10_KHR:
1807 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1808 case FORMAT_RGBA_ASTC_12x10_KHR:
1809 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001810 return pitchB(width, border, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001811 case FORMAT_RGBA_ASTC_12x12_KHR:
1812 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001813 return pitchB(width, border, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001814 case FORMAT_ATI1:
1815 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001816 default:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001817 return pitchB(width, border, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001818 }
1819 }
1820
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001821 int Surface::sliceP(int width, int height, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001822 {
1823 int B = bytes(format);
1824
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001825 return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
John Bauman89401822014-05-06 15:04:28 -04001826 }
1827
1828 void Surface::update(Buffer &destination, Buffer &source)
1829 {
1830 // ASSERT(source.lock != LOCK_UNLOCKED);
1831 // ASSERT(destination.lock != LOCK_UNLOCKED);
Nicolas Capensc39901e2016-03-21 16:37:44 -04001832
John Bauman89401822014-05-06 15:04:28 -04001833 if(destination.buffer != source.buffer)
1834 {
1835 ASSERT(source.dirty && !destination.dirty);
1836
1837 switch(source.format)
1838 {
1839 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001840 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1841 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1842 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1843 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1844 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1845 #if S3TC_SUPPORT
1846 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1847 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1848 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001849 #endif
John Bauman89401822014-05-06 15:04:28 -04001850 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1851 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001852 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1853 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1854 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1855 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001856 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001857 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1858 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1859 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1860 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1861 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1862 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1863 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1864 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1865 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1866 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1867 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1868 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1869 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1870 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1871 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1872 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1873 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1874 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1875 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1876 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1877 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1878 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1879 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1880 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1881 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1882 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1883 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1884 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1885 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1886 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1887 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1888 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1889 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1890 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001891 default: genericUpdate(destination, source); break;
1892 }
1893 }
John Bauman89401822014-05-06 15:04:28 -04001894 }
1895
1896 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1897 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001898 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1899 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04001900
1901 int depth = min(destination.depth, source.depth);
1902 int height = min(destination.height, source.height);
1903 int width = min(destination.width, source.width);
1904 int rowBytes = width * source.bytes;
1905
1906 for(int z = 0; z < depth; z++)
1907 {
1908 unsigned char *sourceRow = sourceSlice;
1909 unsigned char *destinationRow = destinationSlice;
1910
1911 for(int y = 0; y < height; y++)
1912 {
1913 if(source.format == destination.format)
1914 {
1915 memcpy(destinationRow, sourceRow, rowBytes);
1916 }
1917 else
1918 {
1919 unsigned char *sourceElement = sourceRow;
1920 unsigned char *destinationElement = destinationRow;
1921
1922 for(int x = 0; x < width; x++)
1923 {
1924 Color<float> color = source.read(sourceElement);
1925 destination.write(destinationElement, color);
1926
1927 sourceElement += source.bytes;
1928 destinationElement += destination.bytes;
1929 }
1930 }
1931
1932 sourceRow += source.pitchB;
1933 destinationRow += destination.pitchB;
1934 }
1935
1936 sourceSlice += source.sliceB;
1937 destinationSlice += destination.sliceB;
1938 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001939
1940 source.unlockRect();
1941 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001942 }
1943
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001944 void Surface::decodeR8G8B8(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001945 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001946 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1947 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04001948
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001949 int depth = min(destination.depth, source.depth);
1950 int height = min(destination.height, source.height);
1951 int width = min(destination.width, source.width);
1952
1953 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04001954 {
1955 unsigned char *sourceRow = sourceSlice;
1956 unsigned char *destinationRow = destinationSlice;
1957
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001958 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04001959 {
1960 unsigned char *sourceElement = sourceRow;
1961 unsigned char *destinationElement = destinationRow;
1962
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001963 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04001964 {
1965 unsigned int b = sourceElement[0];
1966 unsigned int g = sourceElement[1];
1967 unsigned int r = sourceElement[2];
1968
1969 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1970
1971 sourceElement += source.bytes;
1972 destinationElement += destination.bytes;
1973 }
1974
1975 sourceRow += source.pitchB;
1976 destinationRow += destination.pitchB;
1977 }
1978
1979 sourceSlice += source.sliceB;
1980 destinationSlice += destination.sliceB;
1981 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001982
1983 source.unlockRect();
1984 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001985 }
1986
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001987 void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001988 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001989 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1990 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04001991
Nicolas Capensbfa23b32017-12-11 10:06:37 -05001992 int depth = min(destination.depth, source.depth);
1993 int height = min(destination.height, source.height);
1994 int width = min(destination.width, source.width);
1995
1996 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04001997 {
1998 unsigned char *sourceRow = sourceSlice;
1999 unsigned char *destinationRow = destinationSlice;
2000
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002001 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002002 {
2003 unsigned char *sourceElement = sourceRow;
2004 unsigned char *destinationElement = destinationRow;
2005
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002006 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002007 {
2008 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002009
John Bauman89401822014-05-06 15:04:28 -04002010 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
2011 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
2012 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
2013
2014 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2015
2016 sourceElement += source.bytes;
2017 destinationElement += destination.bytes;
2018 }
2019
2020 sourceRow += source.pitchB;
2021 destinationRow += destination.pitchB;
2022 }
2023
2024 sourceSlice += source.sliceB;
2025 destinationSlice += destination.sliceB;
2026 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002027
2028 source.unlockRect();
2029 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002030 }
2031
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002032 void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002033 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002034 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2035 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002036
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002037 int depth = min(destination.depth, source.depth);
2038 int height = min(destination.height, source.height);
2039 int width = min(destination.width, source.width);
2040
2041 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002042 {
2043 unsigned char *sourceRow = sourceSlice;
2044 unsigned char *destinationRow = destinationSlice;
2045
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002046 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002047 {
2048 unsigned char *sourceElement = sourceRow;
2049 unsigned char *destinationElement = destinationRow;
2050
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002051 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002052 {
2053 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002054
John Bauman89401822014-05-06 15:04:28 -04002055 unsigned int a = (argb & 0x8000) * 130560;
2056 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
2057 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
2058 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
2059
2060 *(unsigned int*)destinationElement = a | r | g | b;
2061
2062 sourceElement += source.bytes;
2063 destinationElement += destination.bytes;
2064 }
2065
2066 sourceRow += source.pitchB;
2067 destinationRow += destination.pitchB;
2068 }
2069
2070 sourceSlice += source.sliceB;
2071 destinationSlice += destination.sliceB;
2072 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002073
2074 source.unlockRect();
2075 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002076 }
2077
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002078 void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002079 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002080 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2081 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002082
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002083 int depth = min(destination.depth, source.depth);
2084 int height = min(destination.height, source.height);
2085 int width = min(destination.width, source.width);
2086
2087 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002088 {
2089 unsigned char *sourceRow = sourceSlice;
2090 unsigned char *destinationRow = destinationSlice;
2091
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002092 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002093 {
2094 unsigned char *sourceElement = sourceRow;
2095 unsigned char *destinationElement = destinationRow;
2096
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002097 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002098 {
2099 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002100
John Bauman89401822014-05-06 15:04:28 -04002101 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2102 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2103 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2104
2105 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2106
2107 sourceElement += source.bytes;
2108 destinationElement += destination.bytes;
2109 }
2110
2111 sourceRow += source.pitchB;
2112 destinationRow += destination.pitchB;
2113 }
2114
2115 sourceSlice += source.sliceB;
2116 destinationSlice += destination.sliceB;
2117 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002118
2119 source.unlockRect();
2120 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002121 }
2122
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002123 void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002124 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002125 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2126 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002127
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002128 int depth = min(destination.depth, source.depth);
2129 int height = min(destination.height, source.height);
2130 int width = min(destination.width, source.width);
2131
2132 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002133 {
2134 unsigned char *sourceRow = sourceSlice;
2135 unsigned char *destinationRow = destinationSlice;
2136
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002137 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002138 {
2139 unsigned char *sourceElement = sourceRow;
2140 unsigned char *destinationElement = destinationRow;
2141
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002142 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002143 {
2144 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002145
John Bauman89401822014-05-06 15:04:28 -04002146 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2147 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2148 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2149 unsigned int b = (argb & 0x000F) * 0x00000011;
2150
2151 *(unsigned int*)destinationElement = a | r | g | b;
2152
2153 sourceElement += source.bytes;
2154 destinationElement += destination.bytes;
2155 }
2156
2157 sourceRow += source.pitchB;
2158 destinationRow += destination.pitchB;
2159 }
2160
2161 sourceSlice += source.sliceB;
2162 destinationSlice += destination.sliceB;
2163 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002164
2165 source.unlockRect();
2166 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002167 }
2168
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002169 void Surface::decodeP8(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002170 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002171 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2172 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002173
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002174 int depth = min(destination.depth, source.depth);
2175 int height = min(destination.height, source.height);
2176 int width = min(destination.width, source.width);
2177
2178 for(int z = 0; z < depth; z++)
John Bauman89401822014-05-06 15:04:28 -04002179 {
2180 unsigned char *sourceRow = sourceSlice;
2181 unsigned char *destinationRow = destinationSlice;
2182
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002183 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04002184 {
2185 unsigned char *sourceElement = sourceRow;
2186 unsigned char *destinationElement = destinationRow;
2187
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002188 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04002189 {
2190 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2191
2192 unsigned int r = (abgr & 0x000000FF) << 16;
2193 unsigned int g = (abgr & 0x0000FF00) << 0;
2194 unsigned int b = (abgr & 0x00FF0000) >> 16;
2195 unsigned int a = (abgr & 0xFF000000) >> 0;
2196
2197 *(unsigned int*)destinationElement = a | r | g | b;
2198
2199 sourceElement += source.bytes;
2200 destinationElement += destination.bytes;
2201 }
2202
2203 sourceRow += source.pitchB;
2204 destinationRow += destination.pitchB;
2205 }
2206
2207 sourceSlice += source.sliceB;
2208 destinationSlice += destination.sliceB;
2209 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002210
2211 source.unlockRect();
2212 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002213 }
2214
2215#if S3TC_SUPPORT
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002216 void Surface::decodeDXT1(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002217 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002218 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2219 const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002220
2221 for(int z = 0; z < external.depth; z++)
2222 {
2223 unsigned int *dest = destSlice;
2224
2225 for(int y = 0; y < external.height; y += 4)
2226 {
2227 for(int x = 0; x < external.width; x += 4)
2228 {
2229 Color<byte> c[4];
2230
2231 c[0] = source->c0;
2232 c[1] = source->c1;
2233
2234 if(source->c0 > source->c1) // No transparency
2235 {
2236 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2237 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2238 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2239 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2240 c[2].a = 0xFF;
2241
2242 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2243 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2244 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2245 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2246 c[3].a = 0xFF;
2247 }
2248 else // c3 transparent
2249 {
2250 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2251 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2252 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2253 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2254 c[2].a = 0xFF;
2255
2256 c[3].r = 0;
2257 c[3].g = 0;
2258 c[3].b = 0;
2259 c[3].a = 0;
2260 }
2261
2262 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2263 {
2264 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2265 {
2266 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2267 }
2268 }
2269
2270 source++;
2271 }
2272 }
2273
2274 (byte*&)destSlice += internal.sliceB;
2275 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002276
2277 external.unlockRect();
2278 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002279 }
2280
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002281 void Surface::decodeDXT3(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002282 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002283 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2284 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002285
2286 for(int z = 0; z < external.depth; z++)
2287 {
2288 unsigned int *dest = destSlice;
2289
2290 for(int y = 0; y < external.height; y += 4)
2291 {
2292 for(int x = 0; x < external.width; x += 4)
2293 {
2294 Color<byte> c[4];
2295
2296 c[0] = source->c0;
2297 c[1] = source->c1;
2298
2299 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2300 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2301 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2302 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2303
2304 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2305 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2306 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2307 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2308
2309 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2310 {
2311 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2312 {
2313 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2314 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2315
2316 dest[(x + i) + (y + j) * internal.width] = color;
2317 }
2318 }
2319
2320 source++;
2321 }
2322 }
2323
2324 (byte*&)destSlice += internal.sliceB;
2325 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002326
2327 external.unlockRect();
2328 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002329 }
2330
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002331 void Surface::decodeDXT5(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002332 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002333 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2334 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002335
2336 for(int z = 0; z < external.depth; z++)
2337 {
2338 unsigned int *dest = destSlice;
2339
2340 for(int y = 0; y < external.height; y += 4)
2341 {
2342 for(int x = 0; x < external.width; x += 4)
2343 {
2344 Color<byte> c[4];
2345
2346 c[0] = source->c0;
2347 c[1] = source->c1;
2348
2349 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2350 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2351 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2352 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2353
2354 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2355 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2356 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2357 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2358
2359 byte a[8];
2360
2361 a[0] = source->a0;
2362 a[1] = source->a1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002363
John Bauman89401822014-05-06 15:04:28 -04002364 if(a[0] > a[1])
2365 {
2366 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2367 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2368 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2369 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2370 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2371 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2372 }
2373 else
2374 {
2375 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2376 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2377 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2378 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2379 a[6] = 0;
2380 a[7] = 0xFF;
2381 }
2382
2383 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2384 {
2385 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2386 {
2387 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2388 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002389
John Bauman89401822014-05-06 15:04:28 -04002390 dest[(x + i) + (y + j) * internal.width] = color;
2391 }
2392 }
2393
2394 source++;
2395 }
2396 }
2397
2398 (byte*&)destSlice += internal.sliceB;
2399 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002400
2401 external.unlockRect();
2402 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002403 }
Nicolas Capens22658242014-11-29 00:31:41 -05002404#endif
John Bauman89401822014-05-06 15:04:28 -04002405
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002406 void Surface::decodeATI1(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002407 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002408 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2409 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002410
2411 for(int z = 0; z < external.depth; z++)
2412 {
2413 byte *dest = destSlice;
2414
2415 for(int y = 0; y < external.height; y += 4)
2416 {
2417 for(int x = 0; x < external.width; x += 4)
2418 {
2419 byte r[8];
2420
2421 r[0] = source->r0;
2422 r[1] = source->r1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002423
John Bauman89401822014-05-06 15:04:28 -04002424 if(r[0] > r[1])
2425 {
2426 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2427 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2428 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2429 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2430 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2431 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2432 }
2433 else
2434 {
2435 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2436 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2437 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2438 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2439 r[6] = 0;
2440 r[7] = 0xFF;
2441 }
2442
2443 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2444 {
2445 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2446 {
2447 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2448 }
2449 }
2450
2451 source++;
2452 }
2453 }
2454
2455 destSlice += internal.sliceB;
2456 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002457
2458 external.unlockRect();
2459 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002460 }
2461
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002462 void Surface::decodeATI2(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002463 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002464 word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2465 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002466
2467 for(int z = 0; z < external.depth; z++)
2468 {
2469 word *dest = destSlice;
2470
2471 for(int y = 0; y < external.height; y += 4)
2472 {
2473 for(int x = 0; x < external.width; x += 4)
2474 {
2475 byte X[8];
2476
2477 X[0] = source->x0;
2478 X[1] = source->x1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002479
John Bauman89401822014-05-06 15:04:28 -04002480 if(X[0] > X[1])
2481 {
2482 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2483 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2484 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2485 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2486 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2487 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2488 }
2489 else
2490 {
2491 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2492 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2493 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2494 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2495 X[6] = 0;
2496 X[7] = 0xFF;
2497 }
2498
2499 byte Y[8];
2500
2501 Y[0] = source->y0;
2502 Y[1] = source->y1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002503
John Bauman89401822014-05-06 15:04:28 -04002504 if(Y[0] > Y[1])
2505 {
2506 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2507 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2508 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2509 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2510 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2511 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2512 }
2513 else
2514 {
2515 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2516 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2517 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2518 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2519 Y[6] = 0;
2520 Y[7] = 0xFF;
2521 }
2522
2523 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2524 {
2525 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2526 {
2527 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2528 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2529
2530 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2531 }
2532 }
2533
2534 source++;
2535 }
2536 }
2537
2538 (byte*&)destSlice += internal.sliceB;
2539 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002540
2541 external.unlockRect();
2542 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002543 }
Nicolas Capens22658242014-11-29 00:31:41 -05002544
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002545 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002546 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002547 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
Alexis Hetu0de50d42015-09-09 13:56:41 -04002548 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002549 external.unlockRect();
2550 internal.unlockRect();
Nicolas Capens22658242014-11-29 00:31:41 -05002551
Alexis Hetu0de50d42015-09-09 13:56:41 -04002552 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002553 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002554 static byte sRGBtoLinearTable[256];
2555 static bool sRGBtoLinearTableDirty = true;
2556 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002557 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002558 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002559 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002560 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002561 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002562 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002563 }
2564
Alexis Hetu0de50d42015-09-09 13:56:41 -04002565 // Perform sRGB conversion in place after decoding
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002566 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
Alexis Hetu0de50d42015-09-09 13:56:41 -04002567 for(int y = 0; y < internal.height; y++)
2568 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002569 byte *srcRow = src + y * internal.pitchB;
Alexis Hetu0de50d42015-09-09 13:56:41 -04002570 for(int x = 0; x < internal.width; x++)
2571 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002572 byte *srcPix = srcRow + x * internal.bytes;
Alexis Hetu0de50d42015-09-09 13:56:41 -04002573 for(int i = 0; i < 3; i++)
2574 {
2575 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2576 }
2577 }
2578 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002579 internal.unlockRect();
Nicolas Capens22658242014-11-29 00:31:41 -05002580 }
2581 }
John Bauman89401822014-05-06 15:04:28 -04002582
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002583 void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
Alexis Hetu460e41f2015-09-01 10:58:37 -04002584 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002585 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002586
Alexis Hetuf46493f2017-12-18 15:32:26 -05002587 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
2588 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
Alexis Hetu0de50d42015-09-09 13:56:41 -04002589 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002590 external.unlockRect();
Alexis Hetu0de50d42015-09-09 13:56:41 -04002591
Alexis Hetuf46493f2017-12-18 15:32:26 -05002592 // FIXME: We convert EAC data to float, until signed short internal formats are supported
2593 // This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats
2594 const float normalization = isSigned ? (1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f));
2595 for(int y = 0; y < internal.height; y++)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002596 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002597 byte* srcRow = src + y * internal.pitchB;
2598 for(int x = internal.width - 1; x >= 0; x--)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002599 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002600 int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes);
2601 float* dstPix = reinterpret_cast<float*>(srcPix);
2602 for(int c = nbChannels - 1; c >= 0; c--)
Alexis Hetu0de50d42015-09-09 13:56:41 -04002603 {
Alexis Hetuf46493f2017-12-18 15:32:26 -05002604 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
Alexis Hetu0de50d42015-09-09 13:56:41 -04002605 }
2606 }
2607 }
Alexis Hetuf46493f2017-12-18 15:32:26 -05002608
2609 internal.unlockRect();
Alexis Hetu460e41f2015-09-01 10:58:37 -04002610 }
2611
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002612 void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
Alexis Hetu460e41f2015-09-01 10:58:37 -04002613 {
2614 }
2615
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002616 unsigned int Surface::size(int width, int height, int depth, int border, int samples, Format format)
John Bauman89401822014-05-06 15:04:28 -04002617 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002618 width += 2 * border;
2619 height += 2 * border;
2620
Nicolas Capens00555c42015-07-21 15:15:30 -04002621 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002622 int width4 = align(width, 4);
2623 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002624
2625 switch(format)
2626 {
2627 #if S3TC_SUPPORT
2628 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002629 #endif
John Bauman89401822014-05-06 15:04:28 -04002630 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002631 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002632 case FORMAT_R11_EAC:
2633 case FORMAT_SIGNED_R11_EAC:
2634 case FORMAT_RGB8_ETC2:
2635 case FORMAT_SRGB8_ETC2:
2636 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2637 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002638 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002639 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002640 case FORMAT_DXT3:
2641 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002642 #endif
John Bauman89401822014-05-06 15:04:28 -04002643 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002644 case FORMAT_RG11_EAC:
2645 case FORMAT_SIGNED_RG11_EAC:
2646 case FORMAT_RGBA8_ETC2_EAC:
2647 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2648 case FORMAT_RGBA_ASTC_4x4_KHR:
2649 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002650 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002651 case FORMAT_RGBA_ASTC_5x4_KHR:
2652 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2653 return align(width, 5) * height4 * depth;
2654 case FORMAT_RGBA_ASTC_5x5_KHR:
2655 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2656 return align(width, 5) * align(height, 5) * depth;
2657 case FORMAT_RGBA_ASTC_6x5_KHR:
2658 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2659 return align(width, 6) * align(height, 5) * depth;
2660 case FORMAT_RGBA_ASTC_6x6_KHR:
2661 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2662 return align(width, 6) * align(height, 6) * depth;
2663 case FORMAT_RGBA_ASTC_8x5_KHR:
2664 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2665 return align(width, 8) * align(height, 5) * depth;
2666 case FORMAT_RGBA_ASTC_8x6_KHR:
2667 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2668 return align(width, 8) * align(height, 6) * depth;
2669 case FORMAT_RGBA_ASTC_8x8_KHR:
2670 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2671 return align(width, 8) * align(height, 8) * depth;
2672 case FORMAT_RGBA_ASTC_10x5_KHR:
2673 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2674 return align(width, 10) * align(height, 5) * depth;
2675 case FORMAT_RGBA_ASTC_10x6_KHR:
2676 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2677 return align(width, 10) * align(height, 6) * depth;
2678 case FORMAT_RGBA_ASTC_10x8_KHR:
2679 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2680 return align(width, 10) * align(height, 8) * depth;
2681 case FORMAT_RGBA_ASTC_10x10_KHR:
2682 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2683 return align(width, 10) * align(height, 10) * depth;
2684 case FORMAT_RGBA_ASTC_12x10_KHR:
2685 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2686 return align(width, 12) * align(height, 10) * depth;
2687 case FORMAT_RGBA_ASTC_12x12_KHR:
2688 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2689 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002690 case FORMAT_YV12_BT601:
2691 case FORMAT_YV12_BT709:
2692 case FORMAT_YV12_JFIF:
2693 {
2694 unsigned int YStride = align(width, 16);
2695 unsigned int YSize = YStride * height;
2696 unsigned int CStride = align(YStride / 2, 16);
Nicolas Capens0bac2852016-05-07 06:09:58 -04002697 unsigned int CSize = CStride * height / 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002698
2699 return YSize + 2 * CSize;
2700 }
John Bauman89401822014-05-06 15:04:28 -04002701 default:
Nicolas Capensbfa23b32017-12-11 10:06:37 -05002702 return bytes(format) * width * height * depth * samples;
John Bauman89401822014-05-06 15:04:28 -04002703 }
John Bauman89401822014-05-06 15:04:28 -04002704 }
2705
2706 bool Surface::isStencil(Format format)
2707 {
2708 switch(format)
2709 {
2710 case FORMAT_D32:
2711 case FORMAT_D16:
2712 case FORMAT_D24X8:
2713 case FORMAT_D32F:
2714 case FORMAT_D32F_COMPLEMENTARY:
2715 case FORMAT_D32F_LOCKABLE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002716 case FORMAT_D32F_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002717 return false;
2718 case FORMAT_D24S8:
2719 case FORMAT_D24FS8:
2720 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002721 case FORMAT_DF24S8:
2722 case FORMAT_DF16S8:
2723 case FORMAT_D32FS8_TEXTURE:
2724 case FORMAT_D32FS8_SHADOW:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002725 case FORMAT_D32FS8:
2726 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002727 case FORMAT_INTZ:
2728 return true;
2729 default:
2730 return false;
2731 }
2732 }
2733
2734 bool Surface::isDepth(Format format)
2735 {
2736 switch(format)
2737 {
2738 case FORMAT_D32:
2739 case FORMAT_D16:
2740 case FORMAT_D24X8:
2741 case FORMAT_D24S8:
2742 case FORMAT_D24FS8:
2743 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002744 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002745 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002746 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002747 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002748 case FORMAT_DF24S8:
2749 case FORMAT_DF16S8:
2750 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002751 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002752 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002753 case FORMAT_INTZ:
2754 return true;
2755 case FORMAT_S8:
2756 return false;
2757 default:
2758 return false;
2759 }
2760 }
2761
Alexis Hetub9dda642016-10-06 11:25:32 -04002762 bool Surface::hasQuadLayout(Format format)
2763 {
2764 switch(format)
2765 {
2766 case FORMAT_D32:
2767 case FORMAT_D16:
2768 case FORMAT_D24X8:
2769 case FORMAT_D24S8:
2770 case FORMAT_D24FS8:
2771 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002772 case FORMAT_D32FS8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002773 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002774 case FORMAT_D32FS8_COMPLEMENTARY:
Alexis Hetub9dda642016-10-06 11:25:32 -04002775 case FORMAT_DF24S8:
2776 case FORMAT_DF16S8:
2777 case FORMAT_INTZ:
2778 case FORMAT_S8:
2779 case FORMAT_A8G8R8B8Q:
2780 case FORMAT_X8G8R8B8Q:
2781 return true;
2782 case FORMAT_D32F_LOCKABLE:
2783 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002784 case FORMAT_D32F_SHADOW:
Alexis Hetub9dda642016-10-06 11:25:32 -04002785 case FORMAT_D32FS8_SHADOW:
2786 default:
2787 break;
2788 }
2789
2790 return false;
2791 }
2792
John Bauman89401822014-05-06 15:04:28 -04002793 bool Surface::isPalette(Format format)
2794 {
2795 switch(format)
2796 {
2797 case FORMAT_P8:
2798 case FORMAT_A8P8:
2799 return true;
2800 default:
2801 return false;
2802 }
2803 }
2804
2805 bool Surface::isFloatFormat(Format format)
2806 {
2807 switch(format)
2808 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002809 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002810 case FORMAT_R8G8B8:
2811 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002812 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002813 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002814 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002815 case FORMAT_A8R8G8B8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002816 case FORMAT_SRGB8_X8:
2817 case FORMAT_SRGB8_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002818 case FORMAT_A8B8G8R8I:
2819 case FORMAT_R8UI:
2820 case FORMAT_G8R8UI:
2821 case FORMAT_X8B8G8R8UI:
2822 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002823 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002824 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002825 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002826 case FORMAT_A2B10G10R10:
Alexis Hetu43577b82015-10-21 15:32:16 -04002827 case FORMAT_R8I_SNORM:
2828 case FORMAT_G8R8I_SNORM:
2829 case FORMAT_X8B8G8R8I_SNORM:
2830 case FORMAT_A8B8G8R8I_SNORM:
2831 case FORMAT_R16I:
2832 case FORMAT_R16UI:
2833 case FORMAT_G16R16I:
2834 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002835 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002836 case FORMAT_X16B16G16R16I:
2837 case FORMAT_X16B16G16R16UI:
2838 case FORMAT_A16B16G16R16I:
2839 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002840 case FORMAT_A16B16G16R16:
2841 case FORMAT_V8U8:
2842 case FORMAT_Q8W8V8U8:
2843 case FORMAT_X8L8V8U8:
2844 case FORMAT_V16U16:
2845 case FORMAT_A16W16V16U16:
2846 case FORMAT_Q16W16V16U16:
2847 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002848 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002849 case FORMAT_R8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002850 case FORMAT_S8:
John Bauman89401822014-05-06 15:04:28 -04002851 case FORMAT_L8:
2852 case FORMAT_L16:
2853 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002854 case FORMAT_YV12_BT601:
2855 case FORMAT_YV12_BT709:
2856 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002857 case FORMAT_R32I:
2858 case FORMAT_R32UI:
2859 case FORMAT_G32R32I:
2860 case FORMAT_G32R32UI:
2861 case FORMAT_X32B32G32R32I:
2862 case FORMAT_X32B32G32R32UI:
2863 case FORMAT_A32B32G32R32I:
2864 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002865 return false;
Nicolas Capens400667e2017-03-29 14:40:14 -04002866 case FORMAT_R16F:
2867 case FORMAT_G16R16F:
2868 case FORMAT_B16G16R16F:
2869 case FORMAT_A16B16G16R16F:
John Bauman89401822014-05-06 15:04:28 -04002870 case FORMAT_R32F:
2871 case FORMAT_G32R32F:
Nicolas Capensc018e082016-12-13 10:19:33 -05002872 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002873 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002874 case FORMAT_A32B32G32R32F:
2875 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002876 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002877 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002878 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002879 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002880 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002881 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002882 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002883 case FORMAT_L16F:
2884 case FORMAT_A16L16F:
2885 case FORMAT_L32F:
2886 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002887 return true;
2888 default:
2889 ASSERT(false);
2890 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002891
John Bauman89401822014-05-06 15:04:28 -04002892 return false;
2893 }
2894
2895 bool Surface::isUnsignedComponent(Format format, int component)
2896 {
2897 switch(format)
2898 {
2899 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002900 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002901 case FORMAT_R8G8B8:
2902 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002903 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002904 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002905 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002906 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002907 case FORMAT_SRGB8_X8:
2908 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002909 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002910 case FORMAT_A2B10G10R10:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002911 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002912 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002913 case FORMAT_G16R16UI:
2914 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002915 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002916 case FORMAT_A16B16G16R16UI:
2917 case FORMAT_R32UI:
2918 case FORMAT_G32R32UI:
2919 case FORMAT_X32B32G32R32UI:
2920 case FORMAT_A32B32G32R32UI:
2921 case FORMAT_R8UI:
2922 case FORMAT_G8R8UI:
2923 case FORMAT_X8B8G8R8UI:
2924 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002925 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002926 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002927 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002928 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002929 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002930 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002931 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002932 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002933 case FORMAT_A8:
2934 case FORMAT_R8:
2935 case FORMAT_L8:
2936 case FORMAT_L16:
2937 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002938 case FORMAT_YV12_BT601:
2939 case FORMAT_YV12_BT709:
2940 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002941 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002942 case FORMAT_A8B8G8R8I:
2943 case FORMAT_A16B16G16R16I:
2944 case FORMAT_A32B32G32R32I:
2945 case FORMAT_A8B8G8R8I_SNORM:
2946 case FORMAT_Q8W8V8U8:
2947 case FORMAT_Q16W16V16U16:
2948 case FORMAT_A32B32G32R32F:
2949 return false;
2950 case FORMAT_R32F:
2951 case FORMAT_R8I:
2952 case FORMAT_R16I:
2953 case FORMAT_R32I:
2954 case FORMAT_R8I_SNORM:
2955 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002956 case FORMAT_V8U8:
2957 case FORMAT_X8L8V8U8:
2958 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002959 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002960 case FORMAT_G8R8I:
2961 case FORMAT_G16R16I:
2962 case FORMAT_G32R32I:
2963 case FORMAT_G8R8I_SNORM:
2964 return component >= 2;
2965 case FORMAT_A16W16V16U16:
Nicolas Capens2e363b02016-12-14 10:32:36 -05002966 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002967 case FORMAT_X32B32G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002968 case FORMAT_X8B8G8R8I:
2969 case FORMAT_X16B16G16R16I:
2970 case FORMAT_X32B32G32R32I:
2971 case FORMAT_X8B8G8R8I_SNORM:
2972 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002973 default:
2974 ASSERT(false);
2975 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002976
John Bauman89401822014-05-06 15:04:28 -04002977 return false;
2978 }
2979
2980 bool Surface::isSRGBreadable(Format format)
2981 {
2982 // Keep in sync with Capabilities::isSRGBreadable
2983 switch(format)
2984 {
2985 case FORMAT_L8:
2986 case FORMAT_A8L8:
2987 case FORMAT_R8G8B8:
2988 case FORMAT_A8R8G8B8:
2989 case FORMAT_X8R8G8B8:
2990 case FORMAT_A8B8G8R8:
2991 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002992 case FORMAT_SRGB8_X8:
2993 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002994 case FORMAT_R5G6B5:
2995 case FORMAT_X1R5G5B5:
2996 case FORMAT_A1R5G5B5:
2997 case FORMAT_A4R4G4B4:
2998 #if S3TC_SUPPORT
2999 case FORMAT_DXT1:
3000 case FORMAT_DXT3:
3001 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003002 #endif
John Bauman89401822014-05-06 15:04:28 -04003003 case FORMAT_ATI1:
3004 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04003005 return true;
3006 default:
3007 return false;
3008 }
John Bauman89401822014-05-06 15:04:28 -04003009 }
3010
3011 bool Surface::isSRGBwritable(Format format)
3012 {
3013 // Keep in sync with Capabilities::isSRGBwritable
3014 switch(format)
3015 {
3016 case FORMAT_NULL:
3017 case FORMAT_A8R8G8B8:
3018 case FORMAT_X8R8G8B8:
3019 case FORMAT_A8B8G8R8:
3020 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04003021 case FORMAT_SRGB8_X8:
3022 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04003023 case FORMAT_R5G6B5:
3024 return true;
3025 default:
3026 return false;
3027 }
3028 }
3029
3030 bool Surface::isCompressed(Format format)
3031 {
3032 switch(format)
3033 {
3034 #if S3TC_SUPPORT
3035 case FORMAT_DXT1:
3036 case FORMAT_DXT3:
3037 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003038 #endif
John Bauman89401822014-05-06 15:04:28 -04003039 case FORMAT_ATI1:
3040 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05003041 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003042 case FORMAT_R11_EAC:
3043 case FORMAT_SIGNED_R11_EAC:
3044 case FORMAT_RG11_EAC:
3045 case FORMAT_SIGNED_RG11_EAC:
3046 case FORMAT_RGB8_ETC2:
3047 case FORMAT_SRGB8_ETC2:
3048 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3049 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3050 case FORMAT_RGBA8_ETC2_EAC:
3051 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3052 case FORMAT_RGBA_ASTC_4x4_KHR:
3053 case FORMAT_RGBA_ASTC_5x4_KHR:
3054 case FORMAT_RGBA_ASTC_5x5_KHR:
3055 case FORMAT_RGBA_ASTC_6x5_KHR:
3056 case FORMAT_RGBA_ASTC_6x6_KHR:
3057 case FORMAT_RGBA_ASTC_8x5_KHR:
3058 case FORMAT_RGBA_ASTC_8x6_KHR:
3059 case FORMAT_RGBA_ASTC_8x8_KHR:
3060 case FORMAT_RGBA_ASTC_10x5_KHR:
3061 case FORMAT_RGBA_ASTC_10x6_KHR:
3062 case FORMAT_RGBA_ASTC_10x8_KHR:
3063 case FORMAT_RGBA_ASTC_10x10_KHR:
3064 case FORMAT_RGBA_ASTC_12x10_KHR:
3065 case FORMAT_RGBA_ASTC_12x12_KHR:
3066 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3067 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3068 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3069 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3070 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3071 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3072 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3073 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3074 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3075 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3076 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3077 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3078 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3079 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04003080 return true;
John Bauman89401822014-05-06 15:04:28 -04003081 default:
3082 return false;
3083 }
3084 }
3085
Nicolas Capens492887a2017-03-27 14:50:51 -04003086 bool Surface::isSignedNonNormalizedInteger(Format format)
Alexis Hetu43577b82015-10-21 15:32:16 -04003087 {
3088 switch(format)
3089 {
3090 case FORMAT_A8B8G8R8I:
3091 case FORMAT_X8B8G8R8I:
3092 case FORMAT_G8R8I:
3093 case FORMAT_R8I:
Alexis Hetu43577b82015-10-21 15:32:16 -04003094 case FORMAT_A16B16G16R16I:
3095 case FORMAT_X16B16G16R16I:
3096 case FORMAT_G16R16I:
3097 case FORMAT_R16I:
Alexis Hetu91dd1c42017-07-18 13:03:42 -04003098 case FORMAT_A32B32G32R32I:
3099 case FORMAT_X32B32G32R32I:
3100 case FORMAT_G32R32I:
3101 case FORMAT_R32I:
Nicolas Capens492887a2017-03-27 14:50:51 -04003102 return true;
3103 default:
3104 return false;
3105 }
3106 }
3107
3108 bool Surface::isUnsignedNonNormalizedInteger(Format format)
3109 {
3110 switch(format)
3111 {
Alexis Hetu91dd1c42017-07-18 13:03:42 -04003112 case FORMAT_A8B8G8R8UI:
3113 case FORMAT_X8B8G8R8UI:
3114 case FORMAT_G8R8UI:
3115 case FORMAT_R8UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04003116 case FORMAT_A16B16G16R16UI:
3117 case FORMAT_X16B16G16R16UI:
3118 case FORMAT_G16R16UI:
3119 case FORMAT_R16UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04003120 case FORMAT_A32B32G32R32UI:
3121 case FORMAT_X32B32G32R32UI:
3122 case FORMAT_G32R32UI:
3123 case FORMAT_R32UI:
3124 return true;
3125 default:
3126 return false;
3127 }
3128 }
3129
Nicolas Capens492887a2017-03-27 14:50:51 -04003130 bool Surface::isNonNormalizedInteger(Format format)
3131 {
3132 return isSignedNonNormalizedInteger(format) ||
3133 isUnsignedNonNormalizedInteger(format);
3134 }
3135
3136 bool Surface::isNormalizedInteger(Format format)
3137 {
3138 return !isFloatFormat(format) &&
3139 !isNonNormalizedInteger(format) &&
3140 !isCompressed(format) &&
3141 !isDepth(format) &&
3142 !isStencil(format);
3143 }
3144
John Bauman89401822014-05-06 15:04:28 -04003145 int Surface::componentCount(Format format)
3146 {
3147 switch(format)
3148 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003149 case FORMAT_R5G6B5: return 3;
3150 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003151 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003152 case FORMAT_X8B8G8R8: return 3;
3153 case FORMAT_A8R8G8B8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04003154 case FORMAT_SRGB8_X8: return 3;
3155 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003156 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003157 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003158 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003159 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003160 case FORMAT_R8I_SNORM: return 1;
3161 case FORMAT_G8R8I_SNORM: return 2;
3162 case FORMAT_X8B8G8R8I_SNORM:return 3;
3163 case FORMAT_A8B8G8R8I_SNORM:return 4;
3164 case FORMAT_R8UI: return 1;
3165 case FORMAT_G8R8UI: return 2;
3166 case FORMAT_X8B8G8R8UI: return 3;
3167 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05003168 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003169 case FORMAT_G16R16I: return 2;
3170 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003171 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003172 case FORMAT_G32R32I: return 2;
3173 case FORMAT_G32R32UI: return 2;
3174 case FORMAT_X16B16G16R16I: return 3;
3175 case FORMAT_X16B16G16R16UI: return 3;
3176 case FORMAT_A16B16G16R16I: return 4;
3177 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003178 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003179 case FORMAT_X32B32G32R32I: return 3;
3180 case FORMAT_X32B32G32R32UI: return 3;
3181 case FORMAT_A32B32G32R32I: return 4;
3182 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003183 case FORMAT_V8U8: return 2;
3184 case FORMAT_Q8W8V8U8: return 4;
3185 case FORMAT_X8L8V8U8: return 3;
3186 case FORMAT_V16U16: return 2;
3187 case FORMAT_A16W16V16U16: return 4;
3188 case FORMAT_Q16W16V16U16: return 4;
3189 case FORMAT_R32F: return 1;
3190 case FORMAT_G32R32F: return 2;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003191 case FORMAT_X32B32G32R32F: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003192 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003193 case FORMAT_D32F: return 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003194 case FORMAT_D32FS8: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003195 case FORMAT_D32F_LOCKABLE: return 1;
3196 case FORMAT_D32FS8_TEXTURE: return 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003197 case FORMAT_D32F_SHADOW: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003198 case FORMAT_D32FS8_SHADOW: return 1;
3199 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003200 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003201 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003202 case FORMAT_R16I: return 1;
3203 case FORMAT_R16UI: return 1;
3204 case FORMAT_R32I: return 1;
3205 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003206 case FORMAT_L8: return 1;
3207 case FORMAT_L16: return 1;
3208 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003209 case FORMAT_YV12_BT601: return 3;
3210 case FORMAT_YV12_BT709: return 3;
3211 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04003212 default:
3213 ASSERT(false);
3214 }
3215
3216 return 1;
3217 }
3218
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003219 void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format)
John Bauman89401822014-05-06 15:04:28 -04003220 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04003221 // Render targets require 2x2 quads
3222 int width2 = (width + 1) & ~1;
3223 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04003224
Nicolas Capens6ea71872015-06-26 13:00:48 -04003225 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
Nicolas Capens48ef1252016-11-07 15:30:33 -05003226 // and stencil operations also read 8 bytes per four 8-bit stencil values,
Nicolas Capens6ea71872015-06-26 13:00:48 -04003227 // so we have to allocate 4 extra bytes to avoid buffer overruns.
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003228 return allocate(size(width2, height2, depth, border, samples, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04003229 }
3230
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003231 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003232 {
3233 while((size_t)buffer & 0x1 && bytes >= 1)
3234 {
3235 *(char*)buffer = (char)pattern;
3236 (char*&)buffer += 1;
3237 bytes -= 1;
3238 }
3239
3240 while((size_t)buffer & 0x3 && bytes >= 2)
3241 {
3242 *(short*)buffer = (short)pattern;
3243 (short*&)buffer += 1;
3244 bytes -= 2;
3245 }
3246
Nicolas Capens47dc8672017-04-25 12:54:39 -04003247 #if defined(__i386__) || defined(__x86_64__)
3248 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04003249 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003250 while((size_t)buffer & 0xF && bytes >= 4)
3251 {
3252 *(int*)buffer = pattern;
3253 (int*&)buffer += 1;
3254 bytes -= 4;
3255 }
3256
3257 __m128 quad = _mm_set_ps1((float&)pattern);
3258
3259 float *pointer = (float*)buffer;
3260 int qxwords = bytes / 64;
3261 bytes -= qxwords * 64;
3262
3263 while(qxwords--)
3264 {
3265 _mm_stream_ps(pointer + 0, quad);
3266 _mm_stream_ps(pointer + 4, quad);
3267 _mm_stream_ps(pointer + 8, quad);
3268 _mm_stream_ps(pointer + 12, quad);
3269
3270 pointer += 16;
3271 }
3272
3273 buffer = pointer;
John Bauman89401822014-05-06 15:04:28 -04003274 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003275 #endif
John Bauman89401822014-05-06 15:04:28 -04003276
3277 while(bytes >= 4)
3278 {
3279 *(int*)buffer = (int)pattern;
3280 (int*&)buffer += 1;
3281 bytes -= 4;
3282 }
3283
3284 while(bytes >= 2)
3285 {
3286 *(short*)buffer = (short)pattern;
3287 (short*&)buffer += 1;
3288 bytes -= 2;
3289 }
3290
3291 while(bytes >= 1)
3292 {
3293 *(char*)buffer = (char)pattern;
3294 (char*&)buffer += 1;
3295 bytes -= 1;
3296 }
3297 }
3298
Nicolas Capensbf7a8142017-05-19 10:57:28 -04003299 void Surface::sync()
3300 {
3301 resource->lock(EXCLUSIVE);
3302 resource->unlock();
3303 }
3304
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003305 bool Surface::isEntire(const Rect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003306 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003307 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3308 }
John Bauman89401822014-05-06 15:04:28 -04003309
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003310 Rect Surface::getRect() const
Alexis Hetu75b650f2015-11-19 17:40:15 -05003311 {
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003312 return Rect(0, 0, internal.width, internal.height);
John Bauman89401822014-05-06 15:04:28 -04003313 }
3314
Nicolas Capensc39901e2016-03-21 16:37:44 -04003315 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003316 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003317 if(width == 0 || height == 0) return;
3318
John Bauman89401822014-05-06 15:04:28 -04003319 // Not overlapping
3320 if(x0 > internal.width) return;
3321 if(y0 > internal.height) return;
3322 if(x0 + width < 0) return;
3323 if(y0 + height < 0) return;
3324
3325 // Clip against dimensions
3326 if(x0 < 0) {width += x0; x0 = 0;}
3327 if(x0 + width > internal.width) width = internal.width - x0;
3328 if(y0 < 0) {height += y0; y0 = 0;}
3329 if(y0 + height > internal.height) height = internal.height - y0;
3330
3331 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3332 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3333
John Bauman89401822014-05-06 15:04:28 -04003334 int x1 = x0 + width;
3335 int y1 = y0 + height;
3336
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003337 if(!hasQuadLayout(internal.format))
John Bauman89401822014-05-06 15:04:28 -04003338 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003339 float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC);
John Bauman89401822014-05-06 15:04:28 -04003340
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003341 for(int z = 0; z < internal.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003342 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003343 float *row = target;
John Bauman89401822014-05-06 15:04:28 -04003344 for(int y = y0; y < y1; y++)
3345 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003346 memfill4(row, (int&)depth, width * sizeof(float));
3347 row += internal.pitchP;
John Bauman89401822014-05-06 15:04:28 -04003348 }
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003349 target += internal.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003350 }
3351
3352 unlockInternal();
3353 }
3354 else // Quad layout
3355 {
3356 if(complementaryDepthBuffer)
3357 {
3358 depth = 1 - depth;
3359 }
3360
3361 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3362
Alexis Hetu358a1442015-12-03 14:23:10 -05003363 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3364 int oddX1 = (x1 & ~1) * 2;
3365 int evenX0 = ((x0 + 1) & ~1) * 2;
3366 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3367
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003368 for(int z = 0; z < internal.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003369 {
3370 for(int y = y0; y < y1; y++)
3371 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003372 float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003373
John Bauman89401822014-05-06 15:04:28 -04003374 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3375 {
3376 if((x0 & 1) != 0)
3377 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003378 target[oddX0 + 0] = depth;
3379 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003380 }
3381
Alexis Hetu358a1442015-12-03 14:23:10 -05003382 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003383 // {
3384 // target[x2 + 0] = depth;
3385 // target[x2 + 1] = depth;
3386 // target[x2 + 2] = depth;
3387 // target[x2 + 3] = depth;
3388 // }
3389
3390 // __asm
3391 // {
3392 // movss xmm0, depth
3393 // shufps xmm0, xmm0, 0x00
3394 //
3395 // mov eax, x0
3396 // add eax, 1
3397 // and eax, 0xFFFFFFFE
3398 // cmp eax, x1
3399 // jge qEnd
3400 //
3401 // mov edi, target
3402 //
3403 // qLoop:
3404 // movntps [edi+8*eax], xmm0
3405 //
3406 // add eax, 2
3407 // cmp eax, x1
3408 // jl qLoop
3409 // qEnd:
3410 // }
3411
Alexis Hetu358a1442015-12-03 14:23:10 -05003412 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003413
3414 if((x1 & 1) != 0)
3415 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003416 target[oddX1 + 0] = depth;
3417 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003418 }
3419
3420 y++;
3421 }
3422 else
3423 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003424 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003425 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003426 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003427 }
3428 }
3429 }
3430
3431 buffer += internal.sliceP;
3432 }
3433
3434 unlockInternal();
3435 }
3436 }
3437
Nicolas Capensc39901e2016-03-21 16:37:44 -04003438 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003439 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003440 if(mask == 0 || width == 0 || height == 0) return;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003441
John Bauman89401822014-05-06 15:04:28 -04003442 // Not overlapping
3443 if(x0 > internal.width) return;
3444 if(y0 > internal.height) return;
3445 if(x0 + width < 0) return;
3446 if(y0 + height < 0) return;
3447
3448 // Clip against dimensions
3449 if(x0 < 0) {width += x0; x0 = 0;}
3450 if(x0 + width > internal.width) width = internal.width - x0;
3451 if(y0 < 0) {height += y0; y0 = 0;}
3452 if(y0 + height > internal.height) height = internal.height - y0;
3453
John Bauman89401822014-05-06 15:04:28 -04003454 int x1 = x0 + width;
3455 int y1 = y0 + height;
3456
Alexis Hetu358a1442015-12-03 14:23:10 -05003457 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3458 int oddX1 = (x1 & ~1) * 2;
3459 int evenX0 = ((x0 + 1) & ~1) * 2;
3460 int evenBytes = oddX1 - evenX0;
3461
John Bauman89401822014-05-06 15:04:28 -04003462 unsigned char maskedS = s & mask;
3463 unsigned char invMask = ~mask;
3464 unsigned int fill = maskedS;
Tom Anderson69bc6e82017-03-20 11:54:29 -07003465 fill = fill | (fill << 8) | (fill << 16) | (fill << 24);
John Bauman89401822014-05-06 15:04:28 -04003466
Alexis Hetua52dfbd2016-10-05 17:03:30 -04003467 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003468
3469 // Stencil buffers are assumed to use quad layout
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003470 for(int z = 0; z < stencil.samples; z++)
John Bauman89401822014-05-06 15:04:28 -04003471 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003472 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003473 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003474 char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003475
3476 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003477 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003478 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003479 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003480 target[oddX0 + 0] = fill;
3481 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003482 }
3483
Alexis Hetu358a1442015-12-03 14:23:10 -05003484 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003485
3486 if((x1 & 1) != 0)
3487 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003488 target[oddX1 + 0] = fill;
3489 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003490 }
3491
3492 y++;
3493 }
3494 else
3495 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003496 for(int x = x0; x < x1; x++)
Alexis Hetu2b052f82015-11-25 13:57:28 -05003497 {
Nicolas Capensc4a3f242017-12-11 15:07:53 -05003498 int i = (x & ~1) * 2 + (x & 1);
Alexis Hetu358a1442015-12-03 14:23:10 -05003499 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003500 }
John Bauman89401822014-05-06 15:04:28 -04003501 }
3502 }
3503
Alexis Hetu2b052f82015-11-25 13:57:28 -05003504 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003505 }
John Bauman89401822014-05-06 15:04:28 -04003506
Alexis Hetu2b052f82015-11-25 13:57:28 -05003507 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003508 }
3509
3510 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3511 {
3512 unsigned char *row;
3513 Buffer *buffer;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003514
John Bauman89401822014-05-06 15:04:28 -04003515 if(internal.dirty)
3516 {
3517 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3518 buffer = &internal;
3519 }
3520 else
3521 {
3522 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3523 buffer = &external;
3524 }
3525
3526 if(buffer->bytes <= 4)
3527 {
3528 int c;
3529 buffer->write(&c, color);
3530
3531 if(buffer->bytes <= 1) c = (c << 8) | c;
3532 if(buffer->bytes <= 2) c = (c << 16) | c;
3533
3534 for(int y = 0; y < height; y++)
3535 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003536 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003537
3538 row += buffer->pitchB;
3539 }
3540 }
3541 else // Generic
3542 {
3543 for(int y = 0; y < height; y++)
3544 {
3545 unsigned char *element = row;
3546
3547 for(int x = 0; x < width; x++)
3548 {
3549 buffer->write(element, color);
3550
3551 element += buffer->bytes;
3552 }
3553
3554 row += buffer->pitchB;
3555 }
3556 }
3557
3558 if(buffer == &internal)
3559 {
3560 unlockInternal();
3561 }
3562 else
3563 {
3564 unlockExternal();
3565 }
3566 }
3567
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003568 void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003569 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003570 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003571
Alexis Hetu43577b82015-10-21 15:32:16 -04003572 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003573
Alexis Hetu43577b82015-10-21 15:32:16 -04003574 if(!filter)
3575 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003576 color = source->internal.read((int)srcX, (int)srcY, 0);
Alexis Hetu43577b82015-10-21 15:32:16 -04003577 }
3578 else // Bilinear filtering
3579 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003580 color = source->internal.sample(srcX, srcY, 0);
Alexis Hetu43577b82015-10-21 15:32:16 -04003581 }
John Bauman89401822014-05-06 15:04:28 -04003582
3583 internal.write(x, y, color);
3584 }
3585
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003586 void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
Alexis Hetu43577b82015-10-21 15:32:16 -04003587 {
3588 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3589
3590 sw::Color<float> color;
3591
3592 if(!filter)
3593 {
3594 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3595 }
3596 else // Bilinear filtering
3597 {
3598 color = source->internal.sample(srcX, srcY, srcZ);
3599 }
3600
3601 internal.write(x, y, z, color);
3602 }
3603
Alexis Hetua76a1bf2016-11-29 17:17:26 -05003604 void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge)
3605 {
3606 Surface *dst = this;
3607
3608 // Figure out if the edges to be copied in reverse order respectively from one another
3609 // The copy should be reversed whenever the same edges are contiguous or if we're
3610 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
3611 //
3612 // | +y |
3613 // | -x | +z | +x | -z |
3614 // | -y |
3615
3616 bool reverse = (srcEdge == dstEdge) ||
3617 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
3618 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
3619 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
3620 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
3621
3622 int srcBytes = src->bytes(src->Surface::getInternalFormat());
3623 int srcPitch = src->getInternalPitchB();
3624 int dstBytes = dst->bytes(dst->Surface::getInternalFormat());
3625 int dstPitch = dst->getInternalPitchB();
3626
3627 int srcW = src->getWidth();
3628 int srcH = src->getHeight();
3629 int dstW = dst->getWidth();
3630 int dstH = dst->getHeight();
3631
3632 ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes);
3633
3634 // Src is expressed in the regular [0, width-1], [0, height-1] space
3635 int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch;
3636 int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0));
3637
3638 // Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space
3639 int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1);
3640 int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta);
3641
3642 char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart;
3643 char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart;
3644
3645 for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta)
3646 {
3647 memcpy(dstBuf, srcBuf, srcBytes);
3648 }
3649
3650 if(dstEdge == LEFT || dstEdge == RIGHT)
3651 {
3652 // TOP and BOTTOM are already set, let's average out the corners
3653 int x0 = (dstEdge == RIGHT) ? dstW : -1;
3654 int y0 = -1;
3655 int x1 = (dstEdge == RIGHT) ? dstW - 1 : 0;
3656 int y1 = 0;
3657 dst->computeCubeCorner(x0, y0, x1, y1);
3658 y0 = dstH;
3659 y1 = dstH - 1;
3660 dst->computeCubeCorner(x0, y0, x1, y1);
3661 }
3662
3663 src->unlockInternal();
3664 dst->unlockInternal();
3665 }
3666
3667 void Surface::computeCubeCorner(int x0, int y0, int x1, int y1)
3668 {
3669 ASSERT(internal.lock != LOCK_UNLOCKED);
3670
3671 sw::Color<float> color = internal.read(x0, y1);
3672 color += internal.read(x1, y0);
3673 color += internal.read(x1, y1);
3674 color *= (1.0f / 3.0f);
3675
3676 internal.write(x0, y0, color);
3677 }
3678
John Bauman89401822014-05-06 15:04:28 -04003679 bool Surface::hasStencil() const
3680 {
3681 return isStencil(external.format);
3682 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04003683
John Bauman89401822014-05-06 15:04:28 -04003684 bool Surface::hasDepth() const
3685 {
3686 return isDepth(external.format);
3687 }
3688
3689 bool Surface::hasPalette() const
3690 {
3691 return isPalette(external.format);
3692 }
3693
3694 bool Surface::isRenderTarget() const
3695 {
3696 return renderTarget;
3697 }
3698
Nicolas Capens73e18c12017-11-28 13:31:35 -05003699 bool Surface::hasDirtyContents() const
John Bauman89401822014-05-06 15:04:28 -04003700 {
Nicolas Capens73e18c12017-11-28 13:31:35 -05003701 return dirtyContents;
John Bauman89401822014-05-06 15:04:28 -04003702 }
3703
Nicolas Capens73e18c12017-11-28 13:31:35 -05003704 void Surface::markContentsClean()
John Bauman89401822014-05-06 15:04:28 -04003705 {
Nicolas Capens73e18c12017-11-28 13:31:35 -05003706 dirtyContents = false;
John Bauman89401822014-05-06 15:04:28 -04003707 }
3708
3709 Resource *Surface::getResource()
3710 {
3711 return resource;
3712 }
3713
3714 bool Surface::identicalFormats() const
3715 {
John Bauman66b8ab22014-05-06 15:57:45 -04003716 return external.format == internal.format &&
3717 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003718 external.height == internal.height &&
3719 external.depth == internal.depth &&
3720 external.pitchB == internal.pitchB &&
Alexis Hetu9c6d5222016-11-29 17:02:14 -05003721 external.sliceB == internal.sliceB &&
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003722 external.border == internal.border &&
3723 external.samples == internal.samples;
John Bauman89401822014-05-06 15:04:28 -04003724 }
3725
3726 Format Surface::selectInternalFormat(Format format) const
3727 {
3728 switch(format)
3729 {
3730 case FORMAT_NULL:
3731 return FORMAT_NULL;
3732 case FORMAT_P8:
3733 case FORMAT_A8P8:
3734 case FORMAT_A4R4G4B4:
3735 case FORMAT_A1R5G5B5:
3736 case FORMAT_A8R3G3B2:
3737 return FORMAT_A8R8G8B8;
3738 case FORMAT_A8:
3739 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003740 case FORMAT_R8I:
3741 return FORMAT_R8I;
3742 case FORMAT_R8UI:
3743 return FORMAT_R8UI;
3744 case FORMAT_R8I_SNORM:
3745 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003746 case FORMAT_R8:
3747 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003748 case FORMAT_R16I:
3749 return FORMAT_R16I;
3750 case FORMAT_R16UI:
3751 return FORMAT_R16UI;
3752 case FORMAT_R32I:
3753 return FORMAT_R32I;
3754 case FORMAT_R32UI:
3755 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003756 case FORMAT_X16B16G16R16I:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003757 return FORMAT_X16B16G16R16I;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003758 case FORMAT_A16B16G16R16I:
3759 return FORMAT_A16B16G16R16I;
3760 case FORMAT_X16B16G16R16UI:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003761 return FORMAT_X16B16G16R16UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003762 case FORMAT_A16B16G16R16UI:
3763 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003764 case FORMAT_A2R10G10B10:
3765 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003766 case FORMAT_A16B16G16R16:
3767 return FORMAT_A16B16G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003768 case FORMAT_X32B32G32R32I:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003769 return FORMAT_X32B32G32R32I;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003770 case FORMAT_A32B32G32R32I:
3771 return FORMAT_A32B32G32R32I;
3772 case FORMAT_X32B32G32R32UI:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003773 return FORMAT_X32B32G32R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003774 case FORMAT_A32B32G32R32UI:
3775 return FORMAT_A32B32G32R32UI;
3776 case FORMAT_G8R8I:
3777 return FORMAT_G8R8I;
3778 case FORMAT_G8R8UI:
3779 return FORMAT_G8R8UI;
3780 case FORMAT_G8R8I_SNORM:
3781 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003782 case FORMAT_G8R8:
3783 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003784 case FORMAT_G16R16I:
3785 return FORMAT_G16R16I;
3786 case FORMAT_G16R16UI:
3787 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003788 case FORMAT_G16R16:
3789 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003790 case FORMAT_G32R32I:
3791 return FORMAT_G32R32I;
3792 case FORMAT_G32R32UI:
3793 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003794 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003795 if(lockable || !quadLayoutEnabled)
3796 {
3797 return FORMAT_A8R8G8B8;
3798 }
3799 else
3800 {
3801 return FORMAT_A8G8R8B8Q;
3802 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003803 case FORMAT_A8B8G8R8I:
3804 return FORMAT_A8B8G8R8I;
3805 case FORMAT_A8B8G8R8UI:
3806 return FORMAT_A8B8G8R8UI;
3807 case FORMAT_A8B8G8R8I_SNORM:
3808 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003809 case FORMAT_R5G5B5A1:
3810 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003811 case FORMAT_A8B8G8R8:
3812 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003813 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003814 return FORMAT_R5G6B5;
3815 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003816 case FORMAT_R8G8B8:
3817 case FORMAT_X4R4G4B4:
3818 case FORMAT_X1R5G5B5:
3819 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003820 if(lockable || !quadLayoutEnabled)
3821 {
3822 return FORMAT_X8R8G8B8;
3823 }
3824 else
3825 {
3826 return FORMAT_X8G8R8B8Q;
3827 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003828 case FORMAT_X8B8G8R8I:
3829 return FORMAT_X8B8G8R8I;
3830 case FORMAT_X8B8G8R8UI:
3831 return FORMAT_X8B8G8R8UI;
3832 case FORMAT_X8B8G8R8I_SNORM:
3833 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003834 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003835 case FORMAT_X8B8G8R8:
3836 return FORMAT_X8B8G8R8;
Alexis Hetu049a1872016-04-25 16:59:58 -04003837 case FORMAT_SRGB8_X8:
3838 return FORMAT_SRGB8_X8;
3839 case FORMAT_SRGB8_A8:
3840 return FORMAT_SRGB8_A8;
John Bauman89401822014-05-06 15:04:28 -04003841 // Compressed formats
3842 #if S3TC_SUPPORT
3843 case FORMAT_DXT1:
3844 case FORMAT_DXT3:
3845 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003846 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003847 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3848 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3849 case FORMAT_RGBA8_ETC2_EAC:
3850 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3851 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3852 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3853 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3854 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3855 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3856 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3857 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3858 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3859 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3860 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3861 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3862 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3863 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3864 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3865 return FORMAT_A8R8G8B8;
3866 case FORMAT_RGBA_ASTC_4x4_KHR:
3867 case FORMAT_RGBA_ASTC_5x4_KHR:
3868 case FORMAT_RGBA_ASTC_5x5_KHR:
3869 case FORMAT_RGBA_ASTC_6x5_KHR:
3870 case FORMAT_RGBA_ASTC_6x6_KHR:
3871 case FORMAT_RGBA_ASTC_8x5_KHR:
3872 case FORMAT_RGBA_ASTC_8x6_KHR:
3873 case FORMAT_RGBA_ASTC_8x8_KHR:
3874 case FORMAT_RGBA_ASTC_10x5_KHR:
3875 case FORMAT_RGBA_ASTC_10x6_KHR:
3876 case FORMAT_RGBA_ASTC_10x8_KHR:
3877 case FORMAT_RGBA_ASTC_10x10_KHR:
3878 case FORMAT_RGBA_ASTC_12x10_KHR:
3879 case FORMAT_RGBA_ASTC_12x12_KHR:
3880 // ASTC supports HDR, so a floating point format is required to represent it properly
3881 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003882 case FORMAT_ATI1:
3883 return FORMAT_R8;
Alexis Hetuf46493f2017-12-18 15:32:26 -05003884 case FORMAT_R11_EAC:
Alexis Hetu0de50d42015-09-09 13:56:41 -04003885 case FORMAT_SIGNED_R11_EAC:
3886 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003887 case FORMAT_ATI2:
3888 return FORMAT_G8R8;
Alexis Hetuf46493f2017-12-18 15:32:26 -05003889 case FORMAT_RG11_EAC:
Alexis Hetu0de50d42015-09-09 13:56:41 -04003890 case FORMAT_SIGNED_RG11_EAC:
3891 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003892 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003893 case FORMAT_RGB8_ETC2:
3894 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003895 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003896 // Bumpmap formats
3897 case FORMAT_V8U8: return FORMAT_V8U8;
3898 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3899 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3900 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3901 case FORMAT_V16U16: return FORMAT_V16U16;
3902 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3903 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3904 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003905 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003906 case FORMAT_R16F: return FORMAT_R32F;
3907 case FORMAT_G16R16F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003908 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003909 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003910 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003911 case FORMAT_R32F: return FORMAT_R32F;
3912 case FORMAT_G32R32F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003913 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3914 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003915 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3916 // Luminance formats
3917 case FORMAT_L8: return FORMAT_L8;
3918 case FORMAT_A4L4: return FORMAT_A8L8;
3919 case FORMAT_L16: return FORMAT_L16;
3920 case FORMAT_A8L8: return FORMAT_A8L8;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003921 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003922 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003923 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003924 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003925 // Depth/stencil formats
3926 case FORMAT_D16:
3927 case FORMAT_D32:
3928 case FORMAT_D24X8:
John Bauman89401822014-05-06 15:04:28 -04003929 if(hasParent) // Texture
3930 {
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003931 return FORMAT_D32F_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003932 }
3933 else if(complementaryDepthBuffer)
3934 {
3935 return FORMAT_D32F_COMPLEMENTARY;
3936 }
3937 else
3938 {
3939 return FORMAT_D32F;
3940 }
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003941 case FORMAT_D24S8:
3942 case FORMAT_D24FS8:
3943 if(hasParent) // Texture
3944 {
3945 return FORMAT_D32FS8_SHADOW;
3946 }
3947 else if(complementaryDepthBuffer)
3948 {
3949 return FORMAT_D32FS8_COMPLEMENTARY;
3950 }
3951 else
3952 {
3953 return FORMAT_D32FS8;
3954 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003955 case FORMAT_D32F: return FORMAT_D32F;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003956 case FORMAT_D32FS8: return FORMAT_D32FS8;
John Bauman66b8ab22014-05-06 15:57:45 -04003957 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3958 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3959 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3960 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3961 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003962 case FORMAT_S8: return FORMAT_S8;
3963 // YUV formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003964 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3965 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3966 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003967 default:
3968 ASSERT(false);
3969 }
3970
3971 return FORMAT_NULL;
3972 }
3973
3974 void Surface::setTexturePalette(unsigned int *palette)
3975 {
3976 Surface::palette = palette;
3977 Surface::paletteID++;
3978 }
3979
3980 void Surface::resolve()
3981 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003982 if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
John Bauman89401822014-05-06 15:04:28 -04003983 {
3984 return;
3985 }
3986
Nicolas Capensbfa23b32017-12-11 10:06:37 -05003987 ASSERT(internal.depth == 1); // Unimplemented
3988
John Bauman89401822014-05-06 15:04:28 -04003989 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3990
John Bauman89401822014-05-06 15:04:28 -04003991 int width = internal.width;
3992 int height = internal.height;
3993 int pitch = internal.pitchB;
3994 int slice = internal.sliceB;
3995
3996 unsigned char *source0 = (unsigned char*)source;
3997 unsigned char *source1 = source0 + slice;
3998 unsigned char *source2 = source1 + slice;
3999 unsigned char *source3 = source2 + slice;
4000 unsigned char *source4 = source3 + slice;
4001 unsigned char *source5 = source4 + slice;
4002 unsigned char *source6 = source5 + slice;
4003 unsigned char *source7 = source6 + slice;
4004 unsigned char *source8 = source7 + slice;
4005 unsigned char *source9 = source8 + slice;
4006 unsigned char *sourceA = source9 + slice;
4007 unsigned char *sourceB = sourceA + slice;
4008 unsigned char *sourceC = sourceB + slice;
4009 unsigned char *sourceD = sourceC + slice;
4010 unsigned char *sourceE = sourceD + slice;
4011 unsigned char *sourceF = sourceE + slice;
4012
Alexis Hetu049a1872016-04-25 16:59:58 -04004013 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
4014 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
4015 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
John Bauman89401822014-05-06 15:04:28 -04004016 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004017 #if defined(__i386__) || defined(__x86_64__)
4018 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004019 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004020 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004021 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004022 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004023 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004024 for(int x = 0; x < width; x += 4)
4025 {
4026 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4027 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004028
Nicolas Capens47dc8672017-04-25 12:54:39 -04004029 c0 = _mm_avg_epu8(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004030
Nicolas Capens47dc8672017-04-25 12:54:39 -04004031 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4032 }
4033
4034 source0 += pitch;
4035 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004036 }
John Bauman89401822014-05-06 15:04:28 -04004037 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004038 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004039 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004040 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004041 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004042 for(int x = 0; x < width; x += 4)
4043 {
4044 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4045 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4046 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4047 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004048
Nicolas Capens47dc8672017-04-25 12:54:39 -04004049 c0 = _mm_avg_epu8(c0, c1);
4050 c2 = _mm_avg_epu8(c2, c3);
4051 c0 = _mm_avg_epu8(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004052
Nicolas Capens47dc8672017-04-25 12:54:39 -04004053 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4054 }
4055
4056 source0 += pitch;
4057 source1 += pitch;
4058 source2 += pitch;
4059 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004060 }
John Bauman89401822014-05-06 15:04:28 -04004061 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004062 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004063 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004064 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004065 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004066 for(int x = 0; x < width; x += 4)
4067 {
4068 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4069 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4070 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4071 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4072 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4073 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4074 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4075 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004076
Nicolas Capens47dc8672017-04-25 12:54:39 -04004077 c0 = _mm_avg_epu8(c0, c1);
4078 c2 = _mm_avg_epu8(c2, c3);
4079 c4 = _mm_avg_epu8(c4, c5);
4080 c6 = _mm_avg_epu8(c6, c7);
4081 c0 = _mm_avg_epu8(c0, c2);
4082 c4 = _mm_avg_epu8(c4, c6);
4083 c0 = _mm_avg_epu8(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004084
Nicolas Capens47dc8672017-04-25 12:54:39 -04004085 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4086 }
4087
4088 source0 += pitch;
4089 source1 += pitch;
4090 source2 += pitch;
4091 source3 += pitch;
4092 source4 += pitch;
4093 source5 += pitch;
4094 source6 += pitch;
4095 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004096 }
John Bauman89401822014-05-06 15:04:28 -04004097 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004098 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004099 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004100 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004101 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004102 for(int x = 0; x < width; x += 4)
4103 {
4104 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4105 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4106 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4107 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4108 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4109 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4110 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4111 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4112 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4113 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4114 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4115 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4116 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4117 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4118 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4119 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004120
Nicolas Capens47dc8672017-04-25 12:54:39 -04004121 c0 = _mm_avg_epu8(c0, c1);
4122 c2 = _mm_avg_epu8(c2, c3);
4123 c4 = _mm_avg_epu8(c4, c5);
4124 c6 = _mm_avg_epu8(c6, c7);
4125 c8 = _mm_avg_epu8(c8, c9);
4126 cA = _mm_avg_epu8(cA, cB);
4127 cC = _mm_avg_epu8(cC, cD);
4128 cE = _mm_avg_epu8(cE, cF);
4129 c0 = _mm_avg_epu8(c0, c2);
4130 c4 = _mm_avg_epu8(c4, c6);
4131 c8 = _mm_avg_epu8(c8, cA);
4132 cC = _mm_avg_epu8(cC, cE);
4133 c0 = _mm_avg_epu8(c0, c4);
4134 c8 = _mm_avg_epu8(c8, cC);
4135 c0 = _mm_avg_epu8(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004136
Nicolas Capens47dc8672017-04-25 12:54:39 -04004137 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4138 }
4139
4140 source0 += pitch;
4141 source1 += pitch;
4142 source2 += pitch;
4143 source3 += pitch;
4144 source4 += pitch;
4145 source5 += pitch;
4146 source6 += pitch;
4147 source7 += pitch;
4148 source8 += pitch;
4149 source9 += pitch;
4150 sourceA += pitch;
4151 sourceB += pitch;
4152 sourceC += pitch;
4153 sourceD += pitch;
4154 sourceE += pitch;
4155 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004156 }
John Bauman89401822014-05-06 15:04:28 -04004157 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004158 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004159 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004160 else
4161 #endif
John Bauman89401822014-05-06 15:04:28 -04004162 {
4163 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
4164
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004165 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004166 {
4167 for(int y = 0; y < height; y++)
4168 {
4169 for(int x = 0; x < width; x++)
4170 {
4171 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4172 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4173
4174 c0 = AVERAGE(c0, c1);
4175
4176 *(unsigned int*)(source0 + 4 * x) = c0;
4177 }
4178
4179 source0 += pitch;
4180 source1 += pitch;
4181 }
4182 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004183 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004184 {
4185 for(int y = 0; y < height; y++)
4186 {
4187 for(int x = 0; x < width; x++)
4188 {
4189 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4190 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4191 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4192 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4193
4194 c0 = AVERAGE(c0, c1);
4195 c2 = AVERAGE(c2, c3);
4196 c0 = AVERAGE(c0, c2);
4197
4198 *(unsigned int*)(source0 + 4 * x) = c0;
4199 }
4200
4201 source0 += pitch;
4202 source1 += pitch;
4203 source2 += pitch;
4204 source3 += pitch;
4205 }
4206 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004207 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004208 {
4209 for(int y = 0; y < height; y++)
4210 {
4211 for(int x = 0; x < width; x++)
4212 {
4213 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4214 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4215 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4216 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4217 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4218 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4219 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4220 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4221
4222 c0 = AVERAGE(c0, c1);
4223 c2 = AVERAGE(c2, c3);
4224 c4 = AVERAGE(c4, c5);
4225 c6 = AVERAGE(c6, c7);
4226 c0 = AVERAGE(c0, c2);
4227 c4 = AVERAGE(c4, c6);
4228 c0 = AVERAGE(c0, c4);
4229
4230 *(unsigned int*)(source0 + 4 * x) = c0;
4231 }
4232
4233 source0 += pitch;
4234 source1 += pitch;
4235 source2 += pitch;
4236 source3 += pitch;
4237 source4 += pitch;
4238 source5 += pitch;
4239 source6 += pitch;
4240 source7 += pitch;
4241 }
4242 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004243 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004244 {
4245 for(int y = 0; y < height; y++)
4246 {
4247 for(int x = 0; x < width; x++)
4248 {
4249 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4250 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4251 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4252 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4253 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4254 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4255 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4256 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4257 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4258 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4259 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4260 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4261 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4262 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4263 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4264 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4265
4266 c0 = AVERAGE(c0, c1);
4267 c2 = AVERAGE(c2, c3);
4268 c4 = AVERAGE(c4, c5);
4269 c6 = AVERAGE(c6, c7);
4270 c8 = AVERAGE(c8, c9);
4271 cA = AVERAGE(cA, cB);
4272 cC = AVERAGE(cC, cD);
4273 cE = AVERAGE(cE, cF);
4274 c0 = AVERAGE(c0, c2);
4275 c4 = AVERAGE(c4, c6);
4276 c8 = AVERAGE(c8, cA);
4277 cC = AVERAGE(cC, cE);
4278 c0 = AVERAGE(c0, c4);
4279 c8 = AVERAGE(c8, cC);
4280 c0 = AVERAGE(c0, c8);
4281
4282 *(unsigned int*)(source0 + 4 * x) = c0;
4283 }
4284
4285 source0 += pitch;
4286 source1 += pitch;
4287 source2 += pitch;
4288 source3 += pitch;
4289 source4 += pitch;
4290 source5 += pitch;
4291 source6 += pitch;
4292 source7 += pitch;
4293 source8 += pitch;
4294 source9 += pitch;
4295 sourceA += pitch;
4296 sourceB += pitch;
4297 sourceC += pitch;
4298 sourceD += pitch;
4299 sourceE += pitch;
4300 sourceF += pitch;
4301 }
4302 }
4303 else ASSERT(false);
4304
4305 #undef AVERAGE
4306 }
4307 }
4308 else if(internal.format == FORMAT_G16R16)
4309 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004310
4311 #if defined(__i386__) || defined(__x86_64__)
4312 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004313 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004314 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004315 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004316 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004317 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004318 for(int x = 0; x < width; x += 4)
4319 {
4320 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4321 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004322
Nicolas Capens47dc8672017-04-25 12:54:39 -04004323 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004324
Nicolas Capens47dc8672017-04-25 12:54:39 -04004325 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4326 }
4327
4328 source0 += pitch;
4329 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004330 }
John Bauman89401822014-05-06 15:04:28 -04004331 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004332 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004333 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004334 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004335 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004336 for(int x = 0; x < width; x += 4)
4337 {
4338 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4339 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4340 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4341 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004342
Nicolas Capens47dc8672017-04-25 12:54:39 -04004343 c0 = _mm_avg_epu16(c0, c1);
4344 c2 = _mm_avg_epu16(c2, c3);
4345 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004346
Nicolas Capens47dc8672017-04-25 12:54:39 -04004347 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4348 }
4349
4350 source0 += pitch;
4351 source1 += pitch;
4352 source2 += pitch;
4353 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004354 }
John Bauman89401822014-05-06 15:04:28 -04004355 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004356 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004357 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004358 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004359 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004360 for(int x = 0; x < width; x += 4)
4361 {
4362 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4363 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4364 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4365 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4366 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4367 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4368 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4369 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004370
Nicolas Capens47dc8672017-04-25 12:54:39 -04004371 c0 = _mm_avg_epu16(c0, c1);
4372 c2 = _mm_avg_epu16(c2, c3);
4373 c4 = _mm_avg_epu16(c4, c5);
4374 c6 = _mm_avg_epu16(c6, c7);
4375 c0 = _mm_avg_epu16(c0, c2);
4376 c4 = _mm_avg_epu16(c4, c6);
4377 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004378
Nicolas Capens47dc8672017-04-25 12:54:39 -04004379 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4380 }
4381
4382 source0 += pitch;
4383 source1 += pitch;
4384 source2 += pitch;
4385 source3 += pitch;
4386 source4 += pitch;
4387 source5 += pitch;
4388 source6 += pitch;
4389 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004390 }
John Bauman89401822014-05-06 15:04:28 -04004391 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004392 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004393 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004394 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004395 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004396 for(int x = 0; x < width; x += 4)
4397 {
4398 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4399 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4400 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4401 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4402 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4403 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4404 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4405 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4406 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4407 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4408 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4409 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4410 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4411 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4412 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4413 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004414
Nicolas Capens47dc8672017-04-25 12:54:39 -04004415 c0 = _mm_avg_epu16(c0, c1);
4416 c2 = _mm_avg_epu16(c2, c3);
4417 c4 = _mm_avg_epu16(c4, c5);
4418 c6 = _mm_avg_epu16(c6, c7);
4419 c8 = _mm_avg_epu16(c8, c9);
4420 cA = _mm_avg_epu16(cA, cB);
4421 cC = _mm_avg_epu16(cC, cD);
4422 cE = _mm_avg_epu16(cE, cF);
4423 c0 = _mm_avg_epu16(c0, c2);
4424 c4 = _mm_avg_epu16(c4, c6);
4425 c8 = _mm_avg_epu16(c8, cA);
4426 cC = _mm_avg_epu16(cC, cE);
4427 c0 = _mm_avg_epu16(c0, c4);
4428 c8 = _mm_avg_epu16(c8, cC);
4429 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004430
Nicolas Capens47dc8672017-04-25 12:54:39 -04004431 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4432 }
4433
4434 source0 += pitch;
4435 source1 += pitch;
4436 source2 += pitch;
4437 source3 += pitch;
4438 source4 += pitch;
4439 source5 += pitch;
4440 source6 += pitch;
4441 source7 += pitch;
4442 source8 += pitch;
4443 source9 += pitch;
4444 sourceA += pitch;
4445 sourceB += pitch;
4446 sourceC += pitch;
4447 sourceD += pitch;
4448 sourceE += pitch;
4449 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004450 }
John Bauman89401822014-05-06 15:04:28 -04004451 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004452 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004453 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004454 else
4455 #endif
John Bauman89401822014-05-06 15:04:28 -04004456 {
4457 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4458
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004459 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004460 {
4461 for(int y = 0; y < height; y++)
4462 {
4463 for(int x = 0; x < width; x++)
4464 {
4465 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4466 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4467
4468 c0 = AVERAGE(c0, c1);
4469
4470 *(unsigned int*)(source0 + 4 * x) = c0;
4471 }
4472
4473 source0 += pitch;
4474 source1 += pitch;
4475 }
4476 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004477 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004478 {
4479 for(int y = 0; y < height; y++)
4480 {
4481 for(int x = 0; x < width; x++)
4482 {
4483 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4484 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4485 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4486 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4487
4488 c0 = AVERAGE(c0, c1);
4489 c2 = AVERAGE(c2, c3);
4490 c0 = AVERAGE(c0, c2);
4491
4492 *(unsigned int*)(source0 + 4 * x) = c0;
4493 }
4494
4495 source0 += pitch;
4496 source1 += pitch;
4497 source2 += pitch;
4498 source3 += pitch;
4499 }
4500 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004501 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004502 {
4503 for(int y = 0; y < height; y++)
4504 {
4505 for(int x = 0; x < width; x++)
4506 {
4507 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4508 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4509 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4510 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4511 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4512 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4513 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4514 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4515
4516 c0 = AVERAGE(c0, c1);
4517 c2 = AVERAGE(c2, c3);
4518 c4 = AVERAGE(c4, c5);
4519 c6 = AVERAGE(c6, c7);
4520 c0 = AVERAGE(c0, c2);
4521 c4 = AVERAGE(c4, c6);
4522 c0 = AVERAGE(c0, c4);
4523
4524 *(unsigned int*)(source0 + 4 * x) = c0;
4525 }
4526
4527 source0 += pitch;
4528 source1 += pitch;
4529 source2 += pitch;
4530 source3 += pitch;
4531 source4 += pitch;
4532 source5 += pitch;
4533 source6 += pitch;
4534 source7 += pitch;
4535 }
4536 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004537 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004538 {
4539 for(int y = 0; y < height; y++)
4540 {
4541 for(int x = 0; x < width; x++)
4542 {
4543 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4544 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4545 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4546 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4547 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4548 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4549 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4550 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4551 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4552 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4553 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4554 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4555 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4556 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4557 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4558 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4559
4560 c0 = AVERAGE(c0, c1);
4561 c2 = AVERAGE(c2, c3);
4562 c4 = AVERAGE(c4, c5);
4563 c6 = AVERAGE(c6, c7);
4564 c8 = AVERAGE(c8, c9);
4565 cA = AVERAGE(cA, cB);
4566 cC = AVERAGE(cC, cD);
4567 cE = AVERAGE(cE, cF);
4568 c0 = AVERAGE(c0, c2);
4569 c4 = AVERAGE(c4, c6);
4570 c8 = AVERAGE(c8, cA);
4571 cC = AVERAGE(cC, cE);
4572 c0 = AVERAGE(c0, c4);
4573 c8 = AVERAGE(c8, cC);
4574 c0 = AVERAGE(c0, c8);
4575
4576 *(unsigned int*)(source0 + 4 * x) = c0;
4577 }
4578
4579 source0 += pitch;
4580 source1 += pitch;
4581 source2 += pitch;
4582 source3 += pitch;
4583 source4 += pitch;
4584 source5 += pitch;
4585 source6 += pitch;
4586 source7 += pitch;
4587 source8 += pitch;
4588 source9 += pitch;
4589 sourceA += pitch;
4590 sourceB += pitch;
4591 sourceC += pitch;
4592 sourceD += pitch;
4593 sourceE += pitch;
4594 sourceF += pitch;
4595 }
4596 }
4597 else ASSERT(false);
4598
4599 #undef AVERAGE
4600 }
4601 }
4602 else if(internal.format == FORMAT_A16B16G16R16)
4603 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004604 #if defined(__i386__) || defined(__x86_64__)
4605 if(CPUID::supportsSSE2() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004606 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004607 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004608 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004609 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004610 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004611 for(int x = 0; x < width; x += 2)
4612 {
4613 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4614 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004615
Nicolas Capens47dc8672017-04-25 12:54:39 -04004616 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004617
Nicolas Capens47dc8672017-04-25 12:54:39 -04004618 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4619 }
4620
4621 source0 += pitch;
4622 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004623 }
John Bauman89401822014-05-06 15:04:28 -04004624 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004625 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004626 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004627 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004628 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004629 for(int x = 0; x < width; x += 2)
4630 {
4631 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4632 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4633 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4634 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004635
Nicolas Capens47dc8672017-04-25 12:54:39 -04004636 c0 = _mm_avg_epu16(c0, c1);
4637 c2 = _mm_avg_epu16(c2, c3);
4638 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004639
Nicolas Capens47dc8672017-04-25 12:54:39 -04004640 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4641 }
4642
4643 source0 += pitch;
4644 source1 += pitch;
4645 source2 += pitch;
4646 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004647 }
John Bauman89401822014-05-06 15:04:28 -04004648 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004649 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004650 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004651 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004652 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004653 for(int x = 0; x < width; x += 2)
4654 {
4655 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4656 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4657 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4658 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4659 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4660 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4661 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4662 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004663
Nicolas Capens47dc8672017-04-25 12:54:39 -04004664 c0 = _mm_avg_epu16(c0, c1);
4665 c2 = _mm_avg_epu16(c2, c3);
4666 c4 = _mm_avg_epu16(c4, c5);
4667 c6 = _mm_avg_epu16(c6, c7);
4668 c0 = _mm_avg_epu16(c0, c2);
4669 c4 = _mm_avg_epu16(c4, c6);
4670 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004671
Nicolas Capens47dc8672017-04-25 12:54:39 -04004672 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4673 }
4674
4675 source0 += pitch;
4676 source1 += pitch;
4677 source2 += pitch;
4678 source3 += pitch;
4679 source4 += pitch;
4680 source5 += pitch;
4681 source6 += pitch;
4682 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004683 }
John Bauman89401822014-05-06 15:04:28 -04004684 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004685 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004686 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004687 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004688 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004689 for(int x = 0; x < width; x += 2)
4690 {
4691 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4692 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4693 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4694 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4695 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4696 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4697 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4698 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4699 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4700 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4701 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4702 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4703 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4704 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4705 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4706 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04004707
Nicolas Capens47dc8672017-04-25 12:54:39 -04004708 c0 = _mm_avg_epu16(c0, c1);
4709 c2 = _mm_avg_epu16(c2, c3);
4710 c4 = _mm_avg_epu16(c4, c5);
4711 c6 = _mm_avg_epu16(c6, c7);
4712 c8 = _mm_avg_epu16(c8, c9);
4713 cA = _mm_avg_epu16(cA, cB);
4714 cC = _mm_avg_epu16(cC, cD);
4715 cE = _mm_avg_epu16(cE, cF);
4716 c0 = _mm_avg_epu16(c0, c2);
4717 c4 = _mm_avg_epu16(c4, c6);
4718 c8 = _mm_avg_epu16(c8, cA);
4719 cC = _mm_avg_epu16(cC, cE);
4720 c0 = _mm_avg_epu16(c0, c4);
4721 c8 = _mm_avg_epu16(c8, cC);
4722 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004723
Nicolas Capens47dc8672017-04-25 12:54:39 -04004724 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4725 }
4726
4727 source0 += pitch;
4728 source1 += pitch;
4729 source2 += pitch;
4730 source3 += pitch;
4731 source4 += pitch;
4732 source5 += pitch;
4733 source6 += pitch;
4734 source7 += pitch;
4735 source8 += pitch;
4736 source9 += pitch;
4737 sourceA += pitch;
4738 sourceB += pitch;
4739 sourceC += pitch;
4740 sourceD += pitch;
4741 sourceE += pitch;
4742 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004743 }
John Bauman89401822014-05-06 15:04:28 -04004744 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004745 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004746 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004747 else
4748 #endif
John Bauman89401822014-05-06 15:04:28 -04004749 {
// AVERAGE(x, y): averages two 32-bit words as a pair of independent unsigned
// 16-bit lanes, rounding halves up; per lane the result equals (a + b + 1) >> 1.
//   (x & y)                       bits set in both operands
//   ((x ^ y) >> 1) & 0x7FFF7FFF   half of the differing bits; the mask clears the
//                                 bit that the shift carries from the upper lane
//                                 into the top of the lower lane
//   (x ^ y) & 0x00010001          adds 1 to a lane whose sum is odd (round up)
// No lane can overflow into its neighbour, since the rounded-up average of two
// 16-bit values is at most 0xFFFF.
// NOTE: each argument is evaluated more than once; pass only side-effect-free
// expressions.
4750 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4751
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004752 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004753 {
4754 for(int y = 0; y < height; y++)
4755 {
4756 for(int x = 0; x < 2 * width; x++)
4757 {
4758 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4759 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4760
4761 c0 = AVERAGE(c0, c1);
4762
4763 *(unsigned int*)(source0 + 4 * x) = c0;
4764 }
4765
4766 source0 += pitch;
4767 source1 += pitch;
4768 }
4769 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004770 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004771 {
4772 for(int y = 0; y < height; y++)
4773 {
4774 for(int x = 0; x < 2 * width; x++)
4775 {
4776 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4777 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4778 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4779 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4780
4781 c0 = AVERAGE(c0, c1);
4782 c2 = AVERAGE(c2, c3);
4783 c0 = AVERAGE(c0, c2);
4784
4785 *(unsigned int*)(source0 + 4 * x) = c0;
4786 }
4787
4788 source0 += pitch;
4789 source1 += pitch;
4790 source2 += pitch;
4791 source3 += pitch;
4792 }
4793 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004794 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004795 {
4796 for(int y = 0; y < height; y++)
4797 {
4798 for(int x = 0; x < 2 * width; x++)
4799 {
4800 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4801 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4802 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4803 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4804 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4805 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4806 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4807 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4808
4809 c0 = AVERAGE(c0, c1);
4810 c2 = AVERAGE(c2, c3);
4811 c4 = AVERAGE(c4, c5);
4812 c6 = AVERAGE(c6, c7);
4813 c0 = AVERAGE(c0, c2);
4814 c4 = AVERAGE(c4, c6);
4815 c0 = AVERAGE(c0, c4);
4816
4817 *(unsigned int*)(source0 + 4 * x) = c0;
4818 }
4819
4820 source0 += pitch;
4821 source1 += pitch;
4822 source2 += pitch;
4823 source3 += pitch;
4824 source4 += pitch;
4825 source5 += pitch;
4826 source6 += pitch;
4827 source7 += pitch;
4828 }
4829 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004830 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004831 {
4832 for(int y = 0; y < height; y++)
4833 {
4834 for(int x = 0; x < 2 * width; x++)
4835 {
4836 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4837 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4838 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4839 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4840 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4841 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4842 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4843 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4844 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4845 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4846 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4847 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4848 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4849 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4850 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4851 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4852
4853 c0 = AVERAGE(c0, c1);
4854 c2 = AVERAGE(c2, c3);
4855 c4 = AVERAGE(c4, c5);
4856 c6 = AVERAGE(c6, c7);
4857 c8 = AVERAGE(c8, c9);
4858 cA = AVERAGE(cA, cB);
4859 cC = AVERAGE(cC, cD);
4860 cE = AVERAGE(cE, cF);
4861 c0 = AVERAGE(c0, c2);
4862 c4 = AVERAGE(c4, c6);
4863 c8 = AVERAGE(c8, cA);
4864 cC = AVERAGE(cC, cE);
4865 c0 = AVERAGE(c0, c4);
4866 c8 = AVERAGE(c8, cC);
4867 c0 = AVERAGE(c0, c8);
4868
4869 *(unsigned int*)(source0 + 4 * x) = c0;
4870 }
4871
4872 source0 += pitch;
4873 source1 += pitch;
4874 source2 += pitch;
4875 source3 += pitch;
4876 source4 += pitch;
4877 source5 += pitch;
4878 source6 += pitch;
4879 source7 += pitch;
4880 source8 += pitch;
4881 source9 += pitch;
4882 sourceA += pitch;
4883 sourceB += pitch;
4884 sourceC += pitch;
4885 sourceD += pitch;
4886 sourceE += pitch;
4887 sourceF += pitch;
4888 }
4889 }
4890 else ASSERT(false);
4891
4892 #undef AVERAGE
4893 }
4894 }
4895 else if(internal.format == FORMAT_R32F)
4896 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004897 #if defined(__i386__) || defined(__x86_64__)
4898 if(CPUID::supportsSSE() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004899 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004900 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04004901 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004902 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004903 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004904 for(int x = 0; x < width; x += 4)
4905 {
4906 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4907 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004908
Nicolas Capens47dc8672017-04-25 12:54:39 -04004909 c0 = _mm_add_ps(c0, c1);
4910 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004911
Nicolas Capens47dc8672017-04-25 12:54:39 -04004912 _mm_store_ps((float*)(source0 + 4 * x), c0);
4913 }
4914
4915 source0 += pitch;
4916 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004917 }
John Bauman89401822014-05-06 15:04:28 -04004918 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004919 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04004920 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004921 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004922 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004923 for(int x = 0; x < width; x += 4)
4924 {
4925 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4926 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4927 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4928 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004929
Nicolas Capens47dc8672017-04-25 12:54:39 -04004930 c0 = _mm_add_ps(c0, c1);
4931 c2 = _mm_add_ps(c2, c3);
4932 c0 = _mm_add_ps(c0, c2);
4933 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004934
Nicolas Capens47dc8672017-04-25 12:54:39 -04004935 _mm_store_ps((float*)(source0 + 4 * x), c0);
4936 }
4937
4938 source0 += pitch;
4939 source1 += pitch;
4940 source2 += pitch;
4941 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004942 }
John Bauman89401822014-05-06 15:04:28 -04004943 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004944 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04004945 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004946 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004947 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004948 for(int x = 0; x < width; x += 4)
4949 {
4950 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4951 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4952 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4953 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4954 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4955 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4956 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4957 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004958
Nicolas Capens47dc8672017-04-25 12:54:39 -04004959 c0 = _mm_add_ps(c0, c1);
4960 c2 = _mm_add_ps(c2, c3);
4961 c4 = _mm_add_ps(c4, c5);
4962 c6 = _mm_add_ps(c6, c7);
4963 c0 = _mm_add_ps(c0, c2);
4964 c4 = _mm_add_ps(c4, c6);
4965 c0 = _mm_add_ps(c0, c4);
4966 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004967
Nicolas Capens47dc8672017-04-25 12:54:39 -04004968 _mm_store_ps((float*)(source0 + 4 * x), c0);
4969 }
4970
4971 source0 += pitch;
4972 source1 += pitch;
4973 source2 += pitch;
4974 source3 += pitch;
4975 source4 += pitch;
4976 source5 += pitch;
4977 source6 += pitch;
4978 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004979 }
John Bauman89401822014-05-06 15:04:28 -04004980 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05004981 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04004982 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004983 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004984 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004985 for(int x = 0; x < width; x += 4)
4986 {
4987 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4988 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4989 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4990 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4991 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4992 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4993 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4994 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4995 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4996 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4997 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4998 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4999 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
5000 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
5001 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
5002 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04005003
Nicolas Capens47dc8672017-04-25 12:54:39 -04005004 c0 = _mm_add_ps(c0, c1);
5005 c2 = _mm_add_ps(c2, c3);
5006 c4 = _mm_add_ps(c4, c5);
5007 c6 = _mm_add_ps(c6, c7);
5008 c8 = _mm_add_ps(c8, c9);
5009 cA = _mm_add_ps(cA, cB);
5010 cC = _mm_add_ps(cC, cD);
5011 cE = _mm_add_ps(cE, cF);
5012 c0 = _mm_add_ps(c0, c2);
5013 c4 = _mm_add_ps(c4, c6);
5014 c8 = _mm_add_ps(c8, cA);
5015 cC = _mm_add_ps(cC, cE);
5016 c0 = _mm_add_ps(c0, c4);
5017 c8 = _mm_add_ps(c8, cC);
5018 c0 = _mm_add_ps(c0, c8);
5019 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005020
Nicolas Capens47dc8672017-04-25 12:54:39 -04005021 _mm_store_ps((float*)(source0 + 4 * x), c0);
5022 }
5023
5024 source0 += pitch;
5025 source1 += pitch;
5026 source2 += pitch;
5027 source3 += pitch;
5028 source4 += pitch;
5029 source5 += pitch;
5030 source6 += pitch;
5031 source7 += pitch;
5032 source8 += pitch;
5033 source9 += pitch;
5034 sourceA += pitch;
5035 sourceB += pitch;
5036 sourceC += pitch;
5037 sourceD += pitch;
5038 sourceE += pitch;
5039 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005040 }
John Bauman89401822014-05-06 15:04:28 -04005041 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005042 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005043 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005044 else
5045 #endif
John Bauman89401822014-05-06 15:04:28 -04005046 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005047 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005048 {
5049 for(int y = 0; y < height; y++)
5050 {
5051 for(int x = 0; x < width; x++)
5052 {
5053 float c0 = *(float*)(source0 + 4 * x);
5054 float c1 = *(float*)(source1 + 4 * x);
5055
5056 c0 = c0 + c1;
5057 c0 *= 1.0f / 2.0f;
5058
5059 *(float*)(source0 + 4 * x) = c0;
5060 }
5061
5062 source0 += pitch;
5063 source1 += pitch;
5064 }
5065 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005066 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005067 {
5068 for(int y = 0; y < height; y++)
5069 {
5070 for(int x = 0; x < width; x++)
5071 {
5072 float c0 = *(float*)(source0 + 4 * x);
5073 float c1 = *(float*)(source1 + 4 * x);
5074 float c2 = *(float*)(source2 + 4 * x);
5075 float c3 = *(float*)(source3 + 4 * x);
5076
5077 c0 = c0 + c1;
5078 c2 = c2 + c3;
5079 c0 = c0 + c2;
5080 c0 *= 1.0f / 4.0f;
5081
5082 *(float*)(source0 + 4 * x) = c0;
5083 }
5084
5085 source0 += pitch;
5086 source1 += pitch;
5087 source2 += pitch;
5088 source3 += pitch;
5089 }
5090 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005091 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005092 {
5093 for(int y = 0; y < height; y++)
5094 {
5095 for(int x = 0; x < width; x++)
5096 {
5097 float c0 = *(float*)(source0 + 4 * x);
5098 float c1 = *(float*)(source1 + 4 * x);
5099 float c2 = *(float*)(source2 + 4 * x);
5100 float c3 = *(float*)(source3 + 4 * x);
5101 float c4 = *(float*)(source4 + 4 * x);
5102 float c5 = *(float*)(source5 + 4 * x);
5103 float c6 = *(float*)(source6 + 4 * x);
5104 float c7 = *(float*)(source7 + 4 * x);
5105
5106 c0 = c0 + c1;
5107 c2 = c2 + c3;
5108 c4 = c4 + c5;
5109 c6 = c6 + c7;
5110 c0 = c0 + c2;
5111 c4 = c4 + c6;
5112 c0 = c0 + c4;
5113 c0 *= 1.0f / 8.0f;
5114
5115 *(float*)(source0 + 4 * x) = c0;
5116 }
5117
5118 source0 += pitch;
5119 source1 += pitch;
5120 source2 += pitch;
5121 source3 += pitch;
5122 source4 += pitch;
5123 source5 += pitch;
5124 source6 += pitch;
5125 source7 += pitch;
5126 }
5127 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005128 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005129 {
5130 for(int y = 0; y < height; y++)
5131 {
5132 for(int x = 0; x < width; x++)
5133 {
5134 float c0 = *(float*)(source0 + 4 * x);
5135 float c1 = *(float*)(source1 + 4 * x);
5136 float c2 = *(float*)(source2 + 4 * x);
5137 float c3 = *(float*)(source3 + 4 * x);
5138 float c4 = *(float*)(source4 + 4 * x);
5139 float c5 = *(float*)(source5 + 4 * x);
5140 float c6 = *(float*)(source6 + 4 * x);
5141 float c7 = *(float*)(source7 + 4 * x);
5142 float c8 = *(float*)(source8 + 4 * x);
5143 float c9 = *(float*)(source9 + 4 * x);
5144 float cA = *(float*)(sourceA + 4 * x);
5145 float cB = *(float*)(sourceB + 4 * x);
5146 float cC = *(float*)(sourceC + 4 * x);
5147 float cD = *(float*)(sourceD + 4 * x);
5148 float cE = *(float*)(sourceE + 4 * x);
5149 float cF = *(float*)(sourceF + 4 * x);
5150
5151 c0 = c0 + c1;
5152 c2 = c2 + c3;
5153 c4 = c4 + c5;
5154 c6 = c6 + c7;
5155 c8 = c8 + c9;
5156 cA = cA + cB;
5157 cC = cC + cD;
5158 cE = cE + cF;
5159 c0 = c0 + c2;
5160 c4 = c4 + c6;
5161 c8 = c8 + cA;
5162 cC = cC + cE;
5163 c0 = c0 + c4;
5164 c8 = c8 + cC;
5165 c0 = c0 + c8;
5166 c0 *= 1.0f / 16.0f;
5167
5168 *(float*)(source0 + 4 * x) = c0;
5169 }
5170
5171 source0 += pitch;
5172 source1 += pitch;
5173 source2 += pitch;
5174 source3 += pitch;
5175 source4 += pitch;
5176 source5 += pitch;
5177 source6 += pitch;
5178 source7 += pitch;
5179 source8 += pitch;
5180 source9 += pitch;
5181 sourceA += pitch;
5182 sourceB += pitch;
5183 sourceC += pitch;
5184 sourceD += pitch;
5185 sourceE += pitch;
5186 sourceF += pitch;
5187 }
5188 }
5189 else ASSERT(false);
5190 }
5191 }
5192 else if(internal.format == FORMAT_G32R32F)
5193 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005194 #if defined(__i386__) || defined(__x86_64__)
5195 if(CPUID::supportsSSE() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04005196 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005197 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005198 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005199 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005200 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005201 for(int x = 0; x < width; x += 2)
5202 {
5203 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5204 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005205
Nicolas Capens47dc8672017-04-25 12:54:39 -04005206 c0 = _mm_add_ps(c0, c1);
5207 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005208
Nicolas Capens47dc8672017-04-25 12:54:39 -04005209 _mm_store_ps((float*)(source0 + 8 * x), c0);
5210 }
5211
5212 source0 += pitch;
5213 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005214 }
John Bauman89401822014-05-06 15:04:28 -04005215 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005216 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005217 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005218 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005219 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005220 for(int x = 0; x < width; x += 2)
5221 {
5222 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5223 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5224 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5225 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005226
Nicolas Capens47dc8672017-04-25 12:54:39 -04005227 c0 = _mm_add_ps(c0, c1);
5228 c2 = _mm_add_ps(c2, c3);
5229 c0 = _mm_add_ps(c0, c2);
5230 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005231
Nicolas Capens47dc8672017-04-25 12:54:39 -04005232 _mm_store_ps((float*)(source0 + 8 * x), c0);
5233 }
5234
5235 source0 += pitch;
5236 source1 += pitch;
5237 source2 += pitch;
5238 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005239 }
John Bauman89401822014-05-06 15:04:28 -04005240 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005241 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005242 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005243 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005244 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005245 for(int x = 0; x < width; x += 2)
5246 {
5247 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5248 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5249 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5250 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5251 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5252 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5253 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5254 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005255
Nicolas Capens47dc8672017-04-25 12:54:39 -04005256 c0 = _mm_add_ps(c0, c1);
5257 c2 = _mm_add_ps(c2, c3);
5258 c4 = _mm_add_ps(c4, c5);
5259 c6 = _mm_add_ps(c6, c7);
5260 c0 = _mm_add_ps(c0, c2);
5261 c4 = _mm_add_ps(c4, c6);
5262 c0 = _mm_add_ps(c0, c4);
5263 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005264
Nicolas Capens47dc8672017-04-25 12:54:39 -04005265 _mm_store_ps((float*)(source0 + 8 * x), c0);
5266 }
5267
5268 source0 += pitch;
5269 source1 += pitch;
5270 source2 += pitch;
5271 source3 += pitch;
5272 source4 += pitch;
5273 source5 += pitch;
5274 source6 += pitch;
5275 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005276 }
John Bauman89401822014-05-06 15:04:28 -04005277 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005278 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005279 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005280 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005281 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005282 for(int x = 0; x < width; x += 2)
5283 {
5284 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5285 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5286 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5287 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5288 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5289 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5290 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5291 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5292 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5293 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5294 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5295 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5296 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5297 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5298 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5299 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04005300
Nicolas Capens47dc8672017-04-25 12:54:39 -04005301 c0 = _mm_add_ps(c0, c1);
5302 c2 = _mm_add_ps(c2, c3);
5303 c4 = _mm_add_ps(c4, c5);
5304 c6 = _mm_add_ps(c6, c7);
5305 c8 = _mm_add_ps(c8, c9);
5306 cA = _mm_add_ps(cA, cB);
5307 cC = _mm_add_ps(cC, cD);
5308 cE = _mm_add_ps(cE, cF);
5309 c0 = _mm_add_ps(c0, c2);
5310 c4 = _mm_add_ps(c4, c6);
5311 c8 = _mm_add_ps(c8, cA);
5312 cC = _mm_add_ps(cC, cE);
5313 c0 = _mm_add_ps(c0, c4);
5314 c8 = _mm_add_ps(c8, cC);
5315 c0 = _mm_add_ps(c0, c8);
5316 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005317
Nicolas Capens47dc8672017-04-25 12:54:39 -04005318 _mm_store_ps((float*)(source0 + 8 * x), c0);
5319 }
5320
5321 source0 += pitch;
5322 source1 += pitch;
5323 source2 += pitch;
5324 source3 += pitch;
5325 source4 += pitch;
5326 source5 += pitch;
5327 source6 += pitch;
5328 source7 += pitch;
5329 source8 += pitch;
5330 source9 += pitch;
5331 sourceA += pitch;
5332 sourceB += pitch;
5333 sourceC += pitch;
5334 sourceD += pitch;
5335 sourceE += pitch;
5336 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005337 }
John Bauman89401822014-05-06 15:04:28 -04005338 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005339 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005340 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005341 else
5342 #endif
John Bauman89401822014-05-06 15:04:28 -04005343 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005344 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005345 {
5346 for(int y = 0; y < height; y++)
5347 {
5348 for(int x = 0; x < 2 * width; x++)
5349 {
5350 float c0 = *(float*)(source0 + 4 * x);
5351 float c1 = *(float*)(source1 + 4 * x);
5352
5353 c0 = c0 + c1;
5354 c0 *= 1.0f / 2.0f;
5355
5356 *(float*)(source0 + 4 * x) = c0;
5357 }
5358
5359 source0 += pitch;
5360 source1 += pitch;
5361 }
5362 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005363 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005364 {
5365 for(int y = 0; y < height; y++)
5366 {
5367 for(int x = 0; x < 2 * width; x++)
5368 {
5369 float c0 = *(float*)(source0 + 4 * x);
5370 float c1 = *(float*)(source1 + 4 * x);
5371 float c2 = *(float*)(source2 + 4 * x);
5372 float c3 = *(float*)(source3 + 4 * x);
5373
5374 c0 = c0 + c1;
5375 c2 = c2 + c3;
5376 c0 = c0 + c2;
5377 c0 *= 1.0f / 4.0f;
5378
5379 *(float*)(source0 + 4 * x) = c0;
5380 }
5381
5382 source0 += pitch;
5383 source1 += pitch;
5384 source2 += pitch;
5385 source3 += pitch;
5386 }
5387 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005388 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005389 {
5390 for(int y = 0; y < height; y++)
5391 {
5392 for(int x = 0; x < 2 * width; x++)
5393 {
5394 float c0 = *(float*)(source0 + 4 * x);
5395 float c1 = *(float*)(source1 + 4 * x);
5396 float c2 = *(float*)(source2 + 4 * x);
5397 float c3 = *(float*)(source3 + 4 * x);
5398 float c4 = *(float*)(source4 + 4 * x);
5399 float c5 = *(float*)(source5 + 4 * x);
5400 float c6 = *(float*)(source6 + 4 * x);
5401 float c7 = *(float*)(source7 + 4 * x);
5402
5403 c0 = c0 + c1;
5404 c2 = c2 + c3;
5405 c4 = c4 + c5;
5406 c6 = c6 + c7;
5407 c0 = c0 + c2;
5408 c4 = c4 + c6;
5409 c0 = c0 + c4;
5410 c0 *= 1.0f / 8.0f;
5411
5412 *(float*)(source0 + 4 * x) = c0;
5413 }
5414
5415 source0 += pitch;
5416 source1 += pitch;
5417 source2 += pitch;
5418 source3 += pitch;
5419 source4 += pitch;
5420 source5 += pitch;
5421 source6 += pitch;
5422 source7 += pitch;
5423 }
5424 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005425 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005426 {
5427 for(int y = 0; y < height; y++)
5428 {
5429 for(int x = 0; x < 2 * width; x++)
5430 {
5431 float c0 = *(float*)(source0 + 4 * x);
5432 float c1 = *(float*)(source1 + 4 * x);
5433 float c2 = *(float*)(source2 + 4 * x);
5434 float c3 = *(float*)(source3 + 4 * x);
5435 float c4 = *(float*)(source4 + 4 * x);
5436 float c5 = *(float*)(source5 + 4 * x);
5437 float c6 = *(float*)(source6 + 4 * x);
5438 float c7 = *(float*)(source7 + 4 * x);
5439 float c8 = *(float*)(source8 + 4 * x);
5440 float c9 = *(float*)(source9 + 4 * x);
5441 float cA = *(float*)(sourceA + 4 * x);
5442 float cB = *(float*)(sourceB + 4 * x);
5443 float cC = *(float*)(sourceC + 4 * x);
5444 float cD = *(float*)(sourceD + 4 * x);
5445 float cE = *(float*)(sourceE + 4 * x);
5446 float cF = *(float*)(sourceF + 4 * x);
5447
5448 c0 = c0 + c1;
5449 c2 = c2 + c3;
5450 c4 = c4 + c5;
5451 c6 = c6 + c7;
5452 c8 = c8 + c9;
5453 cA = cA + cB;
5454 cC = cC + cD;
5455 cE = cE + cF;
5456 c0 = c0 + c2;
5457 c4 = c4 + c6;
5458 c8 = c8 + cA;
5459 cC = cC + cE;
5460 c0 = c0 + c4;
5461 c8 = c8 + cC;
5462 c0 = c0 + c8;
5463 c0 *= 1.0f / 16.0f;
5464
5465 *(float*)(source0 + 4 * x) = c0;
5466 }
5467
5468 source0 += pitch;
5469 source1 += pitch;
5470 source2 += pitch;
5471 source3 += pitch;
5472 source4 += pitch;
5473 source5 += pitch;
5474 source6 += pitch;
5475 source7 += pitch;
5476 source8 += pitch;
5477 source9 += pitch;
5478 sourceA += pitch;
5479 sourceB += pitch;
5480 sourceC += pitch;
5481 sourceD += pitch;
5482 sourceE += pitch;
5483 sourceF += pitch;
5484 }
5485 }
5486 else ASSERT(false);
5487 }
5488 }
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04005489 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F)
John Bauman89401822014-05-06 15:04:28 -04005490 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005491 #if defined(__i386__) || defined(__x86_64__)
5492 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04005493 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005494 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005495 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005496 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005497 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005498 for(int x = 0; x < width; x++)
5499 {
5500 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5501 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005502
Nicolas Capens47dc8672017-04-25 12:54:39 -04005503 c0 = _mm_add_ps(c0, c1);
5504 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005505
Nicolas Capens47dc8672017-04-25 12:54:39 -04005506 _mm_store_ps((float*)(source0 + 16 * x), c0);
5507 }
5508
5509 source0 += pitch;
5510 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005511 }
John Bauman89401822014-05-06 15:04:28 -04005512 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005513 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005514 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005515 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005516 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005517 for(int x = 0; x < width; x++)
5518 {
5519 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5520 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5521 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5522 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005523
Nicolas Capens47dc8672017-04-25 12:54:39 -04005524 c0 = _mm_add_ps(c0, c1);
5525 c2 = _mm_add_ps(c2, c3);
5526 c0 = _mm_add_ps(c0, c2);
5527 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005528
Nicolas Capens47dc8672017-04-25 12:54:39 -04005529 _mm_store_ps((float*)(source0 + 16 * x), c0);
5530 }
5531
5532 source0 += pitch;
5533 source1 += pitch;
5534 source2 += pitch;
5535 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005536 }
John Bauman89401822014-05-06 15:04:28 -04005537 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005538 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005539 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005540 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005541 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005542 for(int x = 0; x < width; x++)
5543 {
5544 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5545 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5546 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5547 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5548 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5549 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5550 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5551 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005552
Nicolas Capens47dc8672017-04-25 12:54:39 -04005553 c0 = _mm_add_ps(c0, c1);
5554 c2 = _mm_add_ps(c2, c3);
5555 c4 = _mm_add_ps(c4, c5);
5556 c6 = _mm_add_ps(c6, c7);
5557 c0 = _mm_add_ps(c0, c2);
5558 c4 = _mm_add_ps(c4, c6);
5559 c0 = _mm_add_ps(c0, c4);
5560 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005561
Nicolas Capens47dc8672017-04-25 12:54:39 -04005562 _mm_store_ps((float*)(source0 + 16 * x), c0);
5563 }
5564
5565 source0 += pitch;
5566 source1 += pitch;
5567 source2 += pitch;
5568 source3 += pitch;
5569 source4 += pitch;
5570 source5 += pitch;
5571 source6 += pitch;
5572 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005573 }
John Bauman89401822014-05-06 15:04:28 -04005574 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005575 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005576 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005577 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005578 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005579 for(int x = 0; x < width; x++)
5580 {
5581 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5582 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5583 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5584 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5585 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5586 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5587 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5588 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5589 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5590 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5591 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5592 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5593 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5594 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5595 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5596 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
John Bauman89401822014-05-06 15:04:28 -04005597
Nicolas Capens47dc8672017-04-25 12:54:39 -04005598 c0 = _mm_add_ps(c0, c1);
5599 c2 = _mm_add_ps(c2, c3);
5600 c4 = _mm_add_ps(c4, c5);
5601 c6 = _mm_add_ps(c6, c7);
5602 c8 = _mm_add_ps(c8, c9);
5603 cA = _mm_add_ps(cA, cB);
5604 cC = _mm_add_ps(cC, cD);
5605 cE = _mm_add_ps(cE, cF);
5606 c0 = _mm_add_ps(c0, c2);
5607 c4 = _mm_add_ps(c4, c6);
5608 c8 = _mm_add_ps(c8, cA);
5609 cC = _mm_add_ps(cC, cE);
5610 c0 = _mm_add_ps(c0, c4);
5611 c8 = _mm_add_ps(c8, cC);
5612 c0 = _mm_add_ps(c0, c8);
5613 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005614
Nicolas Capens47dc8672017-04-25 12:54:39 -04005615 _mm_store_ps((float*)(source0 + 16 * x), c0);
5616 }
5617
5618 source0 += pitch;
5619 source1 += pitch;
5620 source2 += pitch;
5621 source3 += pitch;
5622 source4 += pitch;
5623 source5 += pitch;
5624 source6 += pitch;
5625 source7 += pitch;
5626 source8 += pitch;
5627 source9 += pitch;
5628 sourceA += pitch;
5629 sourceB += pitch;
5630 sourceC += pitch;
5631 sourceD += pitch;
5632 sourceE += pitch;
5633 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005634 }
John Bauman89401822014-05-06 15:04:28 -04005635 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005636 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005637 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005638 else
5639 #endif
John Bauman89401822014-05-06 15:04:28 -04005640 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005641 if(internal.samples == 2)
John Bauman89401822014-05-06 15:04:28 -04005642 {
5643 for(int y = 0; y < height; y++)
5644 {
5645 for(int x = 0; x < 4 * width; x++)
5646 {
5647 float c0 = *(float*)(source0 + 4 * x);
5648 float c1 = *(float*)(source1 + 4 * x);
5649
5650 c0 = c0 + c1;
5651 c0 *= 1.0f / 2.0f;
5652
5653 *(float*)(source0 + 4 * x) = c0;
5654 }
5655
5656 source0 += pitch;
5657 source1 += pitch;
5658 }
5659 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005660 else if(internal.samples == 4)
John Bauman89401822014-05-06 15:04:28 -04005661 {
5662 for(int y = 0; y < height; y++)
5663 {
5664 for(int x = 0; x < 4 * width; x++)
5665 {
5666 float c0 = *(float*)(source0 + 4 * x);
5667 float c1 = *(float*)(source1 + 4 * x);
5668 float c2 = *(float*)(source2 + 4 * x);
5669 float c3 = *(float*)(source3 + 4 * x);
5670
5671 c0 = c0 + c1;
5672 c2 = c2 + c3;
5673 c0 = c0 + c2;
5674 c0 *= 1.0f / 4.0f;
5675
5676 *(float*)(source0 + 4 * x) = c0;
5677 }
5678
5679 source0 += pitch;
5680 source1 += pitch;
5681 source2 += pitch;
5682 source3 += pitch;
5683 }
5684 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005685 else if(internal.samples == 8)
John Bauman89401822014-05-06 15:04:28 -04005686 {
5687 for(int y = 0; y < height; y++)
5688 {
5689 for(int x = 0; x < 4 * width; x++)
5690 {
5691 float c0 = *(float*)(source0 + 4 * x);
5692 float c1 = *(float*)(source1 + 4 * x);
5693 float c2 = *(float*)(source2 + 4 * x);
5694 float c3 = *(float*)(source3 + 4 * x);
5695 float c4 = *(float*)(source4 + 4 * x);
5696 float c5 = *(float*)(source5 + 4 * x);
5697 float c6 = *(float*)(source6 + 4 * x);
5698 float c7 = *(float*)(source7 + 4 * x);
5699
5700 c0 = c0 + c1;
5701 c2 = c2 + c3;
5702 c4 = c4 + c5;
5703 c6 = c6 + c7;
5704 c0 = c0 + c2;
5705 c4 = c4 + c6;
5706 c0 = c0 + c4;
5707 c0 *= 1.0f / 8.0f;
5708
5709 *(float*)(source0 + 4 * x) = c0;
5710 }
5711
5712 source0 += pitch;
5713 source1 += pitch;
5714 source2 += pitch;
5715 source3 += pitch;
5716 source4 += pitch;
5717 source5 += pitch;
5718 source6 += pitch;
5719 source7 += pitch;
5720 }
5721 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005722 else if(internal.samples == 16)
John Bauman89401822014-05-06 15:04:28 -04005723 {
5724 for(int y = 0; y < height; y++)
5725 {
5726 for(int x = 0; x < 4 * width; x++)
5727 {
5728 float c0 = *(float*)(source0 + 4 * x);
5729 float c1 = *(float*)(source1 + 4 * x);
5730 float c2 = *(float*)(source2 + 4 * x);
5731 float c3 = *(float*)(source3 + 4 * x);
5732 float c4 = *(float*)(source4 + 4 * x);
5733 float c5 = *(float*)(source5 + 4 * x);
5734 float c6 = *(float*)(source6 + 4 * x);
5735 float c7 = *(float*)(source7 + 4 * x);
5736 float c8 = *(float*)(source8 + 4 * x);
5737 float c9 = *(float*)(source9 + 4 * x);
5738 float cA = *(float*)(sourceA + 4 * x);
5739 float cB = *(float*)(sourceB + 4 * x);
5740 float cC = *(float*)(sourceC + 4 * x);
5741 float cD = *(float*)(sourceD + 4 * x);
5742 float cE = *(float*)(sourceE + 4 * x);
5743 float cF = *(float*)(sourceF + 4 * x);
5744
5745 c0 = c0 + c1;
5746 c2 = c2 + c3;
5747 c4 = c4 + c5;
5748 c6 = c6 + c7;
5749 c8 = c8 + c9;
5750 cA = cA + cB;
5751 cC = cC + cD;
5752 cE = cE + cF;
5753 c0 = c0 + c2;
5754 c4 = c4 + c6;
5755 c8 = c8 + cA;
5756 cC = cC + cE;
5757 c0 = c0 + c4;
5758 c8 = c8 + cC;
5759 c0 = c0 + c8;
5760 c0 *= 1.0f / 16.0f;
5761
5762 *(float*)(source0 + 4 * x) = c0;
5763 }
5764
5765 source0 += pitch;
5766 source1 += pitch;
5767 source2 += pitch;
5768 source3 += pitch;
5769 source4 += pitch;
5770 source5 += pitch;
5771 source6 += pitch;
5772 source7 += pitch;
5773 source8 += pitch;
5774 source9 += pitch;
5775 sourceA += pitch;
5776 sourceB += pitch;
5777 sourceC += pitch;
5778 sourceD += pitch;
5779 sourceE += pitch;
5780 sourceF += pitch;
5781 }
5782 }
5783 else ASSERT(false);
5784 }
5785 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005786 else if(internal.format == FORMAT_R5G6B5)
5787 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005788 #if defined(__i386__) || defined(__x86_64__)
5789 if(CPUID::supportsSSE2() && (width % 8) == 0)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005790 {
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005791 if(internal.samples == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005792 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005793 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005794 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005795 for(int x = 0; x < width; x += 8)
5796 {
5797 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5798 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005799
Nicolas Capens47dc8672017-04-25 12:54:39 -04005800 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5801 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5802 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5803 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5804 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5805 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005806
Nicolas Capens47dc8672017-04-25 12:54:39 -04005807 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5808 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5809 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5810 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5811 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005812
Nicolas Capens47dc8672017-04-25 12:54:39 -04005813 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5814 }
5815
5816 source0 += pitch;
5817 source1 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005818 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005819 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005820 else if(internal.samples == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005821 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005822 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005823 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005824 for(int x = 0; x < width; x += 8)
5825 {
5826 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5827 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5828 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5829 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005830
Nicolas Capens47dc8672017-04-25 12:54:39 -04005831 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5832 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5833 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5834 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5835 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5836 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5837 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5838 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5839 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5840 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005841
Nicolas Capens47dc8672017-04-25 12:54:39 -04005842 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5843 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5844 c0 = _mm_avg_epu8(c0, c2);
5845 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5846 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5847 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5848 c1 = _mm_avg_epu16(c1, c3);
5849 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5850 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005851
Nicolas Capens47dc8672017-04-25 12:54:39 -04005852 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5853 }
5854
5855 source0 += pitch;
5856 source1 += pitch;
5857 source2 += pitch;
5858 source3 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005859 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005860 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005861 else if(internal.samples == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005862 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005863 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005864 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005865 for(int x = 0; x < width; x += 8)
5866 {
5867 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5868 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5869 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5870 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5871 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5872 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5873 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5874 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005875
Nicolas Capens47dc8672017-04-25 12:54:39 -04005876 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5877 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5878 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5879 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5880 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5881 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5882 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5883 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5884 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5885 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5886 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5887 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5888 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5889 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5890 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5891 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5892 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5893 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005894
Nicolas Capens47dc8672017-04-25 12:54:39 -04005895 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5896 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5897 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5898 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5899 c0 = _mm_avg_epu8(c0, c2);
5900 c4 = _mm_avg_epu8(c4, c6);
5901 c0 = _mm_avg_epu8(c0, c4);
5902 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5903 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5904 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5905 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5906 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5907 c1 = _mm_avg_epu16(c1, c3);
5908 c5 = _mm_avg_epu16(c5, c7);
5909 c1 = _mm_avg_epu16(c1, c5);
5910 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5911 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005912
Nicolas Capens47dc8672017-04-25 12:54:39 -04005913 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5914 }
5915
5916 source0 += pitch;
5917 source1 += pitch;
5918 source2 += pitch;
5919 source3 += pitch;
5920 source4 += pitch;
5921 source5 += pitch;
5922 source6 += pitch;
5923 source7 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005924 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005925 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05005926 else if(internal.samples == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005927 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005928 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005929 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005930 for(int x = 0; x < width; x += 8)
5931 {
5932 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5933 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5934 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5935 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5936 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5937 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5938 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5939 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5940 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5941 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5942 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5943 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5944 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5945 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5946 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5947 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005948
Nicolas Capens47dc8672017-04-25 12:54:39 -04005949 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5950 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5951 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5952 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5953 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5954 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5955 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5956 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5957 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5958 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5959 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5960 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5961 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5962 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5963 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5964 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5965 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5966 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5967 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5968 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5969 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5970 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5971 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5972 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5973 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5974 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5975 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5976 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
5977 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
5978 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
5979 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
5980 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
5981 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
5982 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005983
Nicolas Capens47dc8672017-04-25 12:54:39 -04005984 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5985 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5986 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5987 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5988 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
5989 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
5990 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
5991 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
5992 c0 = _mm_avg_epu8(c0, c2);
5993 c4 = _mm_avg_epu8(c4, c6);
5994 c8 = _mm_avg_epu8(c8, cA);
5995 cC = _mm_avg_epu8(cC, cE);
5996 c0 = _mm_avg_epu8(c0, c4);
5997 c8 = _mm_avg_epu8(c8, cC);
5998 c0 = _mm_avg_epu8(c0, c8);
5999 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
6000 c1 = _mm_avg_epu16(c0__g_, c1__g_);
6001 c3 = _mm_avg_epu16(c2__g_, c3__g_);
6002 c5 = _mm_avg_epu16(c4__g_, c5__g_);
6003 c7 = _mm_avg_epu16(c6__g_, c7__g_);
6004 c9 = _mm_avg_epu16(c8__g_, c9__g_);
6005 cB = _mm_avg_epu16(cA__g_, cB__g_);
6006 cD = _mm_avg_epu16(cC__g_, cD__g_);
6007 cF = _mm_avg_epu16(cE__g_, cF__g_);
6008 c1 = _mm_avg_epu8(c1, c3);
6009 c5 = _mm_avg_epu8(c5, c7);
6010 c9 = _mm_avg_epu8(c9, cB);
6011 cD = _mm_avg_epu8(cD, cF);
6012 c1 = _mm_avg_epu8(c1, c5);
6013 c9 = _mm_avg_epu8(c9, cD);
6014 c1 = _mm_avg_epu8(c1, c9);
6015 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
6016 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04006017
Nicolas Capens47dc8672017-04-25 12:54:39 -04006018 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
6019 }
6020
6021 source0 += pitch;
6022 source1 += pitch;
6023 source2 += pitch;
6024 source3 += pitch;
6025 source4 += pitch;
6026 source5 += pitch;
6027 source6 += pitch;
6028 source7 += pitch;
6029 source8 += pitch;
6030 source9 += pitch;
6031 sourceA += pitch;
6032 sourceB += pitch;
6033 sourceC += pitch;
6034 sourceD += pitch;
6035 sourceE += pitch;
6036 sourceF += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04006037 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04006038 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04006039 else ASSERT(false);
Nicolas Capens0e12a922015-09-04 09:18:15 -04006040 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04006041 else
6042 #endif
Nicolas Capens0e12a922015-09-04 09:18:15 -04006043 {
6044 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
6045
Nicolas Capensbfa23b32017-12-11 10:06:37 -05006046 if(internal.samples == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04006047 {
6048 for(int y = 0; y < height; y++)
6049 {
6050 for(int x = 0; x < width; x++)
6051 {
6052 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6053 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6054
6055 c0 = AVERAGE(c0, c1);
6056
6057 *(unsigned short*)(source0 + 2 * x) = c0;
6058 }
6059
6060 source0 += pitch;
6061 source1 += pitch;
6062 }
6063 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05006064 else if(internal.samples == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04006065 {
6066 for(int y = 0; y < height; y++)
6067 {
6068 for(int x = 0; x < width; x++)
6069 {
6070 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6071 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6072 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6073 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6074
6075 c0 = AVERAGE(c0, c1);
6076 c2 = AVERAGE(c2, c3);
6077 c0 = AVERAGE(c0, c2);
6078
6079 *(unsigned short*)(source0 + 2 * x) = c0;
6080 }
6081
6082 source0 += pitch;
6083 source1 += pitch;
6084 source2 += pitch;
6085 source3 += pitch;
6086 }
6087 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05006088 else if(internal.samples == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04006089 {
6090 for(int y = 0; y < height; y++)
6091 {
6092 for(int x = 0; x < width; x++)
6093 {
6094 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6095 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6096 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6097 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6098 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6099 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6100 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6101 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6102
6103 c0 = AVERAGE(c0, c1);
6104 c2 = AVERAGE(c2, c3);
6105 c4 = AVERAGE(c4, c5);
6106 c6 = AVERAGE(c6, c7);
6107 c0 = AVERAGE(c0, c2);
6108 c4 = AVERAGE(c4, c6);
6109 c0 = AVERAGE(c0, c4);
6110
6111 *(unsigned short*)(source0 + 2 * x) = c0;
6112 }
6113
6114 source0 += pitch;
6115 source1 += pitch;
6116 source2 += pitch;
6117 source3 += pitch;
6118 source4 += pitch;
6119 source5 += pitch;
6120 source6 += pitch;
6121 source7 += pitch;
6122 }
6123 }
Nicolas Capensbfa23b32017-12-11 10:06:37 -05006124 else if(internal.samples == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04006125 {
6126 for(int y = 0; y < height; y++)
6127 {
6128 for(int x = 0; x < width; x++)
6129 {
6130 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6131 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6132 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6133 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6134 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6135 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6136 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6137 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6138 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
6139 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
6140 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
6141 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
6142 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
6143 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
6144 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
6145 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
6146
6147 c0 = AVERAGE(c0, c1);
6148 c2 = AVERAGE(c2, c3);
6149 c4 = AVERAGE(c4, c5);
6150 c6 = AVERAGE(c6, c7);
6151 c8 = AVERAGE(c8, c9);
6152 cA = AVERAGE(cA, cB);
6153 cC = AVERAGE(cC, cD);
6154 cE = AVERAGE(cE, cF);
6155 c0 = AVERAGE(c0, c2);
6156 c4 = AVERAGE(c4, c6);
6157 c8 = AVERAGE(c8, cA);
6158 cC = AVERAGE(cC, cE);
6159 c0 = AVERAGE(c0, c4);
6160 c8 = AVERAGE(c8, cC);
6161 c0 = AVERAGE(c0, c8);
6162
6163 *(unsigned short*)(source0 + 2 * x) = c0;
6164 }
6165
6166 source0 += pitch;
6167 source1 += pitch;
6168 source2 += pitch;
6169 source3 += pitch;
6170 source4 += pitch;
6171 source5 += pitch;
6172 source6 += pitch;
6173 source7 += pitch;
6174 source8 += pitch;
6175 source9 += pitch;
6176 sourceA += pitch;
6177 sourceB += pitch;
6178 sourceC += pitch;
6179 sourceD += pitch;
6180 sourceE += pitch;
6181 sourceF += pitch;
6182 }
6183 }
6184 else ASSERT(false);
6185
6186 #undef AVERAGE
6187 }
6188 }
John Bauman89401822014-05-06 15:04:28 -04006189 else
6190 {
6191 // UNIMPLEMENTED();
6192 }
6193 }
6194}