1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Surface.hpp"
16
17#include "Color.hpp"
18#include "Context.hpp"
19#include "ETC_Decoder.hpp"
20#include "Renderer.hpp"
21#include "Common/Half.hpp"
22#include "Common/Memory.hpp"
23#include "Common/CPUID.hpp"
24#include "Common/Resource.hpp"
25#include "Common/Debug.hpp"
26#include "Reactor/Reactor.hpp"
27
28#if defined(__i386__) || defined(__x86_64__)
29 #include <xmmintrin.h>
30 #include <emmintrin.h>
31#endif
32
33#undef min
34#undef max
35
36namespace sw
37{
38 extern bool quadLayoutEnabled;
39 extern bool complementaryDepthBuffer;
40 extern TranscendentalPrecision logPrecision;
41
42 unsigned int *Surface::palette = 0;
43 unsigned int Surface::paletteID = 0;
44
45 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
46 {
47 ASSERT((x >= -border) && (x < (width + border)));
48 ASSERT((y >= -border) && (y < (height + border)));
49 ASSERT((z >= 0) && (z < depth));
50
51 byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
52
53 for(int i = 0; i < samples; i++)
54 {
55 write(element, color);
56 element += sliceB;
57 }
58 }
59
60 void Surface::Buffer::write(int x, int y, const Color<float> &color)
61 {
62 ASSERT((x >= -border) && (x < (width + border)));
63 ASSERT((y >= -border) && (y < (height + border)));
64
65 byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;
66
67 for(int i = 0; i < samples; i++)
68 {
69 write(element, color);
70 element += sliceB;
71 }
72 }
73
74 inline void Surface::Buffer::write(void *element, const Color<float> &color)
75 {
76 float r = color.r;
77 float g = color.g;
78 float b = color.b;
79 float a = color.a;
80
81 if(isSRGBformat(format))
82 {
83 r = linearToSRGB(r);
84 g = linearToSRGB(g);
85 b = linearToSRGB(b);
86 }
87
88 switch(format)
89 {
90 case FORMAT_A8:
91 *(unsigned char*)element = unorm<8>(a);
92 break;
93 case FORMAT_R8_SNORM:
94 *(char*)element = snorm<8>(r);
95 break;
96 case FORMAT_R8:
97 *(unsigned char*)element = unorm<8>(r);
98 break;
99 case FORMAT_R8I:
100 *(char*)element = scast<8>(r);
101 break;
102 case FORMAT_R8UI:
103 *(unsigned char*)element = ucast<8>(r);
104 break;
105 case FORMAT_R16I:
106 *(short*)element = scast<16>(r);
107 break;
108 case FORMAT_R16UI:
109 *(unsigned short*)element = ucast<16>(r);
110 break;
111 case FORMAT_R32I:
112 *(int*)element = static_cast<int>(r);
113 break;
114 case FORMAT_R32UI:
115 *(unsigned int*)element = static_cast<unsigned int>(r);
116 break;
117 case FORMAT_R3G3B2:
118 *(unsigned char*)element = (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
119 break;
120 case FORMAT_A8R3G3B2:
121 *(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
122 break;
123 case FORMAT_X4R4G4B4:
124 *(unsigned short*)element = 0xF000 | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
125 break;
126 case FORMAT_A4R4G4B4:
127 *(unsigned short*)element = (unorm<4>(a) << 12) | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
128 break;
129 case FORMAT_R4G4B4A4:
130 *(unsigned short*)element = (unorm<4>(r) << 12) | (unorm<4>(g) << 8) | (unorm<4>(b) << 4) | (unorm<4>(a) << 0);
131 break;
132 case FORMAT_R5G6B5:
133 *(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<6>(g) << 5) | (unorm<5>(b) << 0);
134 break;
135 case FORMAT_A1R5G5B5:
136 *(unsigned short*)element = (unorm<1>(a) << 15) | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
137 break;
138 case FORMAT_R5G5B5A1:
139 *(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<5>(g) << 6) | (unorm<5>(b) << 1) | (unorm<5>(a) << 0);
140 break;
141 case FORMAT_X1R5G5B5:
142 *(unsigned short*)element = 0x8000 | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
143 break;
144 case FORMAT_A8R8G8B8:
145 *(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
146 break;
147 case FORMAT_X8R8G8B8:
148 *(unsigned int*)element = 0xFF000000 | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
149 break;
150 case FORMAT_A8B8G8R8_SNORM:
151 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(a)) << 24) |
152 (static_cast<unsigned int>(snorm<8>(b)) << 16) |
153 (static_cast<unsigned int>(snorm<8>(g)) << 8) |
154 (static_cast<unsigned int>(snorm<8>(r)) << 0);
155 break;
156 case FORMAT_A8B8G8R8:
157 case FORMAT_SRGB8_A8:
158 *(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
159 break;
160 case FORMAT_A8B8G8R8I:
161 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(a)) << 24) |
162 (static_cast<unsigned int>(scast<8>(b)) << 16) |
163 (static_cast<unsigned int>(scast<8>(g)) << 8) |
164 (static_cast<unsigned int>(scast<8>(r)) << 0);
165 break;
166 case FORMAT_A8B8G8R8UI:
167 *(unsigned int*)element = (ucast<8>(a) << 24) | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
168 break;
169 case FORMAT_X8B8G8R8_SNORM:
170 *(unsigned int*)element = 0x7F000000 |
171 (static_cast<unsigned int>(snorm<8>(b)) << 16) |
172 (static_cast<unsigned int>(snorm<8>(g)) << 8) |
173 (static_cast<unsigned int>(snorm<8>(r)) << 0);
174 break;
175 case FORMAT_X8B8G8R8:
176 case FORMAT_SRGB8_X8:
177 *(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
178 break;
179 case FORMAT_X8B8G8R8I:
180 *(unsigned int*)element = 0x7F000000 |
181 (static_cast<unsigned int>(scast<8>(b)) << 16) |
182 (static_cast<unsigned int>(scast<8>(g)) << 8) |
183 (static_cast<unsigned int>(scast<8>(r)) << 0);
184 case FORMAT_X8B8G8R8UI:
185 *(unsigned int*)element = 0xFF000000 | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
186 break;
187 case FORMAT_A2R10G10B10:
188 *(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(r) << 20) | (unorm<10>(g) << 10) | (unorm<10>(b) << 0);
189 break;
190 case FORMAT_A2B10G10R10:
191 case FORMAT_A2B10G10R10UI:
192 *(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(b) << 20) | (unorm<10>(g) << 10) | (unorm<10>(r) << 0);
193 break;
194 case FORMAT_G8R8_SNORM:
195 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(g)) << 8) |
196 (static_cast<unsigned short>(snorm<8>(r)) << 0);
197 break;
198 case FORMAT_G8R8:
199 *(unsigned short*)element = (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
200 break;
201 case FORMAT_G8R8I:
202 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(g)) << 8) |
203 (static_cast<unsigned short>(scast<8>(r)) << 0);
204 break;
205 case FORMAT_G8R8UI:
206 *(unsigned short*)element = (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
207 break;
208 case FORMAT_G16R16:
209 *(unsigned int*)element = (unorm<16>(g) << 16) | (unorm<16>(r) << 0);
210 break;
211 case FORMAT_G16R16I:
212 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(g)) << 16) |
213 (static_cast<unsigned int>(scast<16>(r)) << 0);
214 break;
215 case FORMAT_G16R16UI:
216 *(unsigned int*)element = (ucast<16>(g) << 16) | (ucast<16>(r) << 0);
217 break;
218 case FORMAT_G32R32I:
219 case FORMAT_G32R32UI:
220 ((unsigned int*)element)[0] = static_cast<unsigned int>(r);
221 ((unsigned int*)element)[1] = static_cast<unsigned int>(g);
222 break;
223 case FORMAT_A16B16G16R16:
224 ((unsigned short*)element)[0] = unorm<16>(r);
225 ((unsigned short*)element)[1] = unorm<16>(g);
226 ((unsigned short*)element)[2] = unorm<16>(b);
227 ((unsigned short*)element)[3] = unorm<16>(a);
228 break;
229 case FORMAT_A16B16G16R16I:
230 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
231 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
232 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
233 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(a));
234 break;
235 case FORMAT_A16B16G16R16UI:
236 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
237 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
238 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
239 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(a));
240 break;
241 case FORMAT_X16B16G16R16I:
242 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
243 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
244 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
245 break;
246 case FORMAT_X16B16G16R16UI:
247 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
248 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
249 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
250 break;
251 case FORMAT_A32B32G32R32I:
252 case FORMAT_A32B32G32R32UI:
253 ((unsigned int*)element)[0] = static_cast<unsigned int>(r);
254 ((unsigned int*)element)[1] = static_cast<unsigned int>(g);
255 ((unsigned int*)element)[2] = static_cast<unsigned int>(b);
256 ((unsigned int*)element)[3] = static_cast<unsigned int>(a);
257 break;
258 case FORMAT_X32B32G32R32I:
259 case FORMAT_X32B32G32R32UI:
260 ((unsigned int*)element)[0] = static_cast<unsigned int>(r);
261 ((unsigned int*)element)[1] = static_cast<unsigned int>(g);
262 ((unsigned int*)element)[2] = static_cast<unsigned int>(b);
263 break;
264 case FORMAT_V8U8:
265 *(unsigned short*)element = (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
266 break;
267 case FORMAT_L6V5U5:
268 *(unsigned short*)element = (unorm<6>(b) << 10) | (snorm<5>(g) << 5) | (snorm<5>(r) << 0);
269 break;
270 case FORMAT_Q8W8V8U8:
271 *(unsigned int*)element = (snorm<8>(a) << 24) | (snorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
272 break;
273 case FORMAT_X8L8V8U8:
274 *(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
275 break;
276 case FORMAT_V16U16:
277 *(unsigned int*)element = (snorm<16>(g) << 16) | (snorm<16>(r) << 0);
278 break;
279 case FORMAT_A2W10V10U10:
280 *(unsigned int*)element = (unorm<2>(a) << 30) | (snorm<10>(b) << 20) | (snorm<10>(g) << 10) | (snorm<10>(r) << 0);
281 break;
282 case FORMAT_A16W16V16U16:
283 ((unsigned short*)element)[0] = snorm<16>(r);
284 ((unsigned short*)element)[1] = snorm<16>(g);
285 ((unsigned short*)element)[2] = snorm<16>(b);
286 ((unsigned short*)element)[3] = unorm<16>(a);
287 break;
288 case FORMAT_Q16W16V16U16:
289 ((unsigned short*)element)[0] = snorm<16>(r);
290 ((unsigned short*)element)[1] = snorm<16>(g);
291 ((unsigned short*)element)[2] = snorm<16>(b);
292 ((unsigned short*)element)[3] = snorm<16>(a);
293 break;
294 case FORMAT_R8G8B8:
295 ((unsigned char*)element)[0] = unorm<8>(b);
296 ((unsigned char*)element)[1] = unorm<8>(g);
297 ((unsigned char*)element)[2] = unorm<8>(r);
298 break;
299 case FORMAT_B8G8R8:
300 ((unsigned char*)element)[0] = unorm<8>(r);
301 ((unsigned char*)element)[1] = unorm<8>(g);
302 ((unsigned char*)element)[2] = unorm<8>(b);
303 break;
304 case FORMAT_R16F:
305 *(half*)element = (half)r;
306 break;
307 case FORMAT_A16F:
308 *(half*)element = (half)a;
309 break;
310 case FORMAT_G16R16F:
311 ((half*)element)[0] = (half)r;
312 ((half*)element)[1] = (half)g;
313 break;
314 case FORMAT_X16B16G16R16F_UNSIGNED:
315 r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
316 // Fall through to FORMAT_X16B16G16R16F.
317 case FORMAT_X16B16G16R16F:
318 ((half*)element)[3] = 1.0f;
319 // Fall through to FORMAT_B16G16R16F.
320 case FORMAT_B16G16R16F:
321 ((half*)element)[0] = (half)r;
322 ((half*)element)[1] = (half)g;
323 ((half*)element)[2] = (half)b;
324 break;
325 case FORMAT_A16B16G16R16F:
326 ((half*)element)[0] = (half)r;
327 ((half*)element)[1] = (half)g;
328 ((half*)element)[2] = (half)b;
329 ((half*)element)[3] = (half)a;
330 break;
331 case FORMAT_A32F:
332 *(float*)element = a;
333 break;
334 case FORMAT_R32F:
335 *(float*)element = r;
336 break;
337 case FORMAT_G32R32F:
338 ((float*)element)[0] = r;
339 ((float*)element)[1] = g;
340 break;
341 case FORMAT_X32B32G32R32F_UNSIGNED:
342 r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
343 // Fall through to FORMAT_X32B32G32R32F.
344 case FORMAT_X32B32G32R32F:
345 ((float*)element)[3] = 1.0f;
346 // Fall through to FORMAT_B32G32R32F.
347 case FORMAT_B32G32R32F:
348 ((float*)element)[0] = r;
349 ((float*)element)[1] = g;
350 ((float*)element)[2] = b;
351 break;
352 case FORMAT_A32B32G32R32F:
353 ((float*)element)[0] = r;
354 ((float*)element)[1] = g;
355 ((float*)element)[2] = b;
356 ((float*)element)[3] = a;
357 break;
358 case FORMAT_D32F:
359 case FORMAT_D32FS8:
360 case FORMAT_D32F_LOCKABLE:
361 case FORMAT_D32FS8_TEXTURE:
362 case FORMAT_D32F_SHADOW:
363 case FORMAT_D32FS8_SHADOW:
364 *((float*)element) = r;
365 break;
366 case FORMAT_D32F_COMPLEMENTARY:
367 case FORMAT_D32FS8_COMPLEMENTARY:
368 *((float*)element) = 1 - r;
369 break;
370 case FORMAT_S8:
371 *((unsigned char*)element) = unorm<8>(r);
372 break;
373 case FORMAT_L8:
374 *(unsigned char*)element = unorm<8>(r);
375 break;
376 case FORMAT_A4L4:
377 *(unsigned char*)element = (unorm<4>(a) << 4) | (unorm<4>(r) << 0);
378 break;
379 case FORMAT_L16:
380 *(unsigned short*)element = unorm<16>(r);
381 break;
382 case FORMAT_A8L8:
383 *(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<8>(r) << 0);
384 break;
385 case FORMAT_L16F:
386 *(half*)element = (half)r;
387 break;
388 case FORMAT_A16L16F:
389 ((half*)element)[0] = (half)r;
390 ((half*)element)[1] = (half)a;
391 break;
392 case FORMAT_L32F:
393 *(float*)element = r;
394 break;
395 case FORMAT_A32L32F:
396 ((float*)element)[0] = r;
397 ((float*)element)[1] = a;
398 break;
399 default:
400 ASSERT(false);
401 }
402 }
403
404 Color<float> Surface::Buffer::read(int x, int y, int z) const
405 {
406 ASSERT((x >= -border) && (x < (width + border)));
407 ASSERT((y >= -border) && (y < (height + border)));
408 ASSERT((z >= 0) && (z < depth));
409
410 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;
411
412 return read(element);
413 }
414
415 Color<float> Surface::Buffer::read(int x, int y) const
416 {
417 ASSERT((x >= -border) && (x < (width + border)));
418 ASSERT((y >= -border) && (y < (height + border)));
419
420 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
421
422 return read(element);
423 }
424
425 inline Color<float> Surface::Buffer::read(void *element) const
426 {
427 float r = 0.0f;
428 float g = 0.0f;
429 float b = 0.0f;
430 float a = 1.0f;
431
432 switch(format)
433 {
434 case FORMAT_P8:
435 {
436 ASSERT(palette);
437
438 unsigned int abgr = palette[*(unsigned char*)element];
439
440 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
441 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
442 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
443 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
444 }
445 break;
446 case FORMAT_A8P8:
447 {
448 ASSERT(palette);
449
450 unsigned int bgr = palette[((unsigned char*)element)[0]];
451
452 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
453 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
454 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
455 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
456 }
457 break;
458 case FORMAT_A8:
459 r = 0;
460 g = 0;
461 b = 0;
462 a = *(unsigned char*)element * (1.0f / 0xFF);
463 break;
464 case FORMAT_R8_SNORM:
465 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
466 break;
467 case FORMAT_R8:
468 r = *(unsigned char*)element * (1.0f / 0xFF);
469 break;
470 case FORMAT_R8I:
471 r = *(signed char*)element;
472 break;
473 case FORMAT_R8UI:
474 r = *(unsigned char*)element;
475 break;
476 case FORMAT_R3G3B2:
477 {
478 unsigned char rgb = *(unsigned char*)element;
479
480 r = (rgb & 0xE0) * (1.0f / 0xE0);
481 g = (rgb & 0x1C) * (1.0f / 0x1C);
482 b = (rgb & 0x03) * (1.0f / 0x03);
483 }
484 break;
485 case FORMAT_A8R3G3B2:
486 {
487 unsigned short argb = *(unsigned short*)element;
488
489 a = (argb & 0xFF00) * (1.0f / 0xFF00);
490 r = (argb & 0x00E0) * (1.0f / 0x00E0);
491 g = (argb & 0x001C) * (1.0f / 0x001C);
492 b = (argb & 0x0003) * (1.0f / 0x0003);
493 }
494 break;
495 case FORMAT_X4R4G4B4:
496 {
497 unsigned short rgb = *(unsigned short*)element;
498
499 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
500 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
501 b = (rgb & 0x000F) * (1.0f / 0x000F);
502 }
503 break;
504 case FORMAT_A4R4G4B4:
505 {
506 unsigned short argb = *(unsigned short*)element;
507
508 a = (argb & 0xF000) * (1.0f / 0xF000);
509 r = (argb & 0x0F00) * (1.0f / 0x0F00);
510 g = (argb & 0x00F0) * (1.0f / 0x00F0);
511 b = (argb & 0x000F) * (1.0f / 0x000F);
512 }
513 break;
514 case FORMAT_R4G4B4A4:
515 {
516 unsigned short rgba = *(unsigned short*)element;
517
518 r = (rgba & 0xF000) * (1.0f / 0xF000);
519 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
520 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
521 a = (rgba & 0x000F) * (1.0f / 0x000F);
522 }
523 break;
524 case FORMAT_R5G6B5:
525 {
526 unsigned short rgb = *(unsigned short*)element;
527
528 r = (rgb & 0xF800) * (1.0f / 0xF800);
529 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
530 b = (rgb & 0x001F) * (1.0f / 0x001F);
531 }
532 break;
533 case FORMAT_A1R5G5B5:
534 {
535 unsigned short argb = *(unsigned short*)element;
536
537 a = (argb & 0x8000) * (1.0f / 0x8000);
538 r = (argb & 0x7C00) * (1.0f / 0x7C00);
539 g = (argb & 0x03E0) * (1.0f / 0x03E0);
540 b = (argb & 0x001F) * (1.0f / 0x001F);
541 }
542 break;
543 case FORMAT_R5G5B5A1:
544 {
545 unsigned short rgba = *(unsigned short*)element;
546
547 r = (rgba & 0xF800) * (1.0f / 0xF800);
548 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
549 b = (rgba & 0x003E) * (1.0f / 0x003E);
550 a = (rgba & 0x0001) * (1.0f / 0x0001);
551 }
552 break;
553 case FORMAT_X1R5G5B5:
554 {
555 unsigned short xrgb = *(unsigned short*)element;
556
557 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
558 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
559 b = (xrgb & 0x001F) * (1.0f / 0x001F);
560 }
561 break;
562 case FORMAT_A8R8G8B8:
563 {
564 unsigned int argb = *(unsigned int*)element;
565
566 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
567 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
568 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
569 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
570 }
571 break;
572 case FORMAT_X8R8G8B8:
573 {
574 unsigned int xrgb = *(unsigned int*)element;
575
576 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
577 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
578 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
579 }
580 break;
581 case FORMAT_A8B8G8R8_SNORM:
582 {
583 signed char* abgr = (signed char*)element;
584
585 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
586 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
587 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
588 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
589 }
590 break;
591 case FORMAT_A8B8G8R8:
592 case FORMAT_SRGB8_A8:
593 {
594 unsigned int abgr = *(unsigned int*)element;
595
596 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
597 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
598 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
599 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
600 }
601 break;
602 case FORMAT_A8B8G8R8I:
603 {
604 signed char* abgr = (signed char*)element;
605
606 r = abgr[0];
607 g = abgr[1];
608 b = abgr[2];
609 a = abgr[3];
610 }
611 break;
612 case FORMAT_A8B8G8R8UI:
613 {
614 unsigned char* abgr = (unsigned char*)element;
615
616 r = abgr[0];
617 g = abgr[1];
618 b = abgr[2];
619 a = abgr[3];
620 }
621 break;
622 case FORMAT_X8B8G8R8_SNORM:
623 {
624 signed char* bgr = (signed char*)element;
625
626 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
627 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
628 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
629 }
630 break;
631 case FORMAT_X8B8G8R8:
632 case FORMAT_SRGB8_X8:
633 {
634 unsigned int xbgr = *(unsigned int*)element;
635
636 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
637 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
638 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
639 }
640 break;
641 case FORMAT_X8B8G8R8I:
642 {
643 signed char* bgr = (signed char*)element;
644
645 r = bgr[0];
646 g = bgr[1];
647 b = bgr[2];
648 }
649 break;
650 case FORMAT_X8B8G8R8UI:
651 {
652 unsigned char* bgr = (unsigned char*)element;
653
654 r = bgr[0];
655 g = bgr[1];
656 b = bgr[2];
657 }
658 break;
659 case FORMAT_G8R8_SNORM:
660 {
661 signed char* gr = (signed char*)element;
662
663 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
664 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
665 }
666 break;
667 case FORMAT_G8R8:
668 {
669 unsigned short gr = *(unsigned short*)element;
670
671 g = (gr & 0xFF00) * (1.0f / 0xFF00);
672 r = (gr & 0x00FF) * (1.0f / 0x00FF);
673 }
674 break;
675 case FORMAT_G8R8I:
676 {
677 signed char* gr = (signed char*)element;
678
679 r = gr[0];
680 g = gr[1];
681 }
682 break;
683 case FORMAT_G8R8UI:
684 {
685 unsigned char* gr = (unsigned char*)element;
686
687 r = gr[0];
688 g = gr[1];
689 }
690 break;
691 case FORMAT_R16I:
692 r = *((short*)element);
693 break;
694 case FORMAT_R16UI:
695 r = *((unsigned short*)element);
696 break;
697 case FORMAT_G16R16I:
698 {
699 short* gr = (short*)element;
700
701 r = gr[0];
702 g = gr[1];
703 }
704 break;
705 case FORMAT_G16R16:
706 {
707 unsigned int gr = *(unsigned int*)element;
708
709 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
710 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
711 }
712 break;
713 case FORMAT_G16R16UI:
714 {
715 unsigned short* gr = (unsigned short*)element;
716
717 r = gr[0];
718 g = gr[1];
719 }
720 break;
721 case FORMAT_A2R10G10B10:
722 {
723 unsigned int argb = *(unsigned int*)element;
724
725 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
726 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
727 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
728 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
729 }
730 break;
731 case FORMAT_A2B10G10R10:
732 {
733 unsigned int abgr = *(unsigned int*)element;
734
735 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
736 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
737 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
738 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
739 }
740 break;
741 case FORMAT_A2B10G10R10UI:
742 {
743 unsigned int abgr = *(unsigned int*)element;
744
745 a = static_cast<float>((abgr & 0xC0000000) >> 30);
746 b = static_cast<float>((abgr & 0x3FF00000) >> 20);
747 g = static_cast<float>((abgr & 0x000FFC00) >> 10);
748 r = static_cast<float>(abgr & 0x000003FF);
749 }
750 break;
751 case FORMAT_A16B16G16R16I:
752 {
753 short* abgr = (short*)element;
754
755 r = abgr[0];
756 g = abgr[1];
757 b = abgr[2];
758 a = abgr[3];
759 }
760 break;
761 case FORMAT_A16B16G16R16:
762 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
763 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
764 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
765 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
766 break;
767 case FORMAT_A16B16G16R16UI:
768 {
769 unsigned short* abgr = (unsigned short*)element;
770
771 r = abgr[0];
772 g = abgr[1];
773 b = abgr[2];
774 a = abgr[3];
775 }
776 break;
777 case FORMAT_X16B16G16R16I:
778 {
779 short* bgr = (short*)element;
780
781 r = bgr[0];
782 g = bgr[1];
783 b = bgr[2];
784 }
785 break;
786 case FORMAT_X16B16G16R16UI:
787 {
788 unsigned short* bgr = (unsigned short*)element;
789
790 r = bgr[0];
791 g = bgr[1];
792 b = bgr[2];
793 }
794 break;
795 case FORMAT_A32B32G32R32I:
796 {
797 int* abgr = (int*)element;
798
799 r = static_cast<float>(abgr[0]);
800 g = static_cast<float>(abgr[1]);
801 b = static_cast<float>(abgr[2]);
802 a = static_cast<float>(abgr[3]);
803 }
804 break;
805 case FORMAT_A32B32G32R32UI:
806 {
807 unsigned int* abgr = (unsigned int*)element;
808
809 r = static_cast<float>(abgr[0]);
810 g = static_cast<float>(abgr[1]);
811 b = static_cast<float>(abgr[2]);
812 a = static_cast<float>(abgr[3]);
813 }
814 break;
815 case FORMAT_X32B32G32R32I:
816 {
817 int* bgr = (int*)element;
818
819 r = static_cast<float>(bgr[0]);
820 g = static_cast<float>(bgr[1]);
821 b = static_cast<float>(bgr[2]);
822 }
823 break;
824 case FORMAT_X32B32G32R32UI:
825 {
826 unsigned int* bgr = (unsigned int*)element;
827
828 r = static_cast<float>(bgr[0]);
829 g = static_cast<float>(bgr[1]);
830 b = static_cast<float>(bgr[2]);
831 }
832 break;
833 case FORMAT_G32R32I:
834 {
835 int* gr = (int*)element;
836
837 r = static_cast<float>(gr[0]);
838 g = static_cast<float>(gr[1]);
839 }
840 break;
841 case FORMAT_G32R32UI:
842 {
843 unsigned int* gr = (unsigned int*)element;
844
845 r = static_cast<float>(gr[0]);
846 g = static_cast<float>(gr[1]);
847 }
848 break;
849 case FORMAT_R32I:
850 r = static_cast<float>(*((int*)element));
851 break;
852 case FORMAT_R32UI:
853 r = static_cast<float>(*((unsigned int*)element));
854 break;
855 case FORMAT_V8U8:
856 {
857 unsigned short vu = *(unsigned short*)element;
858
859 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
860 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
861 }
862 break;
863 case FORMAT_L6V5U5:
864 {
865 unsigned short lvu = *(unsigned short*)element;
866
867 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
868 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
869 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
870 }
871 break;
872 case FORMAT_Q8W8V8U8:
873 {
874 unsigned int qwvu = *(unsigned int*)element;
875
876 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
877 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
878 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
879 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
880 }
881 break;
882 case FORMAT_X8L8V8U8:
883 {
884 unsigned int xlvu = *(unsigned int*)element;
885
886 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
887 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
888 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
889 }
890 break;
891 case FORMAT_R8G8B8:
892 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
893 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
894 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
895 break;
896 case FORMAT_B8G8R8:
897 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
898 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
899 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
900 break;
901 case FORMAT_V16U16:
902 {
903 unsigned int vu = *(unsigned int*)element;
904
905 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
906 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
907 }
908 break;
909 case FORMAT_A2W10V10U10:
910 {
911 unsigned int awvu = *(unsigned int*)element;
912
913 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
914 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
915 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
916 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
917 }
918 break;
919 case FORMAT_A16W16V16U16:
920 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
921 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
922 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
923 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
924 break;
925 case FORMAT_Q16W16V16U16:
926 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
927 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
928 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
929 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
930 break;
931 case FORMAT_L8:
932 r =
933 g =
934 b = *(unsigned char*)element * (1.0f / 0xFF);
935 break;
936 case FORMAT_A4L4:
937 {
938 unsigned char al = *(unsigned char*)element;
939
940 r =
941 g =
942 b = (al & 0x0F) * (1.0f / 0x0F);
943 a = (al & 0xF0) * (1.0f / 0xF0);
944 }
945 break;
946 case FORMAT_L16:
947 r =
948 g =
949 b = *(unsigned short*)element * (1.0f / 0xFFFF);
950 break;
951 case FORMAT_A8L8:
952 r =
953 g =
954 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
955 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
956 break;
957 case FORMAT_L16F:
958 r =
959 g =
960 b = *(half*)element;
961 break;
962 case FORMAT_A16L16F:
963 r =
964 g =
965 b = ((half*)element)[0];
966 a = ((half*)element)[1];
967 break;
968 case FORMAT_L32F:
969 r =
970 g =
971 b = *(float*)element;
972 break;
973 case FORMAT_A32L32F:
974 r =
975 g =
976 b = ((float*)element)[0];
977 a = ((float*)element)[1];
978 break;
979 case FORMAT_A16F:
980 a = *(half*)element;
981 break;
982 case FORMAT_R16F:
983 r = *(half*)element;
984 break;
985 case FORMAT_G16R16F:
986 r = ((half*)element)[0];
987 g = ((half*)element)[1];
988 break;
989 case FORMAT_X16B16G16R16F:
990 case FORMAT_X16B16G16R16F_UNSIGNED:
991 case FORMAT_B16G16R16F:
992 r = ((half*)element)[0];
993 g = ((half*)element)[1];
994 b = ((half*)element)[2];
995 break;
996 case FORMAT_A16B16G16R16F:
997 r = ((half*)element)[0];
998 g = ((half*)element)[1];
999 b = ((half*)element)[2];
1000 a = ((half*)element)[3];
1001 break;
1002 case FORMAT_A32F:
1003 a = *(float*)element;
1004 break;
1005 case FORMAT_R32F:
1006 r = *(float*)element;
1007 break;
1008 case FORMAT_G32R32F:
1009 r = ((float*)element)[0];
1010 g = ((float*)element)[1];
1011 break;
1012 case FORMAT_X32B32G32R32F:
1013 case FORMAT_X32B32G32R32F_UNSIGNED:
1014 case FORMAT_B32G32R32F:
1015 r = ((float*)element)[0];
1016 g = ((float*)element)[1];
1017 b = ((float*)element)[2];
1018 break;
1019 case FORMAT_A32B32G32R32F:
1020 r = ((float*)element)[0];
1021 g = ((float*)element)[1];
1022 b = ((float*)element)[2];
1023 a = ((float*)element)[3];
1024 break;
1025 case FORMAT_D32F:
1026 case FORMAT_D32FS8:
1027 case FORMAT_D32F_LOCKABLE:
1028 case FORMAT_D32FS8_TEXTURE:
1029 case FORMAT_D32F_SHADOW:
1030 case FORMAT_D32FS8_SHADOW:
1031 r = *(float*)element;
1032 g = r;
1033 b = r;
1034 a = r;
1035 break;
1036 case FORMAT_D32F_COMPLEMENTARY:
1037 case FORMAT_D32FS8_COMPLEMENTARY:
1038 r = 1.0f - *(float*)element;
1039 g = r;
1040 b = r;
1041 a = r;
1042 break;
1043 case FORMAT_S8:
1044 r = *(unsigned char*)element * (1.0f / 0xFF);
1045 break;
1046 default:
1047 ASSERT(false);
1048 }
1049
1050 if(isSRGBformat(format))
1051 {
1052 r = sRGBtoLinear(r);
1053 g = sRGBtoLinear(g);
1054 b = sRGBtoLinear(b);
1055 }
1056
1057 return Color<float>(r, g, b, a);
1058 }
1059
1060 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1061 {
1062 x -= 0.5f;
1063 y -= 0.5f;
1064 z -= 0.5f;
1065
1066 int x0 = clamp((int)x, 0, width - 1);
1067 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1068
1069 int y0 = clamp((int)y, 0, height - 1);
1070 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1071
1072 int z0 = clamp((int)z, 0, depth - 1);
1073 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1074
1075 Color<float> c000 = read(x0, y0, z0);
1076 Color<float> c100 = read(x1, y0, z0);
1077 Color<float> c010 = read(x0, y1, z0);
1078 Color<float> c110 = read(x1, y1, z0);
1079 Color<float> c001 = read(x0, y0, z1);
1080 Color<float> c101 = read(x1, y0, z1);
1081 Color<float> c011 = read(x0, y1, z1);
1082 Color<float> c111 = read(x1, y1, z1);
1083
1084 float fx = x - x0;
1085 float fy = y - y0;
1086 float fz = z - z0;
1087
1088 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1089 c100 *= fx * (1 - fy) * (1 - fz);
1090 c010 *= (1 - fx) * fy * (1 - fz);
1091 c110 *= fx * fy * (1 - fz);
1092 c001 *= (1 - fx) * (1 - fy) * fz;
1093 c101 *= fx * (1 - fy) * fz;
1094 c011 *= (1 - fx) * fy * fz;
1095 c111 *= fx * fy * fz;
1096
1097 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1098 }
1099
1100 Color<float> Surface::Buffer::sample(float x, float y, int layer) const
1101 {
1102 x -= 0.5f;
1103 y -= 0.5f;
1104
1105 int x0 = clamp((int)x, 0, width - 1);
1106 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1107
1108 int y0 = clamp((int)y, 0, height - 1);
1109 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1110
1111 Color<float> c00 = read(x0, y0, layer);
1112 Color<float> c10 = read(x1, y0, layer);
1113 Color<float> c01 = read(x0, y1, layer);
1114 Color<float> c11 = read(x1, y1, layer);
1115
1116 float fx = x - x0;
1117 float fy = y - y0;
1118
1119 c00 *= (1 - fx) * (1 - fy);
1120 c10 *= fx * (1 - fy);
1121 c01 *= (1 - fx) * fy;
1122 c11 *= fx * fy;
1123
1124 return c00 + c10 + c01 + c11;
1125 }
1126
1127 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
1128 {
1129 this->lock = lock;
1130
1131 switch(lock)
1132 {
1133 case LOCK_UNLOCKED:
1134 case LOCK_READONLY:
1135 case LOCK_UPDATE:
1136 break;
1137 case LOCK_WRITEONLY:
1138 case LOCK_READWRITE:
1139 case LOCK_DISCARD:
1140 dirty = true;
1141 break;
1142 default:
1143 ASSERT(false);
1144 }
1145
1146 if(buffer)
1147 {
1148 x += border;
1149 y += border;
1150
1151 switch(format)
1152 {
1153 case FORMAT_DXT1:
1154 case FORMAT_ATI1:
1155 case FORMAT_ETC1:
1156 case FORMAT_R11_EAC:
1157 case FORMAT_SIGNED_R11_EAC:
1158 case FORMAT_RGB8_ETC2:
1159 case FORMAT_SRGB8_ETC2:
1160 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1161 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1162 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1163 case FORMAT_RG11_EAC:
1164 case FORMAT_SIGNED_RG11_EAC:
1165 case FORMAT_RGBA8_ETC2_EAC:
1166 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1167 case FORMAT_RGBA_ASTC_4x4_KHR:
1168 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1169 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1170 case FORMAT_RGBA_ASTC_5x4_KHR:
1171 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1172 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1173 case FORMAT_RGBA_ASTC_5x5_KHR:
1174 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1175 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1176 case FORMAT_RGBA_ASTC_6x5_KHR:
1177 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1178 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1179 case FORMAT_RGBA_ASTC_6x6_KHR:
1180 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1181 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1182 case FORMAT_RGBA_ASTC_8x5_KHR:
1183 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1184 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1185 case FORMAT_RGBA_ASTC_8x6_KHR:
1186 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1187 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1188 case FORMAT_RGBA_ASTC_8x8_KHR:
1189 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1190 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1191 case FORMAT_RGBA_ASTC_10x5_KHR:
1192 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1193 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1194 case FORMAT_RGBA_ASTC_10x6_KHR:
1195 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1196 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1197 case FORMAT_RGBA_ASTC_10x8_KHR:
1198 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1199 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1200 case FORMAT_RGBA_ASTC_10x10_KHR:
1201 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1202 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1203 case FORMAT_RGBA_ASTC_12x10_KHR:
1204 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1205 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1206 case FORMAT_RGBA_ASTC_12x12_KHR:
1207 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1208 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
1209 case FORMAT_DXT3:
1210 case FORMAT_DXT5:
1211 case FORMAT_ATI2:
1212 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1213 default:
1214 return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
1215 }
1216 }
1217
1218 return nullptr;
1219 }
1220
1221 void Surface::Buffer::unlockRect()
1222 {
1223 lock = LOCK_UNLOCKED;
1224 }
1225
1226 class SurfaceImplementation : public Surface
1227 {
1228 public:
1229 SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1230 : Surface(width, height, depth, format, pixels, pitch, slice) {}
1231 SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
1232 : Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}
1233 ~SurfaceImplementation() override {}
1234
1235 void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
1236 {
1237 return Surface::lockInternal(x, y, z, lock, client);
1238 }
1239
1240 void unlockInternal() override
1241 {
1242 Surface::unlockInternal();
1243 }
1244 };
1245
1246 Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1247 {
1248 return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
1249 }
1250
1251 Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
1252 {
1253 return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
1254 }
1255
1256 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1257 {
1258 resource = new Resource(0);
1259 hasParent = false;
1260 ownExternal = false;
1261 depth = max(1, depth);
1262
1263 external.buffer = pixels;
1264 external.width = width;
1265 external.height = height;
1266 external.depth = depth;
1267 external.samples = 1;
1268 external.format = format;
1269 external.bytes = bytes(external.format);
1270 external.pitchB = pitch;
1271 external.pitchP = external.bytes ? pitch / external.bytes : 0;
1272 external.sliceB = slice;
1273 external.sliceP = external.bytes ? slice / external.bytes : 0;
1274 external.border = 0;
1275 external.lock = LOCK_UNLOCKED;
1276 external.dirty = true;
1277
1278 internal.buffer = nullptr;
1279 internal.width = width;
1280 internal.height = height;
1281 internal.depth = depth;
1282 internal.samples = 1;
1283 internal.format = selectInternalFormat(format);
1284 internal.bytes = bytes(internal.format);
1285 internal.pitchB = pitchB(internal.width, 0, internal.format, false);
1286 internal.pitchP = pitchP(internal.width, 0, internal.format, false);
1287 internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
1288 internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
1289 internal.border = 0;
1290 internal.lock = LOCK_UNLOCKED;
1291 internal.dirty = false;
1292
1293 stencil.buffer = nullptr;
1294 stencil.width = width;
1295 stencil.height = height;
1296 stencil.depth = depth;
1297 stencil.samples = 1;
1298 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
1299 stencil.bytes = bytes(stencil.format);
1300 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
1301 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
1302 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
1303 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
1304 stencil.border = 0;
1305 stencil.lock = LOCK_UNLOCKED;
1306 stencil.dirty = false;
1307
1308 dirtyContents = true;
1309 paletteUsed = 0;
1310 }
1311
1312 Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
1313 {
1314 resource = texture ? texture : new Resource(0);
1315 hasParent = texture != nullptr;
1316 ownExternal = true;
1317 depth = max(1, depth);
1318 samples = max(1, samples);
1319
1320 external.buffer = nullptr;
1321 external.width = width;
1322 external.height = height;
1323 external.depth = depth;
1324 external.samples = (short)samples;
1325 external.format = format;
1326 external.bytes = bytes(external.format);
1327 external.pitchB = !pitchPprovided ? pitchB(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided * external.bytes;
1328 external.pitchP = !pitchPprovided ? pitchP(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided;
1329 external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
1330 external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
1331 external.border = 0;
1332 external.lock = LOCK_UNLOCKED;
1333 external.dirty = false;
1334
1335 internal.buffer = nullptr;
1336 internal.width = width;
1337 internal.height = height;
1338 internal.depth = depth;
1339 internal.samples = (short)samples;
1340 internal.format = selectInternalFormat(format);
1341 internal.bytes = bytes(internal.format);
1342 internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1343 internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
1344 internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
1345 internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
1346 internal.border = (short)border;
1347 internal.lock = LOCK_UNLOCKED;
1348 internal.dirty = false;
1349
1350 stencil.buffer = nullptr;
1351 stencil.width = width;
1352 stencil.height = height;
1353 stencil.depth = depth;
1354 stencil.samples = (short)samples;
1355 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
1356 stencil.bytes = bytes(stencil.format);
1357 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
1358 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
1359 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1360 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1361 stencil.border = 0;
1362 stencil.lock = LOCK_UNLOCKED;
1363 stencil.dirty = false;
1364
1365 dirtyContents = true;
1366 paletteUsed = 0;
1367 }
1368
1369 Surface::~Surface()
1370 {
1371 // sync() must be called before this destructor to ensure all locks have been released.
1372 // We can't call it here because the parent resource may already have been destroyed.
1373 ASSERT(isUnlocked());
1374
1375 if(!hasParent)
1376 {
1377 resource->destruct();
1378 }
1379
1380 if(ownExternal)
1381 {
1382 deallocate(external.buffer);
1383 }
1384
1385 if(internal.buffer != external.buffer)
1386 {
1387 deallocate(internal.buffer);
1388 }
1389
1390 deallocate(stencil.buffer);
1391
1392 external.buffer = nullptr;
1393 internal.buffer = nullptr;
1394 stencil.buffer = nullptr;
1395 }
1396
1397 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
1398 {
1399 resource->lock(client);
1400
1401 if(!external.buffer)
1402 {
1403 if(internal.buffer && identicalBuffers())
1404 {
1405 external.buffer = internal.buffer;
1406 }
1407 else
1408 {
1409 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
1410 }
1411 }
1412
1413 if(internal.dirty)
1414 {
1415 if(lock != LOCK_DISCARD)
1416 {
1417 update(external, internal);
1418 }
1419
1420 internal.dirty = false;
1421 }
1422
1423 switch(lock)
1424 {
1425 case LOCK_READONLY:
1426 break;
1427 case LOCK_WRITEONLY:
1428 case LOCK_READWRITE:
1429 case LOCK_DISCARD:
1430 dirtyContents = true;
1431 break;
1432 default:
1433 ASSERT(false);
1434 }
1435
1436 return external.lockRect(x, y, z, lock);
1437 }
1438
1439 void Surface::unlockExternal()
1440 {
1441 external.unlockRect();
1442
1443 resource->unlock();
1444 }
1445
1446 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
1447 {
1448 if(lock != LOCK_UNLOCKED)
1449 {
1450 resource->lock(client);
1451 }
1452
1453 if(!internal.buffer)
1454 {
1455 if(external.buffer && identicalBuffers())
1456 {
1457 internal.buffer = external.buffer;
1458 }
1459 else
1460 {
1461 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
1462 }
1463 }
1464
1465 // FIXME: WHQL requires conversion to lower external precision and back
1466 if(logPrecision >= WHQL)
1467 {
1468 if(internal.dirty && renderTarget && internal.format != external.format)
1469 {
1470 if(lock != LOCK_DISCARD)
1471 {
1472 switch(external.format)
1473 {
1474 case FORMAT_R3G3B2:
1475 case FORMAT_A8R3G3B2:
1476 case FORMAT_A1R5G5B5:
1477 case FORMAT_A2R10G10B10:
1478 case FORMAT_A2B10G10R10:
1479 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1480 unlockExternal();
1481 break;
1482 default:
1483 // Difference passes WHQL
1484 break;
1485 }
1486 }
1487 }
1488 }
1489
1490 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
1491 {
1492 if(lock != LOCK_DISCARD)
1493 {
1494 update(internal, external);
1495 }
1496
1497 external.dirty = false;
1498 paletteUsed = Surface::paletteID;
1499 }
1500
1501 switch(lock)
1502 {
1503 case LOCK_UNLOCKED:
1504 case LOCK_READONLY:
1505 break;
1506 case LOCK_WRITEONLY:
1507 case LOCK_READWRITE:
1508 case LOCK_DISCARD:
1509 dirtyContents = true;
1510 break;
1511 default:
1512 ASSERT(false);
1513 }
1514
1515 if(lock == LOCK_READONLY && client == PUBLIC)
1516 {
1517 resolve();
1518 }
1519
1520 return internal.lockRect(x, y, z, lock);
1521 }
1522
1523 void Surface::unlockInternal()
1524 {
1525 internal.unlockRect();
1526
1527 resource->unlock();
1528 }
1529
1530 void *Surface::lockStencil(int x, int y, int front, Accessor client)
1531 {
1532 resource->lock(client);
1533
1534 if(stencil.format == FORMAT_NULL)
1535 {
1536 return nullptr;
1537 }
1538
1539 if(!stencil.buffer)
1540 {
1541 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
1542 }
1543
1544 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
1545 }
1546
1547 void Surface::unlockStencil()
1548 {
1549 stencil.unlockRect();
1550
1551 resource->unlock();
1552 }
1553
1554 int Surface::bytes(Format format)
1555 {
1556 switch(format)
1557 {
1558 case FORMAT_NULL: return 0;
1559 case FORMAT_P8: return 1;
1560 case FORMAT_A8P8: return 2;
1561 case FORMAT_A8: return 1;
1562 case FORMAT_R8I: return 1;
1563 case FORMAT_R8: return 1;
1564 case FORMAT_R3G3B2: return 1;
1565 case FORMAT_R16I: return 2;
1566 case FORMAT_R16UI: return 2;
1567 case FORMAT_A8R3G3B2: return 2;
1568 case FORMAT_R5G6B5: return 2;
1569 case FORMAT_A1R5G5B5: return 2;
1570 case FORMAT_X1R5G5B5: return 2;
1571 case FORMAT_R5G5B5A1: return 2;
1572 case FORMAT_X4R4G4B4: return 2;
1573 case FORMAT_A4R4G4B4: return 2;
1574 case FORMAT_R4G4B4A4: return 2;
1575 case FORMAT_R8G8B8: return 3;
1576 case FORMAT_B8G8R8: return 3;
1577 case FORMAT_R32I: return 4;
1578 case FORMAT_R32UI: return 4;
1579 case FORMAT_X8R8G8B8: return 4;
1580 // case FORMAT_X8G8R8B8Q: return 4;
1581 case FORMAT_A8R8G8B8: return 4;
1582 // case FORMAT_A8G8R8B8Q: return 4;
1583 case FORMAT_X8B8G8R8I: return 4;
1584 case FORMAT_X8B8G8R8: return 4;
1585 case FORMAT_SRGB8_X8: return 4;
1586 case FORMAT_SRGB8_A8: return 4;
1587 case FORMAT_A8B8G8R8I: return 4;
1588 case FORMAT_R8UI: return 1;
1589 case FORMAT_G8R8UI: return 2;
1590 case FORMAT_X8B8G8R8UI: return 4;
1591 case FORMAT_A8B8G8R8UI: return 4;
1592 case FORMAT_A8B8G8R8: return 4;
1593 case FORMAT_R8_SNORM: return 1;
1594 case FORMAT_G8R8_SNORM: return 2;
1595 case FORMAT_X8B8G8R8_SNORM: return 4;
1596 case FORMAT_A8B8G8R8_SNORM: return 4;
1597 case FORMAT_A2R10G10B10: return 4;
1598 case FORMAT_A2B10G10R10: return 4;
1599 case FORMAT_A2B10G10R10UI: return 4;
1600 case FORMAT_G8R8I: return 2;
1601 case FORMAT_G8R8: return 2;
1602 case FORMAT_G16R16I: return 4;
1603 case FORMAT_G16R16UI: return 4;
1604 case FORMAT_G16R16: return 4;
1605 case FORMAT_G32R32I: return 8;
1606 case FORMAT_G32R32UI: return 8;
1607 case FORMAT_X16B16G16R16I: return 8;
1608 case FORMAT_X16B16G16R16UI: return 8;
1609 case FORMAT_A16B16G16R16I: return 8;
1610 case FORMAT_A16B16G16R16UI: return 8;
1611 case FORMAT_A16B16G16R16: return 8;
1612 case FORMAT_X32B32G32R32I: return 16;
1613 case FORMAT_X32B32G32R32UI: return 16;
1614 case FORMAT_A32B32G32R32I: return 16;
1615 case FORMAT_A32B32G32R32UI: return 16;
1616 // Compressed formats
1617 case FORMAT_DXT1: return 2; // Column of four pixels
1618 case FORMAT_DXT3: return 4; // Column of four pixels
1619 case FORMAT_DXT5: return 4; // Column of four pixels
1620 case FORMAT_ATI1: return 2; // Column of four pixels
1621 case FORMAT_ATI2: return 4; // Column of four pixels
1622 case FORMAT_ETC1: return 2; // Column of four pixels
1623 case FORMAT_R11_EAC: return 2;
1624 case FORMAT_SIGNED_R11_EAC: return 2;
1625 case FORMAT_RG11_EAC: return 4;
1626 case FORMAT_SIGNED_RG11_EAC: return 4;
1627 case FORMAT_RGB8_ETC2: return 2;
1628 case FORMAT_SRGB8_ETC2: return 2;
1629 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1630 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1631 case FORMAT_RGBA8_ETC2_EAC: return 4;
1632 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1633 case FORMAT_RGBA_ASTC_4x4_KHR:
1634 case FORMAT_RGBA_ASTC_5x4_KHR:
1635 case FORMAT_RGBA_ASTC_5x5_KHR:
1636 case FORMAT_RGBA_ASTC_6x5_KHR:
1637 case FORMAT_RGBA_ASTC_6x6_KHR:
1638 case FORMAT_RGBA_ASTC_8x5_KHR:
1639 case FORMAT_RGBA_ASTC_8x6_KHR:
1640 case FORMAT_RGBA_ASTC_8x8_KHR:
1641 case FORMAT_RGBA_ASTC_10x5_KHR:
1642 case FORMAT_RGBA_ASTC_10x6_KHR:
1643 case FORMAT_RGBA_ASTC_10x8_KHR:
1644 case FORMAT_RGBA_ASTC_10x10_KHR:
1645 case FORMAT_RGBA_ASTC_12x10_KHR:
1646 case FORMAT_RGBA_ASTC_12x12_KHR:
1647 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1648 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1649 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1650 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1651 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1652 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1653 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1654 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1655 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1656 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1657 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1658 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1659 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1660 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
1661 // Bumpmap formats
1662 case FORMAT_V8U8: return 2;
1663 case FORMAT_L6V5U5: return 2;
1664 case FORMAT_Q8W8V8U8: return 4;
1665 case FORMAT_X8L8V8U8: return 4;
1666 case FORMAT_A2W10V10U10: return 4;
1667 case FORMAT_V16U16: return 4;
1668 case FORMAT_A16W16V16U16: return 8;
1669 case FORMAT_Q16W16V16U16: return 8;
1670 // Luminance formats
1671 case FORMAT_L8: return 1;
1672 case FORMAT_A4L4: return 1;
1673 case FORMAT_L16: return 2;
1674 case FORMAT_A8L8: return 2;
1675 case FORMAT_L16F: return 2;
1676 case FORMAT_A16L16F: return 4;
1677 case FORMAT_L32F: return 4;
1678 case FORMAT_A32L32F: return 8;
1679 // Floating-point formats
1680 case FORMAT_A16F: return 2;
1681 case FORMAT_R16F: return 2;
1682 case FORMAT_G16R16F: return 4;
1683 case FORMAT_B16G16R16F: return 6;
1684 case FORMAT_X16B16G16R16F: return 8;
1685 case FORMAT_A16B16G16R16F: return 8;
1686 case FORMAT_X16B16G16R16F_UNSIGNED: return 8;
1687 case FORMAT_A32F: return 4;
1688 case FORMAT_R32F: return 4;
1689 case FORMAT_G32R32F: return 8;
1690 case FORMAT_B32G32R32F: return 12;
1691 case FORMAT_X32B32G32R32F: return 16;
1692 case FORMAT_A32B32G32R32F: return 16;
1693 case FORMAT_X32B32G32R32F_UNSIGNED: return 16;
1694 // Depth/stencil formats
1695 case FORMAT_D16: return 2;
1696 case FORMAT_D32: return 4;
1697 case FORMAT_D24X8: return 4;
1698 case FORMAT_D24S8: return 4;
1699 case FORMAT_D24FS8: return 4;
1700 case FORMAT_D32F: return 4;
1701 case FORMAT_D32FS8: return 4;
1702 case FORMAT_D32F_COMPLEMENTARY: return 4;
1703 case FORMAT_D32FS8_COMPLEMENTARY: return 4;
1704 case FORMAT_D32F_LOCKABLE: return 4;
1705 case FORMAT_D32FS8_TEXTURE: return 4;
1706 case FORMAT_D32F_SHADOW: return 4;
1707 case FORMAT_D32FS8_SHADOW: return 4;
1708 case FORMAT_DF24S8: return 4;
1709 case FORMAT_DF16S8: return 2;
1710 case FORMAT_INTZ: return 4;
1711 case FORMAT_S8: return 1;
1712 case FORMAT_YV12_BT601: return 1; // Y plane only
1713 case FORMAT_YV12_BT709: return 1; // Y plane only
1714 case FORMAT_YV12_JFIF: return 1; // Y plane only
1715 default:
1716 ASSERT(false);
1717 }
1718
1719 return 0;
1720 }
1721
1722 int Surface::pitchB(int width, int border, Format format, bool target)
1723 {
1724 width += 2 * border;
1725
1726 // Render targets require 2x2 quads
1727 if(target || isDepth(format) || isStencil(format))
1728 {
1729 width = align<2>(width);
1730 }
1731
1732 switch(format)
1733 {
1734 case FORMAT_DXT1:
1735 case FORMAT_ETC1:
1736 case FORMAT_R11_EAC:
1737 case FORMAT_SIGNED_R11_EAC:
1738 case FORMAT_RGB8_ETC2:
1739 case FORMAT_SRGB8_ETC2:
1740 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1741 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1742 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
1743 case FORMAT_RG11_EAC:
1744 case FORMAT_SIGNED_RG11_EAC:
1745 case FORMAT_RGBA8_ETC2_EAC:
1746 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1747 case FORMAT_RGBA_ASTC_4x4_KHR:
1748 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1749 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1750 case FORMAT_RGBA_ASTC_5x4_KHR:
1751 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1752 case FORMAT_RGBA_ASTC_5x5_KHR:
1753 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1754 return 16 * ((width + 4) / 5);
1755 case FORMAT_RGBA_ASTC_6x5_KHR:
1756 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1757 case FORMAT_RGBA_ASTC_6x6_KHR:
1758 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1759 return 16 * ((width + 5) / 6);
1760 case FORMAT_RGBA_ASTC_8x5_KHR:
1761 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1762 case FORMAT_RGBA_ASTC_8x6_KHR:
1763 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1764 case FORMAT_RGBA_ASTC_8x8_KHR:
1765 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1766 return 16 * ((width + 7) / 8);
1767 case FORMAT_RGBA_ASTC_10x5_KHR:
1768 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1769 case FORMAT_RGBA_ASTC_10x6_KHR:
1770 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1771 case FORMAT_RGBA_ASTC_10x8_KHR:
1772 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1773 case FORMAT_RGBA_ASTC_10x10_KHR:
1774 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1775 return 16 * ((width + 9) / 10);
1776 case FORMAT_RGBA_ASTC_12x10_KHR:
1777 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1778 case FORMAT_RGBA_ASTC_12x12_KHR:
1779 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1780 return 16 * ((width + 11) / 12);
1781 case FORMAT_DXT3:
1782 case FORMAT_DXT5:
1783 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1784 case FORMAT_ATI1:
1785 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1786 case FORMAT_ATI2:
1787 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
1788 case FORMAT_YV12_BT601:
1789 case FORMAT_YV12_BT709:
1790 case FORMAT_YV12_JFIF:
1791 return align<16>(width);
1792 default:
1793 return bytes(format) * width;
1794 }
1795 }
1796
1797 int Surface::pitchP(int width, int border, Format format, bool target)
1798 {
1799 int B = bytes(format);
1800
1801 return B > 0 ? pitchB(width, border, format, target) / B : 0;
1802 }
1803
1804 int Surface::sliceB(int width, int height, int border, Format format, bool target)
1805 {
1806 height += 2 * border;
1807
1808 // Render targets require 2x2 quads
1809 if(target || isDepth(format) || isStencil(format))
1810 {
1811 height = align<2>(height);
1812 }
1813
1814 switch(format)
1815 {
1816 case FORMAT_DXT1:
1817 case FORMAT_DXT3:
1818 case FORMAT_DXT5:
1819 case FORMAT_ETC1:
1820 case FORMAT_R11_EAC:
1821 case FORMAT_SIGNED_R11_EAC:
1822 case FORMAT_RG11_EAC:
1823 case FORMAT_SIGNED_RG11_EAC:
1824 case FORMAT_RGB8_ETC2:
1825 case FORMAT_SRGB8_ETC2:
1826 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1827 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1828 case FORMAT_RGBA8_ETC2_EAC:
1829 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1830 case FORMAT_RGBA_ASTC_4x4_KHR:
1831 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1832 case FORMAT_RGBA_ASTC_5x4_KHR:
1833 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1834 return pitchB(width, border, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
1835 case FORMAT_RGBA_ASTC_5x5_KHR:
1836 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1837 case FORMAT_RGBA_ASTC_6x5_KHR:
1838 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1839 case FORMAT_RGBA_ASTC_8x5_KHR:
1840 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1841 case FORMAT_RGBA_ASTC_10x5_KHR:
1842 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1843 return pitchB(width, border, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
1844 case FORMAT_RGBA_ASTC_6x6_KHR:
1845 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1846 case FORMAT_RGBA_ASTC_8x6_KHR:
1847 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1848 case FORMAT_RGBA_ASTC_10x6_KHR:
1849 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1850 return pitchB(width, border, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
1851 case FORMAT_RGBA_ASTC_8x8_KHR:
1852 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1853 case FORMAT_RGBA_ASTC_10x8_KHR:
1854 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1855 return pitchB(width, border, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
1856 case FORMAT_RGBA_ASTC_10x10_KHR:
1857 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1858 case FORMAT_RGBA_ASTC_12x10_KHR:
1859 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1860 return pitchB(width, border, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
1861 case FORMAT_RGBA_ASTC_12x12_KHR:
1862 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1863 return pitchB(width, border, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
1864 case FORMAT_ATI1:
1865 case FORMAT_ATI2:
1866 return pitchB(width, border, format, target) * align<4>(height); // Pitch computed per row
1867 default:
1868 return pitchB(width, border, format, target) * height; // Pitch computed per row
1869 }
1870 }
1871
1872 int Surface::sliceP(int width, int height, int border, Format format, bool target)
1873 {
1874 int B = bytes(format);
1875
1876 return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
1877 }
1878
1879 void Surface::update(Buffer &destination, Buffer &source)
1880 {
1881 // ASSERT(source.lock != LOCK_UNLOCKED);
1882 // ASSERT(destination.lock != LOCK_UNLOCKED);
1883
1884 if(destination.buffer != source.buffer)
1885 {
1886 ASSERT(source.dirty && !destination.dirty);
1887
1888 switch(source.format)
1889 {
1890 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
1891 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1892 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1893 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1894 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1895 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1896 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1897 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1898 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
1899 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1900 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
1901 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1902 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1903 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1904 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
1905 case FORMAT_ETC1:
1906 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1907 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1908 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1909 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1910 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1911 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1912 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1913 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1914 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1915 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1916 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1917 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1918 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1919 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1920 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1921 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1922 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1923 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1924 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1925 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1926 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1927 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1928 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1929 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1930 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1931 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1932 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1933 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1934 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1935 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1936 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1937 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1938 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1939 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
1940 default: genericUpdate(destination, source); break;
1941 }
1942 }
1943 }
1944
1945 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1946 {
1947 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1948 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
1949
1950 int depth = min(destination.depth, source.depth);
1951 int height = min(destination.height, source.height);
1952 int width = min(destination.width, source.width);
1953 int rowBytes = width * source.bytes;
1954
1955 for(int z = 0; z < depth; z++)
1956 {
1957 unsigned char *sourceRow = sourceSlice;
1958 unsigned char *destinationRow = destinationSlice;
1959
1960 for(int y = 0; y < height; y++)
1961 {
1962 if(source.format == destination.format)
1963 {
1964 memcpy(destinationRow, sourceRow, rowBytes);
1965 }
1966 else
1967 {
1968 unsigned char *sourceElement = sourceRow;
1969 unsigned char *destinationElement = destinationRow;
1970
1971 for(int x = 0; x < width; x++)
1972 {
1973 Color<float> color = source.read(sourceElement);
1974 destination.write(destinationElement, color);
1975
1976 sourceElement += source.bytes;
1977 destinationElement += destination.bytes;
1978 }
1979 }
1980
1981 sourceRow += source.pitchB;
1982 destinationRow += destination.pitchB;
1983 }
1984
1985 sourceSlice += source.sliceB;
1986 destinationSlice += destination.sliceB;
1987 }
1988
1989 source.unlockRect();
1990 destination.unlockRect();
1991 }
1992
1993 void Surface::decodeR8G8B8(Buffer &destination, Buffer &source)
1994 {
1995 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1996 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
1997
1998 int depth = min(destination.depth, source.depth);
1999 int height = min(destination.height, source.height);
2000 int width = min(destination.width, source.width);
2001
2002 for(int z = 0; z < depth; z++)
2003 {
2004 unsigned char *sourceRow = sourceSlice;
2005 unsigned char *destinationRow = destinationSlice;
2006
2007 for(int y = 0; y < height; y++)
2008 {
2009 unsigned char *sourceElement = sourceRow;
2010 unsigned char *destinationElement = destinationRow;
2011
2012 for(int x = 0; x < width; x++)
2013 {
2014 unsigned int b = sourceElement[0];
2015 unsigned int g = sourceElement[1];
2016 unsigned int r = sourceElement[2];
2017
2018 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
2019
2020 sourceElement += source.bytes;
2021 destinationElement += destination.bytes;
2022 }
2023
2024 sourceRow += source.pitchB;
2025 destinationRow += destination.pitchB;
2026 }
2027
2028 sourceSlice += source.sliceB;
2029 destinationSlice += destination.sliceB;
2030 }
2031
2032 source.unlockRect();
2033 destination.unlockRect();
2034 }
2035
2036 void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source)
2037 {
2038 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2039 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
2040
2041 int depth = min(destination.depth, source.depth);
2042 int height = min(destination.height, source.height);
2043 int width = min(destination.width, source.width);
2044
2045 for(int z = 0; z < depth; z++)
2046 {
2047 unsigned char *sourceRow = sourceSlice;
2048 unsigned char *destinationRow = destinationSlice;
2049
2050 for(int y = 0; y < height; y++)
2051 {
2052 unsigned char *sourceElement = sourceRow;
2053 unsigned char *destinationElement = destinationRow;
2054
2055 for(int x = 0; x < width; x++)
2056 {
2057 unsigned int xrgb = *(unsigned short*)sourceElement;
2058
2059 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
2060 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
2061 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
2062
2063 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2064
2065 sourceElement += source.bytes;
2066 destinationElement += destination.bytes;
2067 }
2068
2069 sourceRow += source.pitchB;
2070 destinationRow += destination.pitchB;
2071 }
2072
2073 sourceSlice += source.sliceB;
2074 destinationSlice += destination.sliceB;
2075 }
2076
2077 source.unlockRect();
2078 destination.unlockRect();
2079 }
2080
2081 void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source)
2082 {
2083 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2084 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
2085
2086 int depth = min(destination.depth, source.depth);
2087 int height = min(destination.height, source.height);
2088 int width = min(destination.width, source.width);
2089
2090 for(int z = 0; z < depth; z++)
2091 {
2092 unsigned char *sourceRow = sourceSlice;
2093 unsigned char *destinationRow = destinationSlice;
2094
2095 for(int y = 0; y < height; y++)
2096 {
2097 unsigned char *sourceElement = sourceRow;
2098 unsigned char *destinationElement = destinationRow;
2099
2100 for(int x = 0; x < width; x++)
2101 {
2102 unsigned int argb = *(unsigned short*)sourceElement;
2103
2104 unsigned int a = (argb & 0x8000) * 130560;
2105 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
2106 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
2107 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
2108
2109 *(unsigned int*)destinationElement = a | r | g | b;
2110
2111 sourceElement += source.bytes;
2112 destinationElement += destination.bytes;
2113 }
2114
2115 sourceRow += source.pitchB;
2116 destinationRow += destination.pitchB;
2117 }
2118
2119 sourceSlice += source.sliceB;
2120 destinationSlice += destination.sliceB;
2121 }
2122
2123 source.unlockRect();
2124 destination.unlockRect();
2125 }
2126
2127 void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source)
2128 {
2129 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2130 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
2131
2132 int depth = min(destination.depth, source.depth);
2133 int height = min(destination.height, source.height);
2134 int width = min(destination.width, source.width);
2135
2136 for(int z = 0; z < depth; z++)
2137 {
2138 unsigned char *sourceRow = sourceSlice;
2139 unsigned char *destinationRow = destinationSlice;
2140
2141 for(int y = 0; y < height; y++)
2142 {
2143 unsigned char *sourceElement = sourceRow;
2144 unsigned char *destinationElement = destinationRow;
2145
2146 for(int x = 0; x < width; x++)
2147 {
2148 unsigned int xrgb = *(unsigned short*)sourceElement;
2149
2150 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2151 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2152 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2153
2154 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2155
2156 sourceElement += source.bytes;
2157 destinationElement += destination.bytes;
2158 }
2159
2160 sourceRow += source.pitchB;
2161 destinationRow += destination.pitchB;
2162 }
2163
2164 sourceSlice += source.sliceB;
2165 destinationSlice += destination.sliceB;
2166 }
2167
2168 source.unlockRect();
2169 destination.unlockRect();
2170 }
2171
2172 void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source)
2173 {
2174 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2175 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
2176
2177 int depth = min(destination.depth, source.depth);
2178 int height = min(destination.height, source.height);
2179 int width = min(destination.width, source.width);
2180
2181 for(int z = 0; z < depth; z++)
2182 {
2183 unsigned char *sourceRow = sourceSlice;
2184 unsigned char *destinationRow = destinationSlice;
2185
2186 for(int y = 0; y < height; y++)
2187 {
2188 unsigned char *sourceElement = sourceRow;
2189 unsigned char *destinationElement = destinationRow;
2190
2191 for(int x = 0; x < width; x++)
2192 {
2193 unsigned int argb = *(unsigned short*)sourceElement;
2194
2195 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2196 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2197 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2198 unsigned int b = (argb & 0x000F) * 0x00000011;
2199
2200 *(unsigned int*)destinationElement = a | r | g | b;
2201
2202 sourceElement += source.bytes;
2203 destinationElement += destination.bytes;
2204 }
2205
2206 sourceRow += source.pitchB;
2207 destinationRow += destination.pitchB;
2208 }
2209
2210 sourceSlice += source.sliceB;
2211 destinationSlice += destination.sliceB;
2212 }
2213
2214 source.unlockRect();
2215 destination.unlockRect();
2216 }
2217
2218 void Surface::decodeP8(Buffer &destination, Buffer &source)
2219 {
2220 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2221 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);
2222
2223 int depth = min(destination.depth, source.depth);
2224 int height = min(destination.height, source.height);
2225 int width = min(destination.width, source.width);
2226
2227 for(int z = 0; z < depth; z++)
2228 {
2229 unsigned char *sourceRow = sourceSlice;
2230 unsigned char *destinationRow = destinationSlice;
2231
2232 for(int y = 0; y < height; y++)
2233 {
2234 unsigned char *sourceElement = sourceRow;
2235 unsigned char *destinationElement = destinationRow;
2236
2237 for(int x = 0; x < width; x++)
2238 {
2239 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2240
2241 unsigned int r = (abgr & 0x000000FF) << 16;
2242 unsigned int g = (abgr & 0x0000FF00) << 0;
2243 unsigned int b = (abgr & 0x00FF0000) >> 16;
2244 unsigned int a = (abgr & 0xFF000000) >> 0;
2245
2246 *(unsigned int*)destinationElement = a | r | g | b;
2247
2248 sourceElement += source.bytes;
2249 destinationElement += destination.bytes;
2250 }
2251
2252 sourceRow += source.pitchB;
2253 destinationRow += destination.pitchB;
2254 }
2255
2256 sourceSlice += source.sliceB;
2257 destinationSlice += destination.sliceB;
2258 }
2259
2260 source.unlockRect();
2261 destination.unlockRect();
2262 }
2263
2264 void Surface::decodeDXT1(Buffer &internal, Buffer &external)
2265 {
2266 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
2267 const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY);
2268
2269 for(int z = 0; z < external.depth; z++)
2270 {
2271 unsigned int *dest = destSlice;
2272
2273 for(int y = 0; y < external.height; y += 4)
2274 {
2275 for(int x = 0; x < external.width; x += 4)
2276 {
2277 Color<byte> c[4];
2278
2279 c[0] = source->c0;
2280 c[1] = source->c1;
2281
2282 if(source->c0 > source->c1) // No transparency
2283 {
2284 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2285 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2286 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2287 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2288 c[2].a = 0xFF;
2289
2290 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2291 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2292 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2293 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2294 c[3].a = 0xFF;
2295 }
2296 else // c3 transparent
2297 {
2298 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2299 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2300 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2301 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2302 c[2].a = 0xFF;
2303
2304 c[3].r = 0;
2305 c[3].g = 0;
2306 c[3].b = 0;
2307 c[3].a = 0;
2308 }
2309
2310 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2311 {
2312 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2313 {
2314 dest[(x + i) + (y + j) * internal.pitchP] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2315 }
2316 }
2317
2318 source++;
2319 }
2320 }
2321
2322 (byte*&)destSlice += internal.sliceB;
2323 }
2324
2325 external.unlockRect();
2326 internal.unlockRect();
2327 }
2328
2329 void Surface::decodeDXT3(Buffer &internal, Buffer &external)
2330 {
2331 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
2332 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY);
2333
2334 for(int z = 0; z < external.depth; z++)
2335 {
2336 unsigned int *dest = destSlice;
2337
2338 for(int y = 0; y < external.height; y += 4)
2339 {
2340 for(int x = 0; x < external.width; x += 4)
2341 {
2342 Color<byte> c[4];
2343
2344 c[0] = source->c0;
2345 c[1] = source->c1;
2346
2347 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2348 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2349 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2350 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2351
2352 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2353 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2354 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2355 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2356
2357 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2358 {
2359 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2360 {
2361 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2362 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2363
2364 dest[(x + i) + (y + j) * internal.pitchP] = color;
2365 }
2366 }
2367
2368 source++;
2369 }
2370 }
2371
2372 (byte*&)destSlice += internal.sliceB;
2373 }
2374
2375 external.unlockRect();
2376 internal.unlockRect();
2377 }
2378
2379 void Surface::decodeDXT5(Buffer &internal, Buffer &external)
2380 {
2381 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
2382 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY);
2383
2384 for(int z = 0; z < external.depth; z++)
2385 {
2386 unsigned int *dest = destSlice;
2387
2388 for(int y = 0; y < external.height; y += 4)
2389 {
2390 for(int x = 0; x < external.width; x += 4)
2391 {
2392 Color<byte> c[4];
2393
2394 c[0] = source->c0;
2395 c[1] = source->c1;
2396
2397 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2398 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2399 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2400 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2401
2402 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2403 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2404 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2405 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2406
2407 byte a[8];
2408
2409 a[0] = source->a0;
2410 a[1] = source->a1;
2411
2412 if(a[0] > a[1])
2413 {
2414 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2415 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2416 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2417 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2418 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2419 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2420 }
2421 else
2422 {
2423 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2424 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2425 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2426 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2427 a[6] = 0;
2428 a[7] = 0xFF;
2429 }
2430
2431 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2432 {
2433 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2434 {
2435 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2436 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
2437
2438 dest[(x + i) + (y + j) * internal.pitchP] = color;
2439 }
2440 }
2441
2442 source++;
2443 }
2444 }
2445
2446 (byte*&)destSlice += internal.sliceB;
2447 }
2448
2449 external.unlockRect();
2450 internal.unlockRect();
2451 }
2452
2453 void Surface::decodeATI1(Buffer &internal, Buffer &external)
2454 {
2455 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
2456 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY);
2457
2458 for(int z = 0; z < external.depth; z++)
2459 {
2460 byte *dest = destSlice;
2461
2462 for(int y = 0; y < external.height; y += 4)
2463 {
2464 for(int x = 0; x < external.width; x += 4)
2465 {
2466 byte r[8];
2467
2468 r[0] = source->r0;
2469 r[1] = source->r1;
2470
2471 if(r[0] > r[1])
2472 {
2473 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2474 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2475 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2476 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2477 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2478 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2479 }
2480 else
2481 {
2482 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2483 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2484 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2485 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2486 r[6] = 0;
2487 r[7] = 0xFF;
2488 }
2489
2490 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2491 {
2492 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2493 {
2494 dest[(x + i) + (y + j) * internal.pitchP] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2495 }
2496 }
2497
2498 source++;
2499 }
2500 }
2501
2502 destSlice += internal.sliceB;
2503 }
2504
2505 external.unlockRect();
2506 internal.unlockRect();
2507 }
2508
2509 void Surface::decodeATI2(Buffer &internal, Buffer &external)
2510 {
2511 word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
2512 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY);
2513
2514 for(int z = 0; z < external.depth; z++)
2515 {
2516 word *dest = destSlice;
2517
2518 for(int y = 0; y < external.height; y += 4)
2519 {
2520 for(int x = 0; x < external.width; x += 4)
2521 {
2522 byte X[8];
2523
2524 X[0] = source->x0;
2525 X[1] = source->x1;
2526
2527 if(X[0] > X[1])
2528 {
2529 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2530 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2531 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2532 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2533 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2534 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2535 }
2536 else
2537 {
2538 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2539 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2540 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2541 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2542 X[6] = 0;
2543 X[7] = 0xFF;
2544 }
2545
2546 byte Y[8];
2547
2548 Y[0] = source->y0;
2549 Y[1] = source->y1;
2550
2551 if(Y[0] > Y[1])
2552 {
2553 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2554 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2555 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2556 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2557 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2558 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2559 }
2560 else
2561 {
2562 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2563 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2564 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2565 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2566 Y[6] = 0;
2567 Y[7] = 0xFF;
2568 }
2569
2570 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2571 {
2572 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2573 {
2574 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2575 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2576
2577 dest[(x + i) + (y + j) * internal.pitchP] = (g << 8) + r;
2578 }
2579 }
2580
2581 source++;
2582 }
2583 }
2584
2585 (byte*&)destSlice += internal.sliceB;
2586 }
2587
2588 external.unlockRect();
2589 internal.unlockRect();
2590 }
2591
2592 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB)
2593 {
2594 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2595 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
2596 external.unlockRect();
2597 internal.unlockRect();
2598
2599 if(isSRGB)
2600 {
2601 static byte sRGBtoLinearTable[256];
2602 static bool sRGBtoLinearTableDirty = true;
2603 if(sRGBtoLinearTableDirty)
2604 {
2605 for(int i = 0; i < 256; i++)
2606 {
2607 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
2608 }
2609 sRGBtoLinearTableDirty = false;
2610 }
2611
2612 // Perform sRGB conversion in place after decoding
2613 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
2614 for(int y = 0; y < internal.height; y++)
2615 {
2616 byte *srcRow = src + y * internal.pitchB;
2617 for(int x = 0; x < internal.width; x++)
2618 {
2619 byte *srcPix = srcRow + x * internal.bytes;
2620 for(int i = 0; i < 3; i++)
2621 {
2622 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2623 }
2624 }
2625 }
2626 internal.unlockRect();
2627 }
2628 }
2629
2630 void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
2631 {
2632 ASSERT(nbChannels == 1 || nbChannels == 2);
2633
2634 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
2635 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2636 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
2637 external.unlockRect();
2638
2639 // FIXME: We convert EAC data to float, until signed short internal formats are supported
2640 // This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats
2641 const float normalization = isSigned ? (1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f));
2642 for(int y = 0; y < internal.height; y++)
2643 {
2644 byte* srcRow = src + y * internal.pitchB;
2645 for(int x = internal.width - 1; x >= 0; x--)
2646 {
2647 int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes);
2648 float* dstPix = reinterpret_cast<float*>(srcPix);
2649 for(int c = nbChannels - 1; c >= 0; c--)
2650 {
2651 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
2652 }
2653 }
2654 }
2655
2656 internal.unlockRect();
2657 }
2658
2659 void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
2660 {
2661 }
2662
2663 size_t Surface::size(int width, int height, int depth, int border, int samples, Format format)
2664 {
2665 samples = max(1, samples);
2666
2667 switch(format)
2668 {
2669 default:
2670 {
2671 uint64_t size = (uint64_t)sliceB(width, height, border, format, true) * depth * samples;
2672
2673 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
2674 // and stencil operations also read 8 bytes per four 8-bit stencil values,
2675 // so we have to allocate 4 extra bytes to avoid buffer overruns.
2676 size += 4;
2677
2678 // We can only sample buffers smaller than 2 GiB.
2679 // Force an out-of-memory if larger, or let the caller report an error.
2680 return size < 0x80000000u ? (size_t)size : std::numeric_limits<size_t>::max();
2681 }
2682 case FORMAT_YV12_BT601:
2683 case FORMAT_YV12_BT709:
2684 case FORMAT_YV12_JFIF:
2685 {
2686 width += 2 * border;
2687 height += 2 * border;
2688
2689 size_t YStride = align<16>(width);
2690 size_t YSize = YStride * height;
2691 size_t CStride = align<16>(YStride / 2);
2692 size_t CSize = CStride * height / 2;
2693
2694 return YSize + 2 * CSize;
2695 }
2696 }
2697 }
2698
2699 bool Surface::isStencil(Format format)
2700 {
2701 switch(format)
2702 {
2703 case FORMAT_D32:
2704 case FORMAT_D16:
2705 case FORMAT_D24X8:
2706 case FORMAT_D32F:
2707 case FORMAT_D32F_COMPLEMENTARY:
2708 case FORMAT_D32F_LOCKABLE:
2709 case FORMAT_D32F_SHADOW:
2710 return false;
2711 case FORMAT_D24S8:
2712 case FORMAT_D24FS8:
2713 case FORMAT_S8:
2714 case FORMAT_DF24S8:
2715 case FORMAT_DF16S8:
2716 case FORMAT_D32FS8_TEXTURE:
2717 case FORMAT_D32FS8_SHADOW:
2718 case FORMAT_D32FS8:
2719 case FORMAT_D32FS8_COMPLEMENTARY:
2720 case FORMAT_INTZ:
2721 return true;
2722 default:
2723 return false;
2724 }
2725 }
2726
2727 bool Surface::isDepth(Format format)
2728 {
2729 switch(format)
2730 {
2731 case FORMAT_D32:
2732 case FORMAT_D16:
2733 case FORMAT_D24X8:
2734 case FORMAT_D24S8:
2735 case FORMAT_D24FS8:
2736 case FORMAT_D32F:
2737 case FORMAT_D32FS8:
2738 case FORMAT_D32F_COMPLEMENTARY:
2739 case FORMAT_D32FS8_COMPLEMENTARY:
2740 case FORMAT_D32F_LOCKABLE:
2741 case FORMAT_DF24S8:
2742 case FORMAT_DF16S8:
2743 case FORMAT_D32FS8_TEXTURE:
2744 case FORMAT_D32F_SHADOW:
2745 case FORMAT_D32FS8_SHADOW:
2746 case FORMAT_INTZ:
2747 return true;
2748 case FORMAT_S8:
2749 return false;
2750 default:
2751 return false;
2752 }
2753 }
2754
2755 bool Surface::hasQuadLayout(Format format)
2756 {
2757 switch(format)
2758 {
2759 case FORMAT_D32:
2760 case FORMAT_D16:
2761 case FORMAT_D24X8:
2762 case FORMAT_D24S8:
2763 case FORMAT_D24FS8:
2764 case FORMAT_D32F:
2765 case FORMAT_D32FS8:
2766 case FORMAT_D32F_COMPLEMENTARY:
2767 case FORMAT_D32FS8_COMPLEMENTARY:
2768 case FORMAT_DF24S8:
2769 case FORMAT_DF16S8:
2770 case FORMAT_INTZ:
2771 case FORMAT_S8:
2772 case FORMAT_A8G8R8B8Q:
2773 case FORMAT_X8G8R8B8Q:
2774 return true;
2775 case FORMAT_D32F_LOCKABLE:
2776 case FORMAT_D32FS8_TEXTURE:
2777 case FORMAT_D32F_SHADOW:
2778 case FORMAT_D32FS8_SHADOW:
2779 default:
2780 break;
2781 }
2782
2783 return false;
2784 }
2785
2786 bool Surface::isPalette(Format format)
2787 {
2788 switch(format)
2789 {
2790 case FORMAT_P8:
2791 case FORMAT_A8P8:
2792 return true;
2793 default:
2794 return false;
2795 }
2796 }
2797
2798 bool Surface::isFloatFormat(Format format)
2799 {
2800 switch(format)
2801 {
2802 case FORMAT_R5G6B5:
2803 case FORMAT_R8G8B8:
2804 case FORMAT_B8G8R8:
2805 case FORMAT_X8R8G8B8:
2806 case FORMAT_X8B8G8R8I:
2807 case FORMAT_X8B8G8R8:
2808 case FORMAT_A8R8G8B8:
2809 case FORMAT_SRGB8_X8:
2810 case FORMAT_SRGB8_A8:
2811 case FORMAT_A8B8G8R8I:
2812 case FORMAT_R8UI:
2813 case FORMAT_G8R8UI:
2814 case FORMAT_X8B8G8R8UI:
2815 case FORMAT_A8B8G8R8UI:
2816 case FORMAT_A8B8G8R8:
2817 case FORMAT_G8R8I:
2818 case FORMAT_G8R8:
2819 case FORMAT_A2B10G10R10:
2820 case FORMAT_A2B10G10R10UI:
2821 case FORMAT_R8_SNORM:
2822 case FORMAT_G8R8_SNORM:
2823 case FORMAT_X8B8G8R8_SNORM:
2824 case FORMAT_A8B8G8R8_SNORM:
2825 case FORMAT_R16I:
2826 case FORMAT_R16UI:
2827 case FORMAT_G16R16I:
2828 case FORMAT_G16R16UI:
2829 case FORMAT_G16R16:
2830 case FORMAT_X16B16G16R16I:
2831 case FORMAT_X16B16G16R16UI:
2832 case FORMAT_A16B16G16R16I:
2833 case FORMAT_A16B16G16R16UI:
2834 case FORMAT_A16B16G16R16:
2835 case FORMAT_V8U8:
2836 case FORMAT_Q8W8V8U8:
2837 case FORMAT_X8L8V8U8:
2838 case FORMAT_V16U16:
2839 case FORMAT_A16W16V16U16:
2840 case FORMAT_Q16W16V16U16:
2841 case FORMAT_A8:
2842 case FORMAT_R8I:
2843 case FORMAT_R8:
2844 case FORMAT_S8:
2845 case FORMAT_L8:
2846 case FORMAT_L16:
2847 case FORMAT_A8L8:
2848 case FORMAT_YV12_BT601:
2849 case FORMAT_YV12_BT709:
2850 case FORMAT_YV12_JFIF:
2851 case FORMAT_R32I:
2852 case FORMAT_R32UI:
2853 case FORMAT_G32R32I:
2854 case FORMAT_G32R32UI:
2855 case FORMAT_X32B32G32R32I:
2856 case FORMAT_X32B32G32R32UI:
2857 case FORMAT_A32B32G32R32I:
2858 case FORMAT_A32B32G32R32UI:
2859 return false;
2860 case FORMAT_R16F:
2861 case FORMAT_G16R16F:
2862 case FORMAT_B16G16R16F:
2863 case FORMAT_X16B16G16R16F:
2864 case FORMAT_A16B16G16R16F:
2865 case FORMAT_X16B16G16R16F_UNSIGNED:
2866 case FORMAT_R32F:
2867 case FORMAT_G32R32F:
2868 case FORMAT_B32G32R32F:
2869 case FORMAT_X32B32G32R32F:
2870 case FORMAT_A32B32G32R32F:
2871 case FORMAT_X32B32G32R32F_UNSIGNED:
2872 case FORMAT_D32F:
2873 case FORMAT_D32FS8:
2874 case FORMAT_D32F_COMPLEMENTARY:
2875 case FORMAT_D32FS8_COMPLEMENTARY:
2876 case FORMAT_D32F_LOCKABLE:
2877 case FORMAT_D32FS8_TEXTURE:
2878 case FORMAT_D32F_SHADOW:
2879 case FORMAT_D32FS8_SHADOW:
2880 case FORMAT_L16F:
2881 case FORMAT_A16L16F:
2882 case FORMAT_L32F:
2883 case FORMAT_A32L32F:
2884 return true;
2885 default:
2886 ASSERT(false);
2887 }
2888
2889 return false;
2890 }
2891
2892 bool Surface::isUnsignedComponent(Format format, int component)
2893 {
2894 switch(format)
2895 {
2896 case FORMAT_NULL:
2897 case FORMAT_R5G6B5:
2898 case FORMAT_R8G8B8:
2899 case FORMAT_B8G8R8:
2900 case FORMAT_X8R8G8B8:
2901 case FORMAT_X8B8G8R8:
2902 case FORMAT_A8R8G8B8:
2903 case FORMAT_A8B8G8R8:
2904 case FORMAT_SRGB8_X8:
2905 case FORMAT_SRGB8_A8:
2906 case FORMAT_G8R8:
2907 case FORMAT_A2B10G10R10:
2908 case FORMAT_A2B10G10R10UI:
2909 case FORMAT_R16UI:
2910 case FORMAT_G16R16:
2911 case FORMAT_G16R16UI:
2912 case FORMAT_X16B16G16R16UI:
2913 case FORMAT_A16B16G16R16:
2914 case FORMAT_A16B16G16R16UI:
2915 case FORMAT_R32UI:
2916 case FORMAT_G32R32UI:
2917 case FORMAT_X32B32G32R32UI:
2918 case FORMAT_A32B32G32R32UI:
2919 case FORMAT_X32B32G32R32F_UNSIGNED:
2920 case FORMAT_R8UI:
2921 case FORMAT_G8R8UI:
2922 case FORMAT_X8B8G8R8UI:
2923 case FORMAT_A8B8G8R8UI:
2924 case FORMAT_D32F:
2925 case FORMAT_D32FS8:
2926 case FORMAT_D32F_COMPLEMENTARY:
2927 case FORMAT_D32FS8_COMPLEMENTARY:
2928 case FORMAT_D32F_LOCKABLE:
2929 case FORMAT_D32FS8_TEXTURE:
2930 case FORMAT_D32F_SHADOW:
2931 case FORMAT_D32FS8_SHADOW:
2932 case FORMAT_A8:
2933 case FORMAT_R8:
2934 case FORMAT_L8:
2935 case FORMAT_L16:
2936 case FORMAT_A8L8:
2937 case FORMAT_YV12_BT601:
2938 case FORMAT_YV12_BT709:
2939 case FORMAT_YV12_JFIF:
2940 return true;
2941 case FORMAT_A8B8G8R8I:
2942 case FORMAT_A16B16G16R16I:
2943 case FORMAT_A32B32G32R32I:
2944 case FORMAT_A8B8G8R8_SNORM:
2945 case FORMAT_Q8W8V8U8:
2946 case FORMAT_Q16W16V16U16:
2947 case FORMAT_A32B32G32R32F:
2948 return false;
2949 case FORMAT_R32F:
2950 case FORMAT_R8I:
2951 case FORMAT_R16I:
2952 case FORMAT_R32I:
2953 case FORMAT_R8_SNORM:
2954 return component >= 1;
2955 case FORMAT_V8U8:
2956 case FORMAT_X8L8V8U8:
2957 case FORMAT_V16U16:
2958 case FORMAT_G32R32F:
2959 case FORMAT_G8R8I:
2960 case FORMAT_G16R16I:
2961 case FORMAT_G32R32I:
2962 case FORMAT_G8R8_SNORM:
2963 return component >= 2;
2964 case FORMAT_A16W16V16U16:
2965 case FORMAT_B32G32R32F:
2966 case FORMAT_X32B32G32R32F:
2967 case FORMAT_X8B8G8R8I:
2968 case FORMAT_X16B16G16R16I:
2969 case FORMAT_X32B32G32R32I:
2970 case FORMAT_X8B8G8R8_SNORM:
2971 return component >= 3;
2972 default:
2973 ASSERT(false);
2974 }
2975
2976 return false;
2977 }
2978
2979 bool Surface::isSRGBreadable(Format format)
2980 {
2981 // Keep in sync with Capabilities::isSRGBreadable
2982 switch(format)
2983 {
2984 case FORMAT_L8:
2985 case FORMAT_A8L8:
2986 case FORMAT_R8G8B8:
2987 case FORMAT_A8R8G8B8:
2988 case FORMAT_X8R8G8B8:
2989 case FORMAT_A8B8G8R8:
2990 case FORMAT_X8B8G8R8:
2991 case FORMAT_SRGB8_X8:
2992 case FORMAT_SRGB8_A8:
2993 case FORMAT_R5G6B5:
2994 case FORMAT_X1R5G5B5:
2995 case FORMAT_A1R5G5B5:
2996 case FORMAT_A4R4G4B4:
2997 case FORMAT_DXT1:
2998 case FORMAT_DXT3:
2999 case FORMAT_DXT5:
3000 case FORMAT_ATI1:
3001 case FORMAT_ATI2:
3002 return true;
3003 default:
3004 return false;
3005 }
3006 }
3007
3008 bool Surface::isSRGBwritable(Format format)
3009 {
3010 // Keep in sync with Capabilities::isSRGBwritable
3011 switch(format)
3012 {
3013 case FORMAT_NULL:
3014 case FORMAT_A8R8G8B8:
3015 case FORMAT_X8R8G8B8:
3016 case FORMAT_A8B8G8R8:
3017 case FORMAT_X8B8G8R8:
3018 case FORMAT_SRGB8_X8:
3019 case FORMAT_SRGB8_A8:
3020 case FORMAT_R5G6B5:
3021 return true;
3022 default:
3023 return false;
3024 }
3025 }
3026
3027 bool Surface::isSRGBformat(Format format)
3028 {
3029 switch(format)
3030 {
3031 case FORMAT_SRGB8_X8:
3032 case FORMAT_SRGB8_A8:
3033 return true;
3034 default:
3035 return false;
3036 }
3037 }
3038
3039 bool Surface::isCompressed(Format format)
3040 {
3041 switch(format)
3042 {
3043 case FORMAT_DXT1:
3044 case FORMAT_DXT3:
3045 case FORMAT_DXT5:
3046 case FORMAT_ATI1:
3047 case FORMAT_ATI2:
3048 case FORMAT_ETC1:
3049 case FORMAT_R11_EAC:
3050 case FORMAT_SIGNED_R11_EAC:
3051 case FORMAT_RG11_EAC:
3052 case FORMAT_SIGNED_RG11_EAC:
3053 case FORMAT_RGB8_ETC2:
3054 case FORMAT_SRGB8_ETC2:
3055 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3056 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3057 case FORMAT_RGBA8_ETC2_EAC:
3058 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3059 case FORMAT_RGBA_ASTC_4x4_KHR:
3060 case FORMAT_RGBA_ASTC_5x4_KHR:
3061 case FORMAT_RGBA_ASTC_5x5_KHR:
3062 case FORMAT_RGBA_ASTC_6x5_KHR:
3063 case FORMAT_RGBA_ASTC_6x6_KHR:
3064 case FORMAT_RGBA_ASTC_8x5_KHR:
3065 case FORMAT_RGBA_ASTC_8x6_KHR:
3066 case FORMAT_RGBA_ASTC_8x8_KHR:
3067 case FORMAT_RGBA_ASTC_10x5_KHR:
3068 case FORMAT_RGBA_ASTC_10x6_KHR:
3069 case FORMAT_RGBA_ASTC_10x8_KHR:
3070 case FORMAT_RGBA_ASTC_10x10_KHR:
3071 case FORMAT_RGBA_ASTC_12x10_KHR:
3072 case FORMAT_RGBA_ASTC_12x12_KHR:
3073 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3074 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3075 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3076 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3077 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3078 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3079 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3080 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3081 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3082 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3083 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3084 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3085 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3086 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3087 return true;
3088 default:
3089 return false;
3090 }
3091 }
3092
3093 bool Surface::isSignedNonNormalizedInteger(Format format)
3094 {
3095 switch(format)
3096 {
3097 case FORMAT_A8B8G8R8I:
3098 case FORMAT_X8B8G8R8I:
3099 case FORMAT_G8R8I:
3100 case FORMAT_R8I:
3101 case FORMAT_A16B16G16R16I:
3102 case FORMAT_X16B16G16R16I:
3103 case FORMAT_G16R16I:
3104 case FORMAT_R16I:
3105 case FORMAT_A32B32G32R32I:
3106 case FORMAT_X32B32G32R32I:
3107 case FORMAT_G32R32I:
3108 case FORMAT_R32I:
3109 return true;
3110 default:
3111 return false;
3112 }
3113 }
3114
3115 bool Surface::isUnsignedNonNormalizedInteger(Format format)
3116 {
3117 switch(format)
3118 {
3119 case FORMAT_A8B8G8R8UI:
3120 case FORMAT_X8B8G8R8UI:
3121 case FORMAT_G8R8UI:
3122 case FORMAT_R8UI:
3123 case FORMAT_A16B16G16R16UI:
3124 case FORMAT_X16B16G16R16UI:
3125 case FORMAT_G16R16UI:
3126 case FORMAT_R16UI:
3127 case FORMAT_A32B32G32R32UI:
3128 case FORMAT_X32B32G32R32UI:
3129 case FORMAT_G32R32UI:
3130 case FORMAT_R32UI:
3131 return true;
3132 default:
3133 return false;
3134 }
3135 }
3136
3137 bool Surface::isNonNormalizedInteger(Format format)
3138 {
3139 return isSignedNonNormalizedInteger(format) ||
3140 isUnsignedNonNormalizedInteger(format);
3141 }
3142
3143 bool Surface::isNormalizedInteger(Format format)
3144 {
3145 return !isFloatFormat(format) &&
3146 !isNonNormalizedInteger(format) &&
3147 !isCompressed(format) &&
3148 !isDepth(format) &&
3149 !isStencil(format);
3150 }
3151
3152 int Surface::componentCount(Format format)
3153 {
3154 switch(format)
3155 {
3156 case FORMAT_R5G6B5: return 3;
3157 case FORMAT_X8R8G8B8: return 3;
3158 case FORMAT_X8B8G8R8I: return 3;
3159 case FORMAT_X8B8G8R8: return 3;
3160 case FORMAT_A8R8G8B8: return 4;
3161 case FORMAT_SRGB8_X8: return 3;
3162 case FORMAT_SRGB8_A8: return 4;
3163 case FORMAT_A8B8G8R8I: return 4;
3164 case FORMAT_A8B8G8R8: return 4;
3165 case FORMAT_G8R8I: return 2;
3166 case FORMAT_G8R8: return 2;
3167 case FORMAT_R8_SNORM: return 1;
3168 case FORMAT_G8R8_SNORM: return 2;
3169 case FORMAT_X8B8G8R8_SNORM:return 3;
3170 case FORMAT_A8B8G8R8_SNORM:return 4;
3171 case FORMAT_R8UI: return 1;
3172 case FORMAT_G8R8UI: return 2;
3173 case FORMAT_X8B8G8R8UI: return 3;
3174 case FORMAT_A8B8G8R8UI: return 4;
3175 case FORMAT_A2B10G10R10: return 4;
3176 case FORMAT_A2B10G10R10UI: return 4;
3177 case FORMAT_G16R16I: return 2;
3178 case FORMAT_G16R16UI: return 2;
3179 case FORMAT_G16R16: return 2;
3180 case FORMAT_G32R32I: return 2;
3181 case FORMAT_G32R32UI: return 2;
3182 case FORMAT_X16B16G16R16I: return 3;
3183 case FORMAT_X16B16G16R16UI: return 3;
3184 case FORMAT_A16B16G16R16I: return 4;
3185 case FORMAT_A16B16G16R16UI: return 4;
3186 case FORMAT_A16B16G16R16: return 4;
3187 case FORMAT_X32B32G32R32I: return 3;
3188 case FORMAT_X32B32G32R32UI: return 3;
3189 case FORMAT_A32B32G32R32I: return 4;
3190 case FORMAT_A32B32G32R32UI: return 4;
3191 case FORMAT_V8U8: return 2;
3192 case FORMAT_Q8W8V8U8: return 4;
3193 case FORMAT_X8L8V8U8: return 3;
3194 case FORMAT_V16U16: return 2;
3195 case FORMAT_A16W16V16U16: return 4;
3196 case FORMAT_Q16W16V16U16: return 4;
3197 case FORMAT_R32F: return 1;
3198 case FORMAT_G32R32F: return 2;
3199 case FORMAT_X32B32G32R32F: return 3;
3200 case FORMAT_A32B32G32R32F: return 4;
3201 case FORMAT_X32B32G32R32F_UNSIGNED: return 3;
3202 case FORMAT_D32F: return 1;
3203 case FORMAT_D32FS8: return 1;
3204 case FORMAT_D32F_LOCKABLE: return 1;
3205 case FORMAT_D32FS8_TEXTURE: return 1;
3206 case FORMAT_D32F_SHADOW: return 1;
3207 case FORMAT_D32FS8_SHADOW: return 1;
3208 case FORMAT_A8: return 1;
3209 case FORMAT_R8I: return 1;
3210 case FORMAT_R8: return 1;
3211 case FORMAT_R16I: return 1;
3212 case FORMAT_R16UI: return 1;
3213 case FORMAT_R32I: return 1;
3214 case FORMAT_R32UI: return 1;
3215 case FORMAT_L8: return 1;
3216 case FORMAT_L16: return 1;
3217 case FORMAT_A8L8: return 2;
3218 case FORMAT_YV12_BT601: return 3;
3219 case FORMAT_YV12_BT709: return 3;
3220 case FORMAT_YV12_JFIF: return 3;
3221 default:
3222 ASSERT(false);
3223 }
3224
3225 return 1;
3226 }
3227
3228 void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format)
3229 {
3230 return allocate(size(width, height, depth, border, samples, format));
3231 }
3232
3233 void Surface::memfill4(void *buffer, int pattern, int bytes)
3234 {
3235 while((size_t)buffer & 0x1 && bytes >= 1)
3236 {
3237 *(char*)buffer = (char)pattern;
3238 (char*&)buffer += 1;
3239 bytes -= 1;
3240 }
3241
3242 while((size_t)buffer & 0x3 && bytes >= 2)
3243 {
3244 *(short*)buffer = (short)pattern;
3245 (short*&)buffer += 1;
3246 bytes -= 2;
3247 }
3248
3249 #if defined(__i386__) || defined(__x86_64__)
3250 if(CPUID::supportsSSE())
3251 {
3252 while((size_t)buffer & 0xF && bytes >= 4)
3253 {
3254 *(int*)buffer = pattern;
3255 (int*&)buffer += 1;
3256 bytes -= 4;
3257 }
3258
3259 __m128 quad = _mm_set_ps1((float&)pattern);
3260
3261 float *pointer = (float*)buffer;
3262 int qxwords = bytes / 64;
3263 bytes -= qxwords * 64;
3264
3265 while(qxwords--)
3266 {
3267 _mm_stream_ps(pointer + 0, quad);
3268 _mm_stream_ps(pointer + 4, quad);
3269 _mm_stream_ps(pointer + 8, quad);
3270 _mm_stream_ps(pointer + 12, quad);
3271
3272 pointer += 16;
3273 }
3274
3275 buffer = pointer;
3276 }
3277 #endif
3278
3279 while(bytes >= 4)
3280 {
3281 *(int*)buffer = (int)pattern;
3282 (int*&)buffer += 1;
3283 bytes -= 4;
3284 }
3285
3286 while(bytes >= 2)
3287 {
3288 *(short*)buffer = (short)pattern;
3289 (short*&)buffer += 1;
3290 bytes -= 2;
3291 }
3292
3293 while(bytes >= 1)
3294 {
3295 *(char*)buffer = (char)pattern;
3296 (char*&)buffer += 1;
3297 bytes -= 1;
3298 }
3299 }
3300
3301 void Surface::sync()
3302 {
3303 resource->lock(EXCLUSIVE);
3304 resource->unlock();
3305 }
3306
3307 bool Surface::isEntire(const Rect& rect) const
3308 {
3309 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3310 }
3311
3312 Rect Surface::getRect() const
3313 {
3314 return Rect(0, 0, internal.width, internal.height);
3315 }
3316
3317 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
3318 {
3319 if(width == 0 || height == 0)
3320 {
3321 return;
3322 }
3323
3324 if(internal.format == FORMAT_NULL)
3325 {
3326 return;
3327 }
3328
3329 // Not overlapping
3330 if(x0 > internal.width) return;
3331 if(y0 > internal.height) return;
3332 if(x0 + width < 0) return;
3333 if(y0 + height < 0) return;
3334
3335 // Clip against dimensions
3336 if(x0 < 0) {width += x0; x0 = 0;}
3337 if(x0 + width > internal.width) width = internal.width - x0;
3338 if(y0 < 0) {height += y0; y0 = 0;}
3339 if(y0 + height > internal.height) height = internal.height - y0;
3340
3341 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3342 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3343
3344 int x1 = x0 + width;
3345 int y1 = y0 + height;
3346
3347 if(!hasQuadLayout(internal.format))
3348 {
3349 float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC);
3350
3351 for(int z = 0; z < internal.samples; z++)
3352 {
3353 float *row = target;
3354 for(int y = y0; y < y1; y++)
3355 {
3356 memfill4(row, (int&)depth, width * sizeof(float));
3357 row += internal.pitchP;
3358 }
3359 target += internal.sliceP;
3360 }
3361
3362 unlockInternal();
3363 }
3364 else // Quad layout
3365 {
3366 if(complementaryDepthBuffer)
3367 {
3368 depth = 1 - depth;
3369 }
3370
3371 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3372
3373 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3374 int oddX1 = (x1 & ~1) * 2;
3375 int evenX0 = ((x0 + 1) & ~1) * 2;
3376 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3377
3378 for(int z = 0; z < internal.samples; z++)
3379 {
3380 for(int y = y0; y < y1; y++)
3381 {
3382 float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2;
3383
3384 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3385 {
3386 if((x0 & 1) != 0)
3387 {
3388 target[oddX0 + 0] = depth;
3389 target[oddX0 + 2] = depth;
3390 }
3391
3392 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
3393 // {
3394 // target[x2 + 0] = depth;
3395 // target[x2 + 1] = depth;
3396 // target[x2 + 2] = depth;
3397 // target[x2 + 3] = depth;
3398 // }
3399
3400 // __asm
3401 // {
3402 // movss xmm0, depth
3403 // shufps xmm0, xmm0, 0x00
3404 //
3405 // mov eax, x0
3406 // add eax, 1
3407 // and eax, 0xFFFFFFFE
3408 // cmp eax, x1
3409 // jge qEnd
3410 //
3411 // mov edi, target
3412 //
3413 // qLoop:
3414 // movntps [edi+8*eax], xmm0
3415 //
3416 // add eax, 2
3417 // cmp eax, x1
3418 // jl qLoop
3419 // qEnd:
3420 // }
3421
3422 memfill4(&target[evenX0], (int&)depth, evenBytes);
3423
3424 if((x1 & 1) != 0)
3425 {
3426 target[oddX1 + 0] = depth;
3427 target[oddX1 + 2] = depth;
3428 }
3429
3430 y++;
3431 }
3432 else
3433 {
3434 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
3435 {
3436 target[i] = depth;
3437 }
3438 }
3439 }
3440
3441 buffer += internal.sliceP;
3442 }
3443
3444 unlockInternal();
3445 }
3446 }
3447
3448 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
3449 {
3450 if(mask == 0 || width == 0 || height == 0)
3451 {
3452 return;
3453 }
3454
3455 if(stencil.format == FORMAT_NULL)
3456 {
3457 return;
3458 }
3459
3460 // Not overlapping
3461 if(x0 > internal.width) return;
3462 if(y0 > internal.height) return;
3463 if(x0 + width < 0) return;
3464 if(y0 + height < 0) return;
3465
3466 // Clip against dimensions
3467 if(x0 < 0) {width += x0; x0 = 0;}
3468 if(x0 + width > internal.width) width = internal.width - x0;
3469 if(y0 < 0) {height += y0; y0 = 0;}
3470 if(y0 + height > internal.height) height = internal.height - y0;
3471
3472 int x1 = x0 + width;
3473 int y1 = y0 + height;
3474
3475 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3476 int oddX1 = (x1 & ~1) * 2;
3477 int evenX0 = ((x0 + 1) & ~1) * 2;
3478 int evenBytes = oddX1 - evenX0;
3479
3480 unsigned char maskedS = s & mask;
3481 unsigned char invMask = ~mask;
3482 unsigned int fill = maskedS;
3483 fill = fill | (fill << 8) | (fill << 16) | (fill << 24);
3484
3485 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
3486
3487 // Stencil buffers are assumed to use quad layout
3488 for(int z = 0; z < stencil.samples; z++)
3489 {
3490 for(int y = y0; y < y1; y++)
3491 {
3492 char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2;
3493
3494 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
3495 {
3496 if((x0 & 1) != 0)
3497 {
3498 target[oddX0 + 0] = fill;
3499 target[oddX0 + 2] = fill;
3500 }
3501
3502 memfill4(&target[evenX0], fill, evenBytes);
3503
3504 if((x1 & 1) != 0)
3505 {
3506 target[oddX1 + 0] = fill;
3507 target[oddX1 + 2] = fill;
3508 }
3509
3510 y++;
3511 }
3512 else
3513 {
3514 for(int x = x0; x < x1; x++)
3515 {
3516 int i = (x & ~1) * 2 + (x & 1);
3517 target[i] = maskedS | (target[i] & invMask);
3518 }
3519 }
3520 }
3521
3522 buffer += stencil.sliceP;
3523 }
3524
3525 unlockStencil();
3526 }
3527
3528 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3529 {
3530 unsigned char *row;
3531 Buffer *buffer;
3532
3533 if(internal.dirty)
3534 {
3535 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3536 buffer = &internal;
3537 }
3538 else
3539 {
3540 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3541 buffer = &external;
3542 }
3543
3544 if(buffer->bytes <= 4)
3545 {
3546 int c;
3547 buffer->write(&c, color);
3548
3549 if(buffer->bytes <= 1) c = (c << 8) | c;
3550 if(buffer->bytes <= 2) c = (c << 16) | c;
3551
3552 for(int y = 0; y < height; y++)
3553 {
3554 memfill4(row, c, width * buffer->bytes);
3555
3556 row += buffer->pitchB;
3557 }
3558 }
3559 else // Generic
3560 {
3561 for(int y = 0; y < height; y++)
3562 {
3563 unsigned char *element = row;
3564
3565 for(int x = 0; x < width; x++)
3566 {
3567 buffer->write(element, color);
3568
3569 element += buffer->bytes;
3570 }
3571
3572 row += buffer->pitchB;
3573 }
3574 }
3575
3576 if(buffer == &internal)
3577 {
3578 unlockInternal();
3579 }
3580 else
3581 {
3582 unlockExternal();
3583 }
3584 }
3585
3586 void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter)
3587 {
3588 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3589
3590 sw::Color<float> color;
3591
3592 if(!filter)
3593 {
3594 color = source->internal.read((int)srcX, (int)srcY, 0);
3595 }
3596 else // Bilinear filtering
3597 {
3598 color = source->internal.sample(srcX, srcY, 0);
3599 }
3600
3601 internal.write(x, y, color);
3602 }
3603
3604 void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
3605 {
3606 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3607
3608 sw::Color<float> color;
3609
3610 if(!filter)
3611 {
3612 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3613 }
3614 else // Bilinear filtering
3615 {
3616 color = source->internal.sample(srcX, srcY, srcZ);
3617 }
3618
3619 internal.write(x, y, z, color);
3620 }
3621
3622 void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge)
3623 {
3624 Surface *dst = this;
3625
3626 // Figure out if the edges to be copied in reverse order respectively from one another
3627 // The copy should be reversed whenever the same edges are contiguous or if we're
3628 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
3629 //
3630 // | +y |
3631 // | -x | +z | +x | -z |
3632 // | -y |
3633
3634 bool reverse = (srcEdge == dstEdge) ||
3635 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
3636 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
3637 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
3638 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
3639
3640 int srcBytes = src->bytes(src->Surface::getInternalFormat());
3641 int srcPitch = src->getInternalPitchB();
3642 int dstBytes = dst->bytes(dst->Surface::getInternalFormat());
3643 int dstPitch = dst->getInternalPitchB();
3644
3645 int srcW = src->getWidth();
3646 int srcH = src->getHeight();
3647 int dstW = dst->getWidth();
3648 int dstH = dst->getHeight();
3649
3650 ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes);
3651
3652 // Src is expressed in the regular [0, width-1], [0, height-1] space
3653 int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch;
3654 int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0));
3655
3656 // Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space
3657 int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1);
3658 int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta);
3659
3660 char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart;
3661 char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart;
3662
3663 for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta)
3664 {
3665 memcpy(dstBuf, srcBuf, srcBytes);
3666 }
3667
3668 if(dstEdge == LEFT || dstEdge == RIGHT)
3669 {
3670 // TOP and BOTTOM are already set, let's average out the corners
3671 int x0 = (dstEdge == RIGHT) ? dstW : -1;
3672 int y0 = -1;
3673 int x1 = (dstEdge == RIGHT) ? dstW - 1 : 0;
3674 int y1 = 0;
3675 dst->computeCubeCorner(x0, y0, x1, y1);
3676 y0 = dstH;
3677 y1 = dstH - 1;
3678 dst->computeCubeCorner(x0, y0, x1, y1);
3679 }
3680
3681 src->unlockInternal();
3682 dst->unlockInternal();
3683 }
3684
3685 void Surface::computeCubeCorner(int x0, int y0, int x1, int y1)
3686 {
3687 ASSERT(internal.lock != LOCK_UNLOCKED);
3688
3689 sw::Color<float> color = internal.read(x0, y1);
3690 color += internal.read(x1, y0);
3691 color += internal.read(x1, y1);
3692 color *= (1.0f / 3.0f);
3693
3694 internal.write(x0, y0, color);
3695 }
3696
3697 bool Surface::hasStencil() const
3698 {
3699 return isStencil(external.format);
3700 }
3701
3702 bool Surface::hasDepth() const
3703 {
3704 return isDepth(external.format);
3705 }
3706
3707 bool Surface::hasPalette() const
3708 {
3709 return isPalette(external.format);
3710 }
3711
3712 bool Surface::isRenderTarget() const
3713 {
3714 return renderTarget;
3715 }
3716
3717 bool Surface::hasDirtyContents() const
3718 {
3719 return dirtyContents;
3720 }
3721
3722 void Surface::markContentsClean()
3723 {
3724 dirtyContents = false;
3725 }
3726
3727 Resource *Surface::getResource()
3728 {
3729 return resource;
3730 }
3731
3732 bool Surface::identicalBuffers() const
3733 {
3734 return external.format == internal.format &&
3735 external.width == internal.width &&
3736 external.height == internal.height &&
3737 external.depth == internal.depth &&
3738 external.pitchB == internal.pitchB &&
3739 external.sliceB == internal.sliceB &&
3740 external.border == internal.border &&
3741 external.samples == internal.samples;
3742 }
3743
3744 Format Surface::selectInternalFormat(Format format) const
3745 {
3746 switch(format)
3747 {
3748 case FORMAT_NULL:
3749 return FORMAT_NULL;
3750 case FORMAT_P8:
3751 case FORMAT_A8P8:
3752 case FORMAT_A4R4G4B4:
3753 case FORMAT_A1R5G5B5:
3754 case FORMAT_A8R3G3B2:
3755 return FORMAT_A8R8G8B8;
3756 case FORMAT_A8:
3757 return FORMAT_A8;
3758 case FORMAT_R8I:
3759 return FORMAT_R8I;
3760 case FORMAT_R8UI:
3761 return FORMAT_R8UI;
3762 case FORMAT_R8_SNORM:
3763 return FORMAT_R8_SNORM;
3764 case FORMAT_R8:
3765 return FORMAT_R8;
3766 case FORMAT_R16I:
3767 return FORMAT_R16I;
3768 case FORMAT_R16UI:
3769 return FORMAT_R16UI;
3770 case FORMAT_R32I:
3771 return FORMAT_R32I;
3772 case FORMAT_R32UI:
3773 return FORMAT_R32UI;
3774 case FORMAT_X16B16G16R16I:
3775 return FORMAT_X16B16G16R16I;
3776 case FORMAT_A16B16G16R16I:
3777 return FORMAT_A16B16G16R16I;
3778 case FORMAT_X16B16G16R16UI:
3779 return FORMAT_X16B16G16R16UI;
3780 case FORMAT_A16B16G16R16UI:
3781 return FORMAT_A16B16G16R16UI;
3782 case FORMAT_A2R10G10B10:
3783 case FORMAT_A2B10G10R10:
3784 case FORMAT_A16B16G16R16:
3785 return FORMAT_A16B16G16R16;
3786 case FORMAT_A2B10G10R10UI:
3787 return FORMAT_A16B16G16R16UI;
3788 case FORMAT_X32B32G32R32I:
3789 return FORMAT_X32B32G32R32I;
3790 case FORMAT_A32B32G32R32I:
3791 return FORMAT_A32B32G32R32I;
3792 case FORMAT_X32B32G32R32UI:
3793 return FORMAT_X32B32G32R32UI;
3794 case FORMAT_A32B32G32R32UI:
3795 return FORMAT_A32B32G32R32UI;
3796 case FORMAT_G8R8I:
3797 return FORMAT_G8R8I;
3798 case FORMAT_G8R8UI:
3799 return FORMAT_G8R8UI;
3800 case FORMAT_G8R8_SNORM:
3801 return FORMAT_G8R8_SNORM;
3802 case FORMAT_G8R8:
3803 return FORMAT_G8R8;
3804 case FORMAT_G16R16I:
3805 return FORMAT_G16R16I;
3806 case FORMAT_G16R16UI:
3807 return FORMAT_G16R16UI;
3808 case FORMAT_G16R16:
3809 return FORMAT_G16R16;
3810 case FORMAT_G32R32I:
3811 return FORMAT_G32R32I;
3812 case FORMAT_G32R32UI:
3813 return FORMAT_G32R32UI;
3814 case FORMAT_A8R8G8B8:
3815 if(lockable || !quadLayoutEnabled)
3816 {
3817 return FORMAT_A8R8G8B8;
3818 }
3819 else
3820 {
3821 return FORMAT_A8G8R8B8Q;
3822 }
3823 case FORMAT_A8B8G8R8I:
3824 return FORMAT_A8B8G8R8I;
3825 case FORMAT_A8B8G8R8UI:
3826 return FORMAT_A8B8G8R8UI;
3827 case FORMAT_A8B8G8R8_SNORM:
3828 return FORMAT_A8B8G8R8_SNORM;
3829 case FORMAT_R5G5B5A1:
3830 case FORMAT_R4G4B4A4:
3831 case FORMAT_A8B8G8R8:
3832 return FORMAT_A8B8G8R8;
3833 case FORMAT_R5G6B5:
3834 return FORMAT_R5G6B5;
3835 case FORMAT_R3G3B2:
3836 case FORMAT_R8G8B8:
3837 case FORMAT_X4R4G4B4:
3838 case FORMAT_X1R5G5B5:
3839 case FORMAT_X8R8G8B8:
3840 if(lockable || !quadLayoutEnabled)
3841 {
3842 return FORMAT_X8R8G8B8;
3843 }
3844 else
3845 {
3846 return FORMAT_X8G8R8B8Q;
3847 }
3848 case FORMAT_X8B8G8R8I:
3849 return FORMAT_X8B8G8R8I;
3850 case FORMAT_X8B8G8R8UI:
3851 return FORMAT_X8B8G8R8UI;
3852 case FORMAT_X8B8G8R8_SNORM:
3853 return FORMAT_X8B8G8R8_SNORM;
3854 case FORMAT_B8G8R8:
3855 case FORMAT_X8B8G8R8:
3856 return FORMAT_X8B8G8R8;
3857 case FORMAT_SRGB8_X8:
3858 return FORMAT_SRGB8_X8;
3859 case FORMAT_SRGB8_A8:
3860 return FORMAT_SRGB8_A8;
3861 // Compressed formats
3862 case FORMAT_DXT1:
3863 case FORMAT_DXT3:
3864 case FORMAT_DXT5:
3865 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3866 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3867 case FORMAT_RGBA8_ETC2_EAC:
3868 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3869 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3870 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3871 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3872 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3873 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3874 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3875 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3876 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3877 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3878 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3879 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3880 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3881 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3882 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3883 return FORMAT_A8R8G8B8;
3884 case FORMAT_RGBA_ASTC_4x4_KHR:
3885 case FORMAT_RGBA_ASTC_5x4_KHR:
3886 case FORMAT_RGBA_ASTC_5x5_KHR:
3887 case FORMAT_RGBA_ASTC_6x5_KHR:
3888 case FORMAT_RGBA_ASTC_6x6_KHR:
3889 case FORMAT_RGBA_ASTC_8x5_KHR:
3890 case FORMAT_RGBA_ASTC_8x6_KHR:
3891 case FORMAT_RGBA_ASTC_8x8_KHR:
3892 case FORMAT_RGBA_ASTC_10x5_KHR:
3893 case FORMAT_RGBA_ASTC_10x6_KHR:
3894 case FORMAT_RGBA_ASTC_10x8_KHR:
3895 case FORMAT_RGBA_ASTC_10x10_KHR:
3896 case FORMAT_RGBA_ASTC_12x10_KHR:
3897 case FORMAT_RGBA_ASTC_12x12_KHR:
3898 // ASTC supports HDR, so a floating point format is required to represent it properly
3899 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
3900 case FORMAT_ATI1:
3901 return FORMAT_R8;
3902 case FORMAT_R11_EAC:
3903 case FORMAT_SIGNED_R11_EAC:
3904 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
3905 case FORMAT_ATI2:
3906 return FORMAT_G8R8;
3907 case FORMAT_RG11_EAC:
3908 case FORMAT_SIGNED_RG11_EAC:
3909 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
3910 case FORMAT_ETC1:
3911 case FORMAT_RGB8_ETC2:
3912 case FORMAT_SRGB8_ETC2:
3913 return FORMAT_X8R8G8B8;
3914 // Bumpmap formats
3915 case FORMAT_V8U8: return FORMAT_V8U8;
3916 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3917 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3918 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3919 case FORMAT_V16U16: return FORMAT_V16U16;
3920 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3921 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3922 // Floating-point formats
3923 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
3924 case FORMAT_R16F: return FORMAT_R32F;
3925 case FORMAT_G16R16F: return FORMAT_G32R32F;
3926 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
3927 case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F;
3928 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
3929 case FORMAT_X16B16G16R16F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
3930 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
3931 case FORMAT_R32F: return FORMAT_R32F;
3932 case FORMAT_G32R32F: return FORMAT_G32R32F;
3933 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3934 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
3935 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3936 case FORMAT_X32B32G32R32F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
3937 // Luminance formats
3938 case FORMAT_L8: return FORMAT_L8;
3939 case FORMAT_A4L4: return FORMAT_A8L8;
3940 case FORMAT_L16: return FORMAT_L16;
3941 case FORMAT_A8L8: return FORMAT_A8L8;
3942 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
3943 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
3944 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
3945 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
3946 // Depth/stencil formats
3947 case FORMAT_D16:
3948 case FORMAT_D32:
3949 case FORMAT_D24X8:
3950 if(hasParent) // Texture
3951 {
3952 return FORMAT_D32F_SHADOW;
3953 }
3954 else if(complementaryDepthBuffer)
3955 {
3956 return FORMAT_D32F_COMPLEMENTARY;
3957 }
3958 else
3959 {
3960 return FORMAT_D32F;
3961 }
3962 case FORMAT_D24S8:
3963 case FORMAT_D24FS8:
3964 if(hasParent) // Texture
3965 {
3966 return FORMAT_D32FS8_SHADOW;
3967 }
3968 else if(complementaryDepthBuffer)
3969 {
3970 return FORMAT_D32FS8_COMPLEMENTARY;
3971 }
3972 else
3973 {
3974 return FORMAT_D32FS8;
3975 }
3976 case FORMAT_D32F: return FORMAT_D32F;
3977 case FORMAT_D32FS8: return FORMAT_D32FS8;
3978 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3979 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3980 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3981 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3982 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
3983 case FORMAT_S8: return FORMAT_S8;
3984 // YUV formats
3985 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3986 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3987 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
3988 default:
3989 ASSERT(false);
3990 }
3991
3992 return FORMAT_NULL;
3993 }
3994
3995 void Surface::setTexturePalette(unsigned int *palette)
3996 {
3997 Surface::palette = palette;
3998 Surface::paletteID++;
3999 }
4000
4001 void Surface::resolve()
4002 {
4003 if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
4004 {
4005 return;
4006 }
4007
4008 ASSERT(internal.depth == 1); // Unimplemented
4009
4010 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
4011
4012 int width = internal.width;
4013 int height = internal.height;
4014 int pitch = internal.pitchB;
4015 int slice = internal.sliceB;
4016
4017 unsigned char *source0 = (unsigned char*)source;
4018 unsigned char *source1 = source0 + slice;
4019 unsigned char *source2 = source1 + slice;
4020 unsigned char *source3 = source2 + slice;
4021 unsigned char *source4 = source3 + slice;
4022 unsigned char *source5 = source4 + slice;
4023 unsigned char *source6 = source5 + slice;
4024 unsigned char *source7 = source6 + slice;
4025 unsigned char *source8 = source7 + slice;
4026 unsigned char *source9 = source8 + slice;
4027 unsigned char *sourceA = source9 + slice;
4028 unsigned char *sourceB = sourceA + slice;
4029 unsigned char *sourceC = sourceB + slice;
4030 unsigned char *sourceD = sourceC + slice;
4031 unsigned char *sourceE = sourceD + slice;
4032 unsigned char *sourceF = sourceE + slice;
4033
4034 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
4035 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
4036 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
4037 {
4038 #if defined(__i386__) || defined(__x86_64__)
4039 if(CPUID::supportsSSE2() && (width % 4) == 0)
4040 {
4041 if(internal.samples == 2)
4042 {
4043 for(int y = 0; y < height; y++)
4044 {
4045 for(int x = 0; x < width; x += 4)
4046 {
4047 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4048 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4049
4050 c0 = _mm_avg_epu8(c0, c1);
4051
4052 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4053 }
4054
4055 source0 += pitch;
4056 source1 += pitch;
4057 }
4058 }
4059 else if(internal.samples == 4)
4060 {
4061 for(int y = 0; y < height; y++)
4062 {
4063 for(int x = 0; x < width; x += 4)
4064 {
4065 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4066 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4067 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4068 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4069
4070 c0 = _mm_avg_epu8(c0, c1);
4071 c2 = _mm_avg_epu8(c2, c3);
4072 c0 = _mm_avg_epu8(c0, c2);
4073
4074 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4075 }
4076
4077 source0 += pitch;
4078 source1 += pitch;
4079 source2 += pitch;
4080 source3 += pitch;
4081 }
4082 }
4083 else if(internal.samples == 8)
4084 {
4085 for(int y = 0; y < height; y++)
4086 {
4087 for(int x = 0; x < width; x += 4)
4088 {
4089 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4090 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4091 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4092 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4093 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4094 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4095 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4096 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4097
4098 c0 = _mm_avg_epu8(c0, c1);
4099 c2 = _mm_avg_epu8(c2, c3);
4100 c4 = _mm_avg_epu8(c4, c5);
4101 c6 = _mm_avg_epu8(c6, c7);
4102 c0 = _mm_avg_epu8(c0, c2);
4103 c4 = _mm_avg_epu8(c4, c6);
4104 c0 = _mm_avg_epu8(c0, c4);
4105
4106 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4107 }
4108
4109 source0 += pitch;
4110 source1 += pitch;
4111 source2 += pitch;
4112 source3 += pitch;
4113 source4 += pitch;
4114 source5 += pitch;
4115 source6 += pitch;
4116 source7 += pitch;
4117 }
4118 }
4119 else if(internal.samples == 16)
4120 {
4121 for(int y = 0; y < height; y++)
4122 {
4123 for(int x = 0; x < width; x += 4)
4124 {
4125 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4126 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4127 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4128 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4129 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4130 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4131 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4132 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4133 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4134 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4135 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4136 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4137 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4138 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4139 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4140 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
4141
4142 c0 = _mm_avg_epu8(c0, c1);
4143 c2 = _mm_avg_epu8(c2, c3);
4144 c4 = _mm_avg_epu8(c4, c5);
4145 c6 = _mm_avg_epu8(c6, c7);
4146 c8 = _mm_avg_epu8(c8, c9);
4147 cA = _mm_avg_epu8(cA, cB);
4148 cC = _mm_avg_epu8(cC, cD);
4149 cE = _mm_avg_epu8(cE, cF);
4150 c0 = _mm_avg_epu8(c0, c2);
4151 c4 = _mm_avg_epu8(c4, c6);
4152 c8 = _mm_avg_epu8(c8, cA);
4153 cC = _mm_avg_epu8(cC, cE);
4154 c0 = _mm_avg_epu8(c0, c4);
4155 c8 = _mm_avg_epu8(c8, cC);
4156 c0 = _mm_avg_epu8(c0, c8);
4157
4158 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4159 }
4160
4161 source0 += pitch;
4162 source1 += pitch;
4163 source2 += pitch;
4164 source3 += pitch;
4165 source4 += pitch;
4166 source5 += pitch;
4167 source6 += pitch;
4168 source7 += pitch;
4169 source8 += pitch;
4170 source9 += pitch;
4171 sourceA += pitch;
4172 sourceB += pitch;
4173 sourceC += pitch;
4174 sourceD += pitch;
4175 sourceE += pitch;
4176 sourceF += pitch;
4177 }
4178 }
4179 else ASSERT(false);
4180 }
4181 else
4182 #endif
4183 {
// AVERAGE(x, y): rounded-up average of two 32-bit words treated as four
// independent 8-bit lanes. Per lane it evaluates
//   (x & y) + ((x ^ y) >> 1) + ((x ^ y) & 1)  ==  (x + y + 1) / 2
// - (x & y) contributes the bits both operands share;
// - ((x ^ y) >> 1) & 0x7F7F7F7F is half of the differing bits, with the mask
//   clearing the bit that the shift drags in from the neighbouring lane;
// - (x ^ y) & 0x01010101 adds each lane's rounding bit.
// A lane result never exceeds 0xFF, so the plain 32-bit additions cannot
// carry from one lane into the next.
// Each argument is expanded three times; pass side-effect-free expressions.
4184 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
4185
4186 if(internal.samples == 2)
4187 {
4188 for(int y = 0; y < height; y++)
4189 {
4190 for(int x = 0; x < width; x++)
4191 {
4192 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4193 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4194
4195 c0 = AVERAGE(c0, c1);
4196
4197 *(unsigned int*)(source0 + 4 * x) = c0;
4198 }
4199
4200 source0 += pitch;
4201 source1 += pitch;
4202 }
4203 }
4204 else if(internal.samples == 4)
4205 {
4206 for(int y = 0; y < height; y++)
4207 {
4208 for(int x = 0; x < width; x++)
4209 {
4210 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4211 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4212 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4213 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4214
4215 c0 = AVERAGE(c0, c1);
4216 c2 = AVERAGE(c2, c3);
4217 c0 = AVERAGE(c0, c2);
4218
4219 *(unsigned int*)(source0 + 4 * x) = c0;
4220 }
4221
4222 source0 += pitch;
4223 source1 += pitch;
4224 source2 += pitch;
4225 source3 += pitch;
4226 }
4227 }
4228 else if(internal.samples == 8)
4229 {
4230 for(int y = 0; y < height; y++)
4231 {
4232 for(int x = 0; x < width; x++)
4233 {
4234 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4235 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4236 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4237 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4238 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4239 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4240 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4241 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4242
4243 c0 = AVERAGE(c0, c1);
4244 c2 = AVERAGE(c2, c3);
4245 c4 = AVERAGE(c4, c5);
4246 c6 = AVERAGE(c6, c7);
4247 c0 = AVERAGE(c0, c2);
4248 c4 = AVERAGE(c4, c6);
4249 c0 = AVERAGE(c0, c4);
4250
4251 *(unsigned int*)(source0 + 4 * x) = c0;
4252 }
4253
4254 source0 += pitch;
4255 source1 += pitch;
4256 source2 += pitch;
4257 source3 += pitch;
4258 source4 += pitch;
4259 source5 += pitch;
4260 source6 += pitch;
4261 source7 += pitch;
4262 }
4263 }
4264 else if(internal.samples == 16)
4265 {
4266 for(int y = 0; y < height; y++)
4267 {
4268 for(int x = 0; x < width; x++)
4269 {
4270 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4271 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4272 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4273 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4274 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4275 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4276 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4277 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4278 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4279 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4280 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4281 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4282 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4283 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4284 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4285 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4286
4287 c0 = AVERAGE(c0, c1);
4288 c2 = AVERAGE(c2, c3);
4289 c4 = AVERAGE(c4, c5);
4290 c6 = AVERAGE(c6, c7);
4291 c8 = AVERAGE(c8, c9);
4292 cA = AVERAGE(cA, cB);
4293 cC = AVERAGE(cC, cD);
4294 cE = AVERAGE(cE, cF);
4295 c0 = AVERAGE(c0, c2);
4296 c4 = AVERAGE(c4, c6);
4297 c8 = AVERAGE(c8, cA);
4298 cC = AVERAGE(cC, cE);
4299 c0 = AVERAGE(c0, c4);
4300 c8 = AVERAGE(c8, cC);
4301 c0 = AVERAGE(c0, c8);
4302
4303 *(unsigned int*)(source0 + 4 * x) = c0;
4304 }
4305
4306 source0 += pitch;
4307 source1 += pitch;
4308 source2 += pitch;
4309 source3 += pitch;
4310 source4 += pitch;
4311 source5 += pitch;
4312 source6 += pitch;
4313 source7 += pitch;
4314 source8 += pitch;
4315 source9 += pitch;
4316 sourceA += pitch;
4317 sourceB += pitch;
4318 sourceC += pitch;
4319 sourceD += pitch;
4320 sourceE += pitch;
4321 sourceF += pitch;
4322 }
4323 }
4324 else ASSERT(false);
4325
4326 #undef AVERAGE
4327 }
4328 }
4329 else if(internal.format == FORMAT_G16R16)
4330 {
4331
4332 #if defined(__i386__) || defined(__x86_64__)
4333 if(CPUID::supportsSSE2() && (width % 4) == 0)
4334 {
4335 if(internal.samples == 2)
4336 {
4337 for(int y = 0; y < height; y++)
4338 {
4339 for(int x = 0; x < width; x += 4)
4340 {
4341 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4342 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4343
4344 c0 = _mm_avg_epu16(c0, c1);
4345
4346 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4347 }
4348
4349 source0 += pitch;
4350 source1 += pitch;
4351 }
4352 }
4353 else if(internal.samples == 4)
4354 {
4355 for(int y = 0; y < height; y++)
4356 {
4357 for(int x = 0; x < width; x += 4)
4358 {
4359 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4360 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4361 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4362 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4363
4364 c0 = _mm_avg_epu16(c0, c1);
4365 c2 = _mm_avg_epu16(c2, c3);
4366 c0 = _mm_avg_epu16(c0, c2);
4367
4368 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4369 }
4370
4371 source0 += pitch;
4372 source1 += pitch;
4373 source2 += pitch;
4374 source3 += pitch;
4375 }
4376 }
4377 else if(internal.samples == 8)
4378 {
4379 for(int y = 0; y < height; y++)
4380 {
4381 for(int x = 0; x < width; x += 4)
4382 {
4383 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4384 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4385 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4386 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4387 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4388 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4389 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4390 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4391
4392 c0 = _mm_avg_epu16(c0, c1);
4393 c2 = _mm_avg_epu16(c2, c3);
4394 c4 = _mm_avg_epu16(c4, c5);
4395 c6 = _mm_avg_epu16(c6, c7);
4396 c0 = _mm_avg_epu16(c0, c2);
4397 c4 = _mm_avg_epu16(c4, c6);
4398 c0 = _mm_avg_epu16(c0, c4);
4399
4400 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4401 }
4402
4403 source0 += pitch;
4404 source1 += pitch;
4405 source2 += pitch;
4406 source3 += pitch;
4407 source4 += pitch;
4408 source5 += pitch;
4409 source6 += pitch;
4410 source7 += pitch;
4411 }
4412 }
4413 else if(internal.samples == 16)
4414 {
4415 for(int y = 0; y < height; y++)
4416 {
4417 for(int x = 0; x < width; x += 4)
4418 {
4419 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4420 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4421 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4422 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4423 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4424 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4425 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4426 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4427 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4428 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4429 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4430 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4431 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4432 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4433 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4434 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
4435
4436 c0 = _mm_avg_epu16(c0, c1);
4437 c2 = _mm_avg_epu16(c2, c3);
4438 c4 = _mm_avg_epu16(c4, c5);
4439 c6 = _mm_avg_epu16(c6, c7);
4440 c8 = _mm_avg_epu16(c8, c9);
4441 cA = _mm_avg_epu16(cA, cB);
4442 cC = _mm_avg_epu16(cC, cD);
4443 cE = _mm_avg_epu16(cE, cF);
4444 c0 = _mm_avg_epu16(c0, c2);
4445 c4 = _mm_avg_epu16(c4, c6);
4446 c8 = _mm_avg_epu16(c8, cA);
4447 cC = _mm_avg_epu16(cC, cE);
4448 c0 = _mm_avg_epu16(c0, c4);
4449 c8 = _mm_avg_epu16(c8, cC);
4450 c0 = _mm_avg_epu16(c0, c8);
4451
4452 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4453 }
4454
4455 source0 += pitch;
4456 source1 += pitch;
4457 source2 += pitch;
4458 source3 += pitch;
4459 source4 += pitch;
4460 source5 += pitch;
4461 source6 += pitch;
4462 source7 += pitch;
4463 source8 += pitch;
4464 source9 += pitch;
4465 sourceA += pitch;
4466 sourceB += pitch;
4467 sourceC += pitch;
4468 sourceD += pitch;
4469 sourceE += pitch;
4470 sourceF += pitch;
4471 }
4472 }
4473 else ASSERT(false);
4474 }
4475 else
4476 #endif
4477 {
// AVERAGE(x, y): rounded-up average of two 32-bit words treated as two
// independent 16-bit lanes. Per lane it evaluates
//   (x & y) + ((x ^ y) >> 1) + ((x ^ y) & 1)  ==  (x + y + 1) / 2
// - ((x ^ y) >> 1) & 0x7FFF7FFF halves the differing bits and clears the bit
//   shifted in from the upper lane;
// - (x ^ y) & 0x00010001 adds each lane's rounding bit.
// A lane result never exceeds 0xFFFF, so the 32-bit additions cannot carry
// between the two lanes.
// Each argument is expanded three times; pass side-effect-free expressions.
4478 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4479
4480 if(internal.samples == 2)
4481 {
4482 for(int y = 0; y < height; y++)
4483 {
4484 for(int x = 0; x < width; x++)
4485 {
4486 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4487 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4488
4489 c0 = AVERAGE(c0, c1);
4490
4491 *(unsigned int*)(source0 + 4 * x) = c0;
4492 }
4493
4494 source0 += pitch;
4495 source1 += pitch;
4496 }
4497 }
4498 else if(internal.samples == 4)
4499 {
4500 for(int y = 0; y < height; y++)
4501 {
4502 for(int x = 0; x < width; x++)
4503 {
4504 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4505 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4506 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4507 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4508
4509 c0 = AVERAGE(c0, c1);
4510 c2 = AVERAGE(c2, c3);
4511 c0 = AVERAGE(c0, c2);
4512
4513 *(unsigned int*)(source0 + 4 * x) = c0;
4514 }
4515
4516 source0 += pitch;
4517 source1 += pitch;
4518 source2 += pitch;
4519 source3 += pitch;
4520 }
4521 }
4522 else if(internal.samples == 8)
4523 {
4524 for(int y = 0; y < height; y++)
4525 {
4526 for(int x = 0; x < width; x++)
4527 {
4528 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4529 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4530 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4531 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4532 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4533 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4534 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4535 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4536
4537 c0 = AVERAGE(c0, c1);
4538 c2 = AVERAGE(c2, c3);
4539 c4 = AVERAGE(c4, c5);
4540 c6 = AVERAGE(c6, c7);
4541 c0 = AVERAGE(c0, c2);
4542 c4 = AVERAGE(c4, c6);
4543 c0 = AVERAGE(c0, c4);
4544
4545 *(unsigned int*)(source0 + 4 * x) = c0;
4546 }
4547
4548 source0 += pitch;
4549 source1 += pitch;
4550 source2 += pitch;
4551 source3 += pitch;
4552 source4 += pitch;
4553 source5 += pitch;
4554 source6 += pitch;
4555 source7 += pitch;
4556 }
4557 }
4558 else if(internal.samples == 16)
4559 {
4560 for(int y = 0; y < height; y++)
4561 {
4562 for(int x = 0; x < width; x++)
4563 {
4564 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4565 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4566 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4567 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4568 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4569 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4570 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4571 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4572 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4573 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4574 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4575 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4576 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4577 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4578 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4579 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4580
4581 c0 = AVERAGE(c0, c1);
4582 c2 = AVERAGE(c2, c3);
4583 c4 = AVERAGE(c4, c5);
4584 c6 = AVERAGE(c6, c7);
4585 c8 = AVERAGE(c8, c9);
4586 cA = AVERAGE(cA, cB);
4587 cC = AVERAGE(cC, cD);
4588 cE = AVERAGE(cE, cF);
4589 c0 = AVERAGE(c0, c2);
4590 c4 = AVERAGE(c4, c6);
4591 c8 = AVERAGE(c8, cA);
4592 cC = AVERAGE(cC, cE);
4593 c0 = AVERAGE(c0, c4);
4594 c8 = AVERAGE(c8, cC);
4595 c0 = AVERAGE(c0, c8);
4596
4597 *(unsigned int*)(source0 + 4 * x) = c0;
4598 }
4599
4600 source0 += pitch;
4601 source1 += pitch;
4602 source2 += pitch;
4603 source3 += pitch;
4604 source4 += pitch;
4605 source5 += pitch;
4606 source6 += pitch;
4607 source7 += pitch;
4608 source8 += pitch;
4609 source9 += pitch;
4610 sourceA += pitch;
4611 sourceB += pitch;
4612 sourceC += pitch;
4613 sourceD += pitch;
4614 sourceE += pitch;
4615 sourceF += pitch;
4616 }
4617 }
4618 else ASSERT(false);
4619
4620 #undef AVERAGE
4621 }
4622 }
4623 else if(internal.format == FORMAT_A16B16G16R16)
4624 {
4625 #if defined(__i386__) || defined(__x86_64__)
4626 if(CPUID::supportsSSE2() && (width % 2) == 0)
4627 {
4628 if(internal.samples == 2)
4629 {
4630 for(int y = 0; y < height; y++)
4631 {
4632 for(int x = 0; x < width; x += 2)
4633 {
4634 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4635 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4636
4637 c0 = _mm_avg_epu16(c0, c1);
4638
4639 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4640 }
4641
4642 source0 += pitch;
4643 source1 += pitch;
4644 }
4645 }
4646 else if(internal.samples == 4)
4647 {
4648 for(int y = 0; y < height; y++)
4649 {
4650 for(int x = 0; x < width; x += 2)
4651 {
4652 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4653 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4654 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4655 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4656
4657 c0 = _mm_avg_epu16(c0, c1);
4658 c2 = _mm_avg_epu16(c2, c3);
4659 c0 = _mm_avg_epu16(c0, c2);
4660
4661 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4662 }
4663
4664 source0 += pitch;
4665 source1 += pitch;
4666 source2 += pitch;
4667 source3 += pitch;
4668 }
4669 }
4670 else if(internal.samples == 8)
4671 {
4672 for(int y = 0; y < height; y++)
4673 {
4674 for(int x = 0; x < width; x += 2)
4675 {
4676 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4677 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4678 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4679 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4680 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4681 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4682 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4683 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4684
4685 c0 = _mm_avg_epu16(c0, c1);
4686 c2 = _mm_avg_epu16(c2, c3);
4687 c4 = _mm_avg_epu16(c4, c5);
4688 c6 = _mm_avg_epu16(c6, c7);
4689 c0 = _mm_avg_epu16(c0, c2);
4690 c4 = _mm_avg_epu16(c4, c6);
4691 c0 = _mm_avg_epu16(c0, c4);
4692
4693 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4694 }
4695
4696 source0 += pitch;
4697 source1 += pitch;
4698 source2 += pitch;
4699 source3 += pitch;
4700 source4 += pitch;
4701 source5 += pitch;
4702 source6 += pitch;
4703 source7 += pitch;
4704 }
4705 }
4706 else if(internal.samples == 16)
4707 {
4708 for(int y = 0; y < height; y++)
4709 {
4710 for(int x = 0; x < width; x += 2)
4711 {
4712 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4713 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4714 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4715 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4716 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4717 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4718 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4719 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4720 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4721 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4722 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4723 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4724 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4725 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4726 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4727 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
4728
4729 c0 = _mm_avg_epu16(c0, c1);
4730 c2 = _mm_avg_epu16(c2, c3);
4731 c4 = _mm_avg_epu16(c4, c5);
4732 c6 = _mm_avg_epu16(c6, c7);
4733 c8 = _mm_avg_epu16(c8, c9);
4734 cA = _mm_avg_epu16(cA, cB);
4735 cC = _mm_avg_epu16(cC, cD);
4736 cE = _mm_avg_epu16(cE, cF);
4737 c0 = _mm_avg_epu16(c0, c2);
4738 c4 = _mm_avg_epu16(c4, c6);
4739 c8 = _mm_avg_epu16(c8, cA);
4740 cC = _mm_avg_epu16(cC, cE);
4741 c0 = _mm_avg_epu16(c0, c4);
4742 c8 = _mm_avg_epu16(c8, cC);
4743 c0 = _mm_avg_epu16(c0, c8);
4744
4745 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4746 }
4747
4748 source0 += pitch;
4749 source1 += pitch;
4750 source2 += pitch;
4751 source3 += pitch;
4752 source4 += pitch;
4753 source5 += pitch;
4754 source6 += pitch;
4755 source7 += pitch;
4756 source8 += pitch;
4757 source9 += pitch;
4758 sourceA += pitch;
4759 sourceB += pitch;
4760 sourceC += pitch;
4761 sourceD += pitch;
4762 sourceE += pitch;
4763 sourceF += pitch;
4764 }
4765 }
4766 else ASSERT(false);
4767 }
4768 else
4769 #endif
4770 {
// AVERAGE(x, y): rounded-up average of two 32-bit words treated as two
// independent 16-bit lanes: per lane (x & y) + ((x ^ y) >> 1) + ((x ^ y) & 1),
// i.e. (x + y + 1) / 2. The 0x7FFF7FFF mask clears the bit the shift pulls in
// from the upper lane, and 0x00010001 selects each lane's rounding bit; lane
// results never exceed 0xFFFF, so nothing carries between lanes.
// The loops below apply it to 2 * width 32-bit words per row.
// Each argument is expanded three times; pass side-effect-free expressions.
4771 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4772
4773 if(internal.samples == 2)
4774 {
4775 for(int y = 0; y < height; y++)
4776 {
4777 for(int x = 0; x < 2 * width; x++)
4778 {
4779 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4780 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4781
4782 c0 = AVERAGE(c0, c1);
4783
4784 *(unsigned int*)(source0 + 4 * x) = c0;
4785 }
4786
4787 source0 += pitch;
4788 source1 += pitch;
4789 }
4790 }
4791 else if(internal.samples == 4)
4792 {
4793 for(int y = 0; y < height; y++)
4794 {
4795 for(int x = 0; x < 2 * width; x++)
4796 {
4797 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4798 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4799 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4800 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4801
4802 c0 = AVERAGE(c0, c1);
4803 c2 = AVERAGE(c2, c3);
4804 c0 = AVERAGE(c0, c2);
4805
4806 *(unsigned int*)(source0 + 4 * x) = c0;
4807 }
4808
4809 source0 += pitch;
4810 source1 += pitch;
4811 source2 += pitch;
4812 source3 += pitch;
4813 }
4814 }
4815 else if(internal.samples == 8)
4816 {
4817 for(int y = 0; y < height; y++)
4818 {
4819 for(int x = 0; x < 2 * width; x++)
4820 {
4821 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4822 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4823 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4824 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4825 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4826 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4827 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4828 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4829
4830 c0 = AVERAGE(c0, c1);
4831 c2 = AVERAGE(c2, c3);
4832 c4 = AVERAGE(c4, c5);
4833 c6 = AVERAGE(c6, c7);
4834 c0 = AVERAGE(c0, c2);
4835 c4 = AVERAGE(c4, c6);
4836 c0 = AVERAGE(c0, c4);
4837
4838 *(unsigned int*)(source0 + 4 * x) = c0;
4839 }
4840
4841 source0 += pitch;
4842 source1 += pitch;
4843 source2 += pitch;
4844 source3 += pitch;
4845 source4 += pitch;
4846 source5 += pitch;
4847 source6 += pitch;
4848 source7 += pitch;
4849 }
4850 }
4851 else if(internal.samples == 16)
4852 {
4853 for(int y = 0; y < height; y++)
4854 {
4855 for(int x = 0; x < 2 * width; x++)
4856 {
4857 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4858 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4859 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4860 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4861 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4862 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4863 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4864 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4865 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4866 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4867 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4868 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4869 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4870 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4871 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4872 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4873
4874 c0 = AVERAGE(c0, c1);
4875 c2 = AVERAGE(c2, c3);
4876 c4 = AVERAGE(c4, c5);
4877 c6 = AVERAGE(c6, c7);
4878 c8 = AVERAGE(c8, c9);
4879 cA = AVERAGE(cA, cB);
4880 cC = AVERAGE(cC, cD);
4881 cE = AVERAGE(cE, cF);
4882 c0 = AVERAGE(c0, c2);
4883 c4 = AVERAGE(c4, c6);
4884 c8 = AVERAGE(c8, cA);
4885 cC = AVERAGE(cC, cE);
4886 c0 = AVERAGE(c0, c4);
4887 c8 = AVERAGE(c8, cC);
4888 c0 = AVERAGE(c0, c8);
4889
4890 *(unsigned int*)(source0 + 4 * x) = c0;
4891 }
4892
4893 source0 += pitch;
4894 source1 += pitch;
4895 source2 += pitch;
4896 source3 += pitch;
4897 source4 += pitch;
4898 source5 += pitch;
4899 source6 += pitch;
4900 source7 += pitch;
4901 source8 += pitch;
4902 source9 += pitch;
4903 sourceA += pitch;
4904 sourceB += pitch;
4905 sourceC += pitch;
4906 sourceD += pitch;
4907 sourceE += pitch;
4908 sourceF += pitch;
4909 }
4910 }
4911 else ASSERT(false);
4912
4913 #undef AVERAGE
4914 }
4915 }
4916 else if(internal.format == FORMAT_R32F)
4917 {
4918 #if defined(__i386__) || defined(__x86_64__)
4919 if(CPUID::supportsSSE() && (width % 4) == 0)
4920 {
4921 if(internal.samples == 2)
4922 {
4923 for(int y = 0; y < height; y++)
4924 {
4925 for(int x = 0; x < width; x += 4)
4926 {
4927 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4928 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4929
4930 c0 = _mm_add_ps(c0, c1);
4931 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
4932
4933 _mm_store_ps((float*)(source0 + 4 * x), c0);
4934 }
4935
4936 source0 += pitch;
4937 source1 += pitch;
4938 }
4939 }
4940 else if(internal.samples == 4)
4941 {
4942 for(int y = 0; y < height; y++)
4943 {
4944 for(int x = 0; x < width; x += 4)
4945 {
4946 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4947 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4948 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4949 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4950
4951 c0 = _mm_add_ps(c0, c1);
4952 c2 = _mm_add_ps(c2, c3);
4953 c0 = _mm_add_ps(c0, c2);
4954 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
4955
4956 _mm_store_ps((float*)(source0 + 4 * x), c0);
4957 }
4958
4959 source0 += pitch;
4960 source1 += pitch;
4961 source2 += pitch;
4962 source3 += pitch;
4963 }
4964 }
4965 else if(internal.samples == 8)
4966 {
4967 for(int y = 0; y < height; y++)
4968 {
4969 for(int x = 0; x < width; x += 4)
4970 {
4971 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4972 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4973 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4974 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4975 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4976 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4977 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4978 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4979
4980 c0 = _mm_add_ps(c0, c1);
4981 c2 = _mm_add_ps(c2, c3);
4982 c4 = _mm_add_ps(c4, c5);
4983 c6 = _mm_add_ps(c6, c7);
4984 c0 = _mm_add_ps(c0, c2);
4985 c4 = _mm_add_ps(c4, c6);
4986 c0 = _mm_add_ps(c0, c4);
4987 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
4988
4989 _mm_store_ps((float*)(source0 + 4 * x), c0);
4990 }
4991
4992 source0 += pitch;
4993 source1 += pitch;
4994 source2 += pitch;
4995 source3 += pitch;
4996 source4 += pitch;
4997 source5 += pitch;
4998 source6 += pitch;
4999 source7 += pitch;
5000 }
5001 }
5002 else if(internal.samples == 16)
5003 {
5004 for(int y = 0; y < height; y++)
5005 {
5006 for(int x = 0; x < width; x += 4)
5007 {
5008 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
5009 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
5010 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
5011 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
5012 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
5013 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
5014 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
5015 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
5016 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
5017 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
5018 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
5019 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
5020 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
5021 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
5022 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
5023 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
5024
5025 c0 = _mm_add_ps(c0, c1);
5026 c2 = _mm_add_ps(c2, c3);
5027 c4 = _mm_add_ps(c4, c5);
5028 c6 = _mm_add_ps(c6, c7);
5029 c8 = _mm_add_ps(c8, c9);
5030 cA = _mm_add_ps(cA, cB);
5031 cC = _mm_add_ps(cC, cD);
5032 cE = _mm_add_ps(cE, cF);
5033 c0 = _mm_add_ps(c0, c2);
5034 c4 = _mm_add_ps(c4, c6);
5035 c8 = _mm_add_ps(c8, cA);
5036 cC = _mm_add_ps(cC, cE);
5037 c0 = _mm_add_ps(c0, c4);
5038 c8 = _mm_add_ps(c8, cC);
5039 c0 = _mm_add_ps(c0, c8);
5040 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
5041
5042 _mm_store_ps((float*)(source0 + 4 * x), c0);
5043 }
5044
5045 source0 += pitch;
5046 source1 += pitch;
5047 source2 += pitch;
5048 source3 += pitch;
5049 source4 += pitch;
5050 source5 += pitch;
5051 source6 += pitch;
5052 source7 += pitch;
5053 source8 += pitch;
5054 source9 += pitch;
5055 sourceA += pitch;
5056 sourceB += pitch;
5057 sourceC += pitch;
5058 sourceD += pitch;
5059 sourceE += pitch;
5060 sourceF += pitch;
5061 }
5062 }
5063 else ASSERT(false);
5064 }
5065 else
5066 #endif
5067 {
5068 if(internal.samples == 2)
5069 {
5070 for(int y = 0; y < height; y++)
5071 {
5072 for(int x = 0; x < width; x++)
5073 {
5074 float c0 = *(float*)(source0 + 4 * x);
5075 float c1 = *(float*)(source1 + 4 * x);
5076
5077 c0 = c0 + c1;
5078 c0 *= 1.0f / 2.0f;
5079
5080 *(float*)(source0 + 4 * x) = c0;
5081 }
5082
5083 source0 += pitch;
5084 source1 += pitch;
5085 }
5086 }
5087 else if(internal.samples == 4)
5088 {
5089 for(int y = 0; y < height; y++)
5090 {
5091 for(int x = 0; x < width; x++)
5092 {
5093 float c0 = *(float*)(source0 + 4 * x);
5094 float c1 = *(float*)(source1 + 4 * x);
5095 float c2 = *(float*)(source2 + 4 * x);
5096 float c3 = *(float*)(source3 + 4 * x);
5097
5098 c0 = c0 + c1;
5099 c2 = c2 + c3;
5100 c0 = c0 + c2;
5101 c0 *= 1.0f / 4.0f;
5102
5103 *(float*)(source0 + 4 * x) = c0;
5104 }
5105
5106 source0 += pitch;
5107 source1 += pitch;
5108 source2 += pitch;
5109 source3 += pitch;
5110 }
5111 }
5112 else if(internal.samples == 8)
5113 {
5114 for(int y = 0; y < height; y++)
5115 {
5116 for(int x = 0; x < width; x++)
5117 {
5118 float c0 = *(float*)(source0 + 4 * x);
5119 float c1 = *(float*)(source1 + 4 * x);
5120 float c2 = *(float*)(source2 + 4 * x);
5121 float c3 = *(float*)(source3 + 4 * x);
5122 float c4 = *(float*)(source4 + 4 * x);
5123 float c5 = *(float*)(source5 + 4 * x);
5124 float c6 = *(float*)(source6 + 4 * x);
5125 float c7 = *(float*)(source7 + 4 * x);
5126
5127 c0 = c0 + c1;
5128 c2 = c2 + c3;
5129 c4 = c4 + c5;
5130 c6 = c6 + c7;
5131 c0 = c0 + c2;
5132 c4 = c4 + c6;
5133 c0 = c0 + c4;
5134 c0 *= 1.0f / 8.0f;
5135
5136 *(float*)(source0 + 4 * x) = c0;
5137 }
5138
5139 source0 += pitch;
5140 source1 += pitch;
5141 source2 += pitch;
5142 source3 += pitch;
5143 source4 += pitch;
5144 source5 += pitch;
5145 source6 += pitch;
5146 source7 += pitch;
5147 }
5148 }
5149 else if(internal.samples == 16)
5150 {
5151 for(int y = 0; y < height; y++)
5152 {
5153 for(int x = 0; x < width; x++)
5154 {
5155 float c0 = *(float*)(source0 + 4 * x);
5156 float c1 = *(float*)(source1 + 4 * x);
5157 float c2 = *(float*)(source2 + 4 * x);
5158 float c3 = *(float*)(source3 + 4 * x);
5159 float c4 = *(float*)(source4 + 4 * x);
5160 float c5 = *(float*)(source5 + 4 * x);
5161 float c6 = *(float*)(source6 + 4 * x);
5162 float c7 = *(float*)(source7 + 4 * x);
5163 float c8 = *(float*)(source8 + 4 * x);
5164 float c9 = *(float*)(source9 + 4 * x);
5165 float cA = *(float*)(sourceA + 4 * x);
5166 float cB = *(float*)(sourceB + 4 * x);
5167 float cC = *(float*)(sourceC + 4 * x);
5168 float cD = *(float*)(sourceD + 4 * x);
5169 float cE = *(float*)(sourceE + 4 * x);
5170 float cF = *(float*)(sourceF + 4 * x);
5171
5172 c0 = c0 + c1;
5173 c2 = c2 + c3;
5174 c4 = c4 + c5;
5175 c6 = c6 + c7;
5176 c8 = c8 + c9;
5177 cA = cA + cB;
5178 cC = cC + cD;
5179 cE = cE + cF;
5180 c0 = c0 + c2;
5181 c4 = c4 + c6;
5182 c8 = c8 + cA;
5183 cC = cC + cE;
5184 c0 = c0 + c4;
5185 c8 = c8 + cC;
5186 c0 = c0 + c8;
5187 c0 *= 1.0f / 16.0f;
5188
5189 *(float*)(source0 + 4 * x) = c0;
5190 }
5191
5192 source0 += pitch;
5193 source1 += pitch;
5194 source2 += pitch;
5195 source3 += pitch;
5196 source4 += pitch;
5197 source5 += pitch;
5198 source6 += pitch;
5199 source7 += pitch;
5200 source8 += pitch;
5201 source9 += pitch;
5202 sourceA += pitch;
5203 sourceB += pitch;
5204 sourceC += pitch;
5205 sourceD += pitch;
5206 sourceE += pitch;
5207 sourceF += pitch;
5208 }
5209 }
5210 else ASSERT(false);
5211 }
5212 }
5213 else if(internal.format == FORMAT_G32R32F)
5214 {
5215 #if defined(__i386__) || defined(__x86_64__)
5216 if(CPUID::supportsSSE() && (width % 2) == 0)
5217 {
5218 if(internal.samples == 2)
5219 {
5220 for(int y = 0; y < height; y++)
5221 {
5222 for(int x = 0; x < width; x += 2)
5223 {
5224 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5225 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5226
5227 c0 = _mm_add_ps(c0, c1);
5228 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
5229
5230 _mm_store_ps((float*)(source0 + 8 * x), c0);
5231 }
5232
5233 source0 += pitch;
5234 source1 += pitch;
5235 }
5236 }
5237 else if(internal.samples == 4)
5238 {
5239 for(int y = 0; y < height; y++)
5240 {
5241 for(int x = 0; x < width; x += 2)
5242 {
5243 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5244 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5245 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5246 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5247
5248 c0 = _mm_add_ps(c0, c1);
5249 c2 = _mm_add_ps(c2, c3);
5250 c0 = _mm_add_ps(c0, c2);
5251 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
5252
5253 _mm_store_ps((float*)(source0 + 8 * x), c0);
5254 }
5255
5256 source0 += pitch;
5257 source1 += pitch;
5258 source2 += pitch;
5259 source3 += pitch;
5260 }
5261 }
5262 else if(internal.samples == 8)
5263 {
5264 for(int y = 0; y < height; y++)
5265 {
5266 for(int x = 0; x < width; x += 2)
5267 {
5268 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5269 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5270 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5271 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5272 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5273 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5274 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5275 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5276
5277 c0 = _mm_add_ps(c0, c1);
5278 c2 = _mm_add_ps(c2, c3);
5279 c4 = _mm_add_ps(c4, c5);
5280 c6 = _mm_add_ps(c6, c7);
5281 c0 = _mm_add_ps(c0, c2);
5282 c4 = _mm_add_ps(c4, c6);
5283 c0 = _mm_add_ps(c0, c4);
5284 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
5285
5286 _mm_store_ps((float*)(source0 + 8 * x), c0);
5287 }
5288
5289 source0 += pitch;
5290 source1 += pitch;
5291 source2 += pitch;
5292 source3 += pitch;
5293 source4 += pitch;
5294 source5 += pitch;
5295 source6 += pitch;
5296 source7 += pitch;
5297 }
5298 }
5299 else if(internal.samples == 16)
5300 {
5301 for(int y = 0; y < height; y++)
5302 {
5303 for(int x = 0; x < width; x += 2)
5304 {
5305 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5306 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5307 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5308 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5309 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5310 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5311 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5312 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5313 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5314 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5315 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5316 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5317 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5318 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5319 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5320 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
5321
5322 c0 = _mm_add_ps(c0, c1);
5323 c2 = _mm_add_ps(c2, c3);
5324 c4 = _mm_add_ps(c4, c5);
5325 c6 = _mm_add_ps(c6, c7);
5326 c8 = _mm_add_ps(c8, c9);
5327 cA = _mm_add_ps(cA, cB);
5328 cC = _mm_add_ps(cC, cD);
5329 cE = _mm_add_ps(cE, cF);
5330 c0 = _mm_add_ps(c0, c2);
5331 c4 = _mm_add_ps(c4, c6);
5332 c8 = _mm_add_ps(c8, cA);
5333 cC = _mm_add_ps(cC, cE);
5334 c0 = _mm_add_ps(c0, c4);
5335 c8 = _mm_add_ps(c8, cC);
5336 c0 = _mm_add_ps(c0, c8);
5337 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
5338
5339 _mm_store_ps((float*)(source0 + 8 * x), c0);
5340 }
5341
5342 source0 += pitch;
5343 source1 += pitch;
5344 source2 += pitch;
5345 source3 += pitch;
5346 source4 += pitch;
5347 source5 += pitch;
5348 source6 += pitch;
5349 source7 += pitch;
5350 source8 += pitch;
5351 source9 += pitch;
5352 sourceA += pitch;
5353 sourceB += pitch;
5354 sourceC += pitch;
5355 sourceD += pitch;
5356 sourceE += pitch;
5357 sourceF += pitch;
5358 }
5359 }
5360 else ASSERT(false);
5361 }
5362 else
5363 #endif
5364 {
5365 if(internal.samples == 2)
5366 {
5367 for(int y = 0; y < height; y++)
5368 {
5369 for(int x = 0; x < 2 * width; x++)
5370 {
5371 float c0 = *(float*)(source0 + 4 * x);
5372 float c1 = *(float*)(source1 + 4 * x);
5373
5374 c0 = c0 + c1;
5375 c0 *= 1.0f / 2.0f;
5376
5377 *(float*)(source0 + 4 * x) = c0;
5378 }
5379
5380 source0 += pitch;
5381 source1 += pitch;
5382 }
5383 }
5384 else if(internal.samples == 4)
5385 {
5386 for(int y = 0; y < height; y++)
5387 {
5388 for(int x = 0; x < 2 * width; x++)
5389 {
5390 float c0 = *(float*)(source0 + 4 * x);
5391 float c1 = *(float*)(source1 + 4 * x);
5392 float c2 = *(float*)(source2 + 4 * x);
5393 float c3 = *(float*)(source3 + 4 * x);
5394
5395 c0 = c0 + c1;
5396 c2 = c2 + c3;
5397 c0 = c0 + c2;
5398 c0 *= 1.0f / 4.0f;
5399
5400 *(float*)(source0 + 4 * x) = c0;
5401 }
5402
5403 source0 += pitch;
5404 source1 += pitch;
5405 source2 += pitch;
5406 source3 += pitch;
5407 }
5408 }
5409 else if(internal.samples == 8)
5410 {
5411 for(int y = 0; y < height; y++)
5412 {
5413 for(int x = 0; x < 2 * width; x++)
5414 {
5415 float c0 = *(float*)(source0 + 4 * x);
5416 float c1 = *(float*)(source1 + 4 * x);
5417 float c2 = *(float*)(source2 + 4 * x);
5418 float c3 = *(float*)(source3 + 4 * x);
5419 float c4 = *(float*)(source4 + 4 * x);
5420 float c5 = *(float*)(source5 + 4 * x);
5421 float c6 = *(float*)(source6 + 4 * x);
5422 float c7 = *(float*)(source7 + 4 * x);
5423
5424 c0 = c0 + c1;
5425 c2 = c2 + c3;
5426 c4 = c4 + c5;
5427 c6 = c6 + c7;
5428 c0 = c0 + c2;
5429 c4 = c4 + c6;
5430 c0 = c0 + c4;
5431 c0 *= 1.0f / 8.0f;
5432
5433 *(float*)(source0 + 4 * x) = c0;
5434 }
5435
5436 source0 += pitch;
5437 source1 += pitch;
5438 source2 += pitch;
5439 source3 += pitch;
5440 source4 += pitch;
5441 source5 += pitch;
5442 source6 += pitch;
5443 source7 += pitch;
5444 }
5445 }
5446 else if(internal.samples == 16)
5447 {
5448 for(int y = 0; y < height; y++)
5449 {
5450 for(int x = 0; x < 2 * width; x++)
5451 {
5452 float c0 = *(float*)(source0 + 4 * x);
5453 float c1 = *(float*)(source1 + 4 * x);
5454 float c2 = *(float*)(source2 + 4 * x);
5455 float c3 = *(float*)(source3 + 4 * x);
5456 float c4 = *(float*)(source4 + 4 * x);
5457 float c5 = *(float*)(source5 + 4 * x);
5458 float c6 = *(float*)(source6 + 4 * x);
5459 float c7 = *(float*)(source7 + 4 * x);
5460 float c8 = *(float*)(source8 + 4 * x);
5461 float c9 = *(float*)(source9 + 4 * x);
5462 float cA = *(float*)(sourceA + 4 * x);
5463 float cB = *(float*)(sourceB + 4 * x);
5464 float cC = *(float*)(sourceC + 4 * x);
5465 float cD = *(float*)(sourceD + 4 * x);
5466 float cE = *(float*)(sourceE + 4 * x);
5467 float cF = *(float*)(sourceF + 4 * x);
5468
5469 c0 = c0 + c1;
5470 c2 = c2 + c3;
5471 c4 = c4 + c5;
5472 c6 = c6 + c7;
5473 c8 = c8 + c9;
5474 cA = cA + cB;
5475 cC = cC + cD;
5476 cE = cE + cF;
5477 c0 = c0 + c2;
5478 c4 = c4 + c6;
5479 c8 = c8 + cA;
5480 cC = cC + cE;
5481 c0 = c0 + c4;
5482 c8 = c8 + cC;
5483 c0 = c0 + c8;
5484 c0 *= 1.0f / 16.0f;
5485
5486 *(float*)(source0 + 4 * x) = c0;
5487 }
5488
5489 source0 += pitch;
5490 source1 += pitch;
5491 source2 += pitch;
5492 source3 += pitch;
5493 source4 += pitch;
5494 source5 += pitch;
5495 source6 += pitch;
5496 source7 += pitch;
5497 source8 += pitch;
5498 source9 += pitch;
5499 sourceA += pitch;
5500 sourceB += pitch;
5501 sourceC += pitch;
5502 sourceD += pitch;
5503 sourceE += pitch;
5504 sourceF += pitch;
5505 }
5506 }
5507 else ASSERT(false);
5508 }
5509 }
5510 else if(internal.format == FORMAT_A32B32G32R32F ||
5511 internal.format == FORMAT_X32B32G32R32F ||
5512 internal.format == FORMAT_X32B32G32R32F_UNSIGNED)
5513 {
5514 #if defined(__i386__) || defined(__x86_64__)
5515 if(CPUID::supportsSSE())
5516 {
5517 if(internal.samples == 2)
5518 {
5519 for(int y = 0; y < height; y++)
5520 {
5521 for(int x = 0; x < width; x++)
5522 {
5523 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5524 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5525
5526 c0 = _mm_add_ps(c0, c1);
5527 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
5528
5529 _mm_store_ps((float*)(source0 + 16 * x), c0);
5530 }
5531
5532 source0 += pitch;
5533 source1 += pitch;
5534 }
5535 }
5536 else if(internal.samples == 4)
5537 {
5538 for(int y = 0; y < height; y++)
5539 {
5540 for(int x = 0; x < width; x++)
5541 {
5542 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5543 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5544 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5545 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5546
5547 c0 = _mm_add_ps(c0, c1);
5548 c2 = _mm_add_ps(c2, c3);
5549 c0 = _mm_add_ps(c0, c2);
5550 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
5551
5552 _mm_store_ps((float*)(source0 + 16 * x), c0);
5553 }
5554
5555 source0 += pitch;
5556 source1 += pitch;
5557 source2 += pitch;
5558 source3 += pitch;
5559 }
5560 }
5561 else if(internal.samples == 8)
5562 {
5563 for(int y = 0; y < height; y++)
5564 {
5565 for(int x = 0; x < width; x++)
5566 {
5567 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5568 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5569 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5570 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5571 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5572 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5573 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5574 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5575
5576 c0 = _mm_add_ps(c0, c1);
5577 c2 = _mm_add_ps(c2, c3);
5578 c4 = _mm_add_ps(c4, c5);
5579 c6 = _mm_add_ps(c6, c7);
5580 c0 = _mm_add_ps(c0, c2);
5581 c4 = _mm_add_ps(c4, c6);
5582 c0 = _mm_add_ps(c0, c4);
5583 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
5584
5585 _mm_store_ps((float*)(source0 + 16 * x), c0);
5586 }
5587
5588 source0 += pitch;
5589 source1 += pitch;
5590 source2 += pitch;
5591 source3 += pitch;
5592 source4 += pitch;
5593 source5 += pitch;
5594 source6 += pitch;
5595 source7 += pitch;
5596 }
5597 }
5598 else if(internal.samples == 16)
5599 {
5600 for(int y = 0; y < height; y++)
5601 {
5602 for(int x = 0; x < width; x++)
5603 {
5604 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5605 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5606 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5607 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5608 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5609 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5610 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5611 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5612 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5613 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5614 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5615 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5616 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5617 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5618 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5619 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
5620
5621 c0 = _mm_add_ps(c0, c1);
5622 c2 = _mm_add_ps(c2, c3);
5623 c4 = _mm_add_ps(c4, c5);
5624 c6 = _mm_add_ps(c6, c7);
5625 c8 = _mm_add_ps(c8, c9);
5626 cA = _mm_add_ps(cA, cB);
5627 cC = _mm_add_ps(cC, cD);
5628 cE = _mm_add_ps(cE, cF);
5629 c0 = _mm_add_ps(c0, c2);
5630 c4 = _mm_add_ps(c4, c6);
5631 c8 = _mm_add_ps(c8, cA);
5632 cC = _mm_add_ps(cC, cE);
5633 c0 = _mm_add_ps(c0, c4);
5634 c8 = _mm_add_ps(c8, cC);
5635 c0 = _mm_add_ps(c0, c8);
5636 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
5637
5638 _mm_store_ps((float*)(source0 + 16 * x), c0);
5639 }
5640
5641 source0 += pitch;
5642 source1 += pitch;
5643 source2 += pitch;
5644 source3 += pitch;
5645 source4 += pitch;
5646 source5 += pitch;
5647 source6 += pitch;
5648 source7 += pitch;
5649 source8 += pitch;
5650 source9 += pitch;
5651 sourceA += pitch;
5652 sourceB += pitch;
5653 sourceC += pitch;
5654 sourceD += pitch;
5655 sourceE += pitch;
5656 sourceF += pitch;
5657 }
5658 }
5659 else ASSERT(false);
5660 }
5661 else
5662 #endif
5663 {
5664 if(internal.samples == 2)
5665 {
5666 for(int y = 0; y < height; y++)
5667 {
5668 for(int x = 0; x < 4 * width; x++)
5669 {
5670 float c0 = *(float*)(source0 + 4 * x);
5671 float c1 = *(float*)(source1 + 4 * x);
5672
5673 c0 = c0 + c1;
5674 c0 *= 1.0f / 2.0f;
5675
5676 *(float*)(source0 + 4 * x) = c0;
5677 }
5678
5679 source0 += pitch;
5680 source1 += pitch;
5681 }
5682 }
5683 else if(internal.samples == 4)
5684 {
5685 for(int y = 0; y < height; y++)
5686 {
5687 for(int x = 0; x < 4 * width; x++)
5688 {
5689 float c0 = *(float*)(source0 + 4 * x);
5690 float c1 = *(float*)(source1 + 4 * x);
5691 float c2 = *(float*)(source2 + 4 * x);
5692 float c3 = *(float*)(source3 + 4 * x);
5693
5694 c0 = c0 + c1;
5695 c2 = c2 + c3;
5696 c0 = c0 + c2;
5697 c0 *= 1.0f / 4.0f;
5698
5699 *(float*)(source0 + 4 * x) = c0;
5700 }
5701
5702 source0 += pitch;
5703 source1 += pitch;
5704 source2 += pitch;
5705 source3 += pitch;
5706 }
5707 }
5708 else if(internal.samples == 8)
5709 {
5710 for(int y = 0; y < height; y++)
5711 {
5712 for(int x = 0; x < 4 * width; x++)
5713 {
5714 float c0 = *(float*)(source0 + 4 * x);
5715 float c1 = *(float*)(source1 + 4 * x);
5716 float c2 = *(float*)(source2 + 4 * x);
5717 float c3 = *(float*)(source3 + 4 * x);
5718 float c4 = *(float*)(source4 + 4 * x);
5719 float c5 = *(float*)(source5 + 4 * x);
5720 float c6 = *(float*)(source6 + 4 * x);
5721 float c7 = *(float*)(source7 + 4 * x);
5722
5723 c0 = c0 + c1;
5724 c2 = c2 + c3;
5725 c4 = c4 + c5;
5726 c6 = c6 + c7;
5727 c0 = c0 + c2;
5728 c4 = c4 + c6;
5729 c0 = c0 + c4;
5730 c0 *= 1.0f / 8.0f;
5731
5732 *(float*)(source0 + 4 * x) = c0;
5733 }
5734
5735 source0 += pitch;
5736 source1 += pitch;
5737 source2 += pitch;
5738 source3 += pitch;
5739 source4 += pitch;
5740 source5 += pitch;
5741 source6 += pitch;
5742 source7 += pitch;
5743 }
5744 }
5745 else if(internal.samples == 16)
5746 {
5747 for(int y = 0; y < height; y++)
5748 {
5749 for(int x = 0; x < 4 * width; x++)
5750 {
5751 float c0 = *(float*)(source0 + 4 * x);
5752 float c1 = *(float*)(source1 + 4 * x);
5753 float c2 = *(float*)(source2 + 4 * x);
5754 float c3 = *(float*)(source3 + 4 * x);
5755 float c4 = *(float*)(source4 + 4 * x);
5756 float c5 = *(float*)(source5 + 4 * x);
5757 float c6 = *(float*)(source6 + 4 * x);
5758 float c7 = *(float*)(source7 + 4 * x);
5759 float c8 = *(float*)(source8 + 4 * x);
5760 float c9 = *(float*)(source9 + 4 * x);
5761 float cA = *(float*)(sourceA + 4 * x);
5762 float cB = *(float*)(sourceB + 4 * x);
5763 float cC = *(float*)(sourceC + 4 * x);
5764 float cD = *(float*)(sourceD + 4 * x);
5765 float cE = *(float*)(sourceE + 4 * x);
5766 float cF = *(float*)(sourceF + 4 * x);
5767
5768 c0 = c0 + c1;
5769 c2 = c2 + c3;
5770 c4 = c4 + c5;
5771 c6 = c6 + c7;
5772 c8 = c8 + c9;
5773 cA = cA + cB;
5774 cC = cC + cD;
5775 cE = cE + cF;
5776 c0 = c0 + c2;
5777 c4 = c4 + c6;
5778 c8 = c8 + cA;
5779 cC = cC + cE;
5780 c0 = c0 + c4;
5781 c8 = c8 + cC;
5782 c0 = c0 + c8;
5783 c0 *= 1.0f / 16.0f;
5784
5785 *(float*)(source0 + 4 * x) = c0;
5786 }
5787
5788 source0 += pitch;
5789 source1 += pitch;
5790 source2 += pitch;
5791 source3 += pitch;
5792 source4 += pitch;
5793 source5 += pitch;
5794 source6 += pitch;
5795 source7 += pitch;
5796 source8 += pitch;
5797 source9 += pitch;
5798 sourceA += pitch;
5799 sourceB += pitch;
5800 sourceC += pitch;
5801 sourceD += pitch;
5802 sourceE += pitch;
5803 sourceF += pitch;
5804 }
5805 }
5806 else ASSERT(false);
5807 }
5808 }
5809 else if(internal.format == FORMAT_R5G6B5)
5810 {
5811 #if defined(__i386__) || defined(__x86_64__)
5812 if(CPUID::supportsSSE2() && (width % 8) == 0)
5813 {
5814 if(internal.samples == 2)
5815 {
5816 for(int y = 0; y < height; y++)
5817 {
5818 for(int x = 0; x < width; x += 8)
5819 {
5820 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5821 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5822
5823 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5824 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5825 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5826 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5827 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5828 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5829
5830 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5831 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5832 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5833 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5834 c0 = _mm_or_si128(c0, c1);
5835
5836 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5837 }
5838
5839 source0 += pitch;
5840 source1 += pitch;
5841 }
5842 }
5843 else if(internal.samples == 4)
5844 {
5845 for(int y = 0; y < height; y++)
5846 {
5847 for(int x = 0; x < width; x += 8)
5848 {
5849 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5850 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5851 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5852 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5853
5854 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5855 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5856 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5857 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5858 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5859 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5860 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5861 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5862 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5863 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5864
5865 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5866 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5867 c0 = _mm_avg_epu8(c0, c2);
5868 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5869 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5870 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5871 c1 = _mm_avg_epu16(c1, c3);
5872 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5873 c0 = _mm_or_si128(c0, c1);
5874
5875 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5876 }
5877
5878 source0 += pitch;
5879 source1 += pitch;
5880 source2 += pitch;
5881 source3 += pitch;
5882 }
5883 }
5884 else if(internal.samples == 8)
5885 {
5886 for(int y = 0; y < height; y++)
5887 {
5888 for(int x = 0; x < width; x += 8)
5889 {
5890 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5891 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5892 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5893 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5894 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5895 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5896 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5897 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5898
5899 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5900 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5901 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5902 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5903 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5904 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5905 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5906 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5907 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5908 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5909 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5910 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5911 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5912 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5913 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5914 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5915 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5916 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5917
5918 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5919 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5920 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5921 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5922 c0 = _mm_avg_epu8(c0, c2);
5923 c4 = _mm_avg_epu8(c4, c6);
5924 c0 = _mm_avg_epu8(c0, c4);
5925 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5926 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5927 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5928 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5929 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5930 c1 = _mm_avg_epu16(c1, c3);
5931 c5 = _mm_avg_epu16(c5, c7);
5932 c1 = _mm_avg_epu16(c1, c5);
5933 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5934 c0 = _mm_or_si128(c0, c1);
5935
5936 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5937 }
5938
5939 source0 += pitch;
5940 source1 += pitch;
5941 source2 += pitch;
5942 source3 += pitch;
5943 source4 += pitch;
5944 source5 += pitch;
5945 source6 += pitch;
5946 source7 += pitch;
5947 }
5948 }
5949 else if(internal.samples == 16)
5950 {
5951 for(int y = 0; y < height; y++)
5952 {
5953 for(int x = 0; x < width; x += 8)
5954 {
5955 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5956 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5957 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5958 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5959 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5960 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5961 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5962 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5963 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5964 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5965 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5966 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5967 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5968 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5969 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5970 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
5971
5972 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5973 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5974 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5975 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5976 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5977 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5978 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5979 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5980 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5981 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5982 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5983 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5984 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5985 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5986 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5987 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5988 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5989 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5990 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5991 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5992 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5993 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5994 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5995 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5996 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5997 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5998 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5999 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
6000 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
6001 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
6002 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
6003 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
6004 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
6005 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
6006
6007 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
6008 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
6009 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
6010 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
6011 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
6012 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
6013 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
6014 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
6015 c0 = _mm_avg_epu8(c0, c2);
6016 c4 = _mm_avg_epu8(c4, c6);
6017 c8 = _mm_avg_epu8(c8, cA);
6018 cC = _mm_avg_epu8(cC, cE);
6019 c0 = _mm_avg_epu8(c0, c4);
6020 c8 = _mm_avg_epu8(c8, cC);
6021 c0 = _mm_avg_epu8(c0, c8);
6022 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
6023 c1 = _mm_avg_epu16(c0__g_, c1__g_);
6024 c3 = _mm_avg_epu16(c2__g_, c3__g_);
6025 c5 = _mm_avg_epu16(c4__g_, c5__g_);
6026 c7 = _mm_avg_epu16(c6__g_, c7__g_);
6027 c9 = _mm_avg_epu16(c8__g_, c9__g_);
6028 cB = _mm_avg_epu16(cA__g_, cB__g_);
6029 cD = _mm_avg_epu16(cC__g_, cD__g_);
6030 cF = _mm_avg_epu16(cE__g_, cF__g_);
6031 c1 = _mm_avg_epu8(c1, c3);
6032 c5 = _mm_avg_epu8(c5, c7);
6033 c9 = _mm_avg_epu8(c9, cB);
6034 cD = _mm_avg_epu8(cD, cF);
6035 c1 = _mm_avg_epu8(c1, c5);
6036 c9 = _mm_avg_epu8(c9, cD);
6037 c1 = _mm_avg_epu8(c1, c9);
6038 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
6039 c0 = _mm_or_si128(c0, c1);
6040
6041 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
6042 }
6043
6044 source0 += pitch;
6045 source1 += pitch;
6046 source2 += pitch;
6047 source3 += pitch;
6048 source4 += pitch;
6049 source5 += pitch;
6050 source6 += pitch;
6051 source7 += pitch;
6052 source8 += pitch;
6053 source9 += pitch;
6054 sourceA += pitch;
6055 sourceB += pitch;
6056 sourceC += pitch;
6057 sourceD += pitch;
6058 sourceE += pitch;
6059 sourceF += pitch;
6060 }
6061 }
6062 else ASSERT(false);
6063 }
6064 else
6065 #endif
6066 {
6067 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
6068
6069 if(internal.samples == 2)
6070 {
6071 for(int y = 0; y < height; y++)
6072 {
6073 for(int x = 0; x < width; x++)
6074 {
6075 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6076 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6077
6078 c0 = AVERAGE(c0, c1);
6079
6080 *(unsigned short*)(source0 + 2 * x) = c0;
6081 }
6082
6083 source0 += pitch;
6084 source1 += pitch;
6085 }
6086 }
6087 else if(internal.samples == 4)
6088 {
6089 for(int y = 0; y < height; y++)
6090 {
6091 for(int x = 0; x < width; x++)
6092 {
6093 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6094 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6095 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6096 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6097
6098 c0 = AVERAGE(c0, c1);
6099 c2 = AVERAGE(c2, c3);
6100 c0 = AVERAGE(c0, c2);
6101
6102 *(unsigned short*)(source0 + 2 * x) = c0;
6103 }
6104
6105 source0 += pitch;
6106 source1 += pitch;
6107 source2 += pitch;
6108 source3 += pitch;
6109 }
6110 }
6111 else if(internal.samples == 8)
6112 {
6113 for(int y = 0; y < height; y++)
6114 {
6115 for(int x = 0; x < width; x++)
6116 {
6117 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6118 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6119 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6120 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6121 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6122 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6123 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6124 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6125
6126 c0 = AVERAGE(c0, c1);
6127 c2 = AVERAGE(c2, c3);
6128 c4 = AVERAGE(c4, c5);
6129 c6 = AVERAGE(c6, c7);
6130 c0 = AVERAGE(c0, c2);
6131 c4 = AVERAGE(c4, c6);
6132 c0 = AVERAGE(c0, c4);
6133
6134 *(unsigned short*)(source0 + 2 * x) = c0;
6135 }
6136
6137 source0 += pitch;
6138 source1 += pitch;
6139 source2 += pitch;
6140 source3 += pitch;
6141 source4 += pitch;
6142 source5 += pitch;
6143 source6 += pitch;
6144 source7 += pitch;
6145 }
6146 }
6147 else if(internal.samples == 16)
6148 {
6149 for(int y = 0; y < height; y++)
6150 {
6151 for(int x = 0; x < width; x++)
6152 {
6153 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6154 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6155 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6156 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6157 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6158 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6159 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6160 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6161 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
6162 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
6163 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
6164 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
6165 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
6166 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
6167 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
6168 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
6169
6170 c0 = AVERAGE(c0, c1);
6171 c2 = AVERAGE(c2, c3);
6172 c4 = AVERAGE(c4, c5);
6173 c6 = AVERAGE(c6, c7);
6174 c8 = AVERAGE(c8, c9);
6175 cA = AVERAGE(cA, cB);
6176 cC = AVERAGE(cC, cD);
6177 cE = AVERAGE(cE, cF);
6178 c0 = AVERAGE(c0, c2);
6179 c4 = AVERAGE(c4, c6);
6180 c8 = AVERAGE(c8, cA);
6181 cC = AVERAGE(cC, cE);
6182 c0 = AVERAGE(c0, c4);
6183 c8 = AVERAGE(c8, cC);
6184 c0 = AVERAGE(c0, c8);
6185
6186 *(unsigned short*)(source0 + 2 * x) = c0;
6187 }
6188
6189 source0 += pitch;
6190 source1 += pitch;
6191 source2 += pitch;
6192 source3 += pitch;
6193 source4 += pitch;
6194 source5 += pitch;
6195 source6 += pitch;
6196 source7 += pitch;
6197 source8 += pitch;
6198 source9 += pitch;
6199 sourceA += pitch;
6200 sourceB += pitch;
6201 sourceC += pitch;
6202 sourceD += pitch;
6203 sourceE += pitch;
6204 sourceF += pitch;
6205 }
6206 }
6207 else ASSERT(false);
6208
6209 #undef AVERAGE
6210 }
6211 }
6212 else
6213 {
6214 // UNIMPLEMENTED();
6215 }
6216 }
6217}
6218