1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22
23#ifdef SDL_HAVE_BLIT_N
24
25#include "SDL_surface_c.h"
26#include "SDL_blit_copy.h"
27
// General optimized routines that write one byte (char) at a time
29#define HAVE_FAST_WRITE_INT8 1
30
// On some CPUs this is slower than combining the bytes and writing a whole word at once
32#ifdef __MIPS__
33#undef HAVE_FAST_WRITE_INT8
34#define HAVE_FAST_WRITE_INT8 0
35#endif
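/*
 * Illustration only (not used by the blitters below): with
 * HAVE_FAST_WRITE_INT8 a pair of destination bytes can simply be stored
 * one at a time, otherwise it is cheaper to combine them and issue a
 * single 16-bit store, e.g. assuming a little-endian byte layout:
 *
 *     #if HAVE_FAST_WRITE_INT8
 *         dst[0] = lo;
 *         dst[1] = hi;
 *     #else
 *         *(Uint16 *)dst = (Uint16)(lo | (hi << 8));
 *     #endif
 */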
36
37// Functions to blit from N-bit surfaces to other surfaces
38
39#define BLIT_FEATURE_NONE 0x00
40#define BLIT_FEATURE_HAS_MMX 0x01
41#define BLIT_FEATURE_HAS_ALTIVEC 0x02
42#define BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH 0x04
43
44#ifdef SDL_ALTIVEC_BLITTERS
45#ifdef SDL_PLATFORM_MACOS
46#include <sys/sysctl.h>
47static size_t GetL3CacheSize(void)
48{
49 const char key[] = "hw.l3cachesize";
50 u_int64_t result = 0;
51 size_t typeSize = sizeof(result);
52
53 int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
54 if (0 != err) {
55 return 0;
56 }
57
58 return result;
59}
60#else
61static size_t GetL3CacheSize(void)
62{
    // XXX: No sysctl to query here; just assume a G4-sized (2 MB) L3 cache
64 return 2097152;
65}
66#endif // SDL_PLATFORM_MACOS
67
68#if (defined(SDL_PLATFORM_MACOS) && (__GNUC__ < 4))
69#define VECUINT8_LITERAL(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
70 (vector unsigned char)(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p)
71#define VECUINT16_LITERAL(a, b, c, d, e, f, g, h) \
72 (vector unsigned short)(a, b, c, d, e, f, g, h)
73#else
74#define VECUINT8_LITERAL(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
75 (vector unsigned char) \
76 { \
77 a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p \
78 }
79#define VECUINT16_LITERAL(a, b, c, d, e, f, g, h) \
80 (vector unsigned short) \
81 { \
82 a, b, c, d, e, f, g, h \
83 }
84#endif
85
86#define UNALIGNED_PTR(x) (((size_t)x) & 0x0000000F)
87#define VSWIZZLE32(a, b, c, d) (vector unsigned char)(0x00 + a, 0x00 + b, 0x00 + c, 0x00 + d, \
88 0x04 + a, 0x04 + b, 0x04 + c, 0x04 + d, \
89 0x08 + a, 0x08 + b, 0x08 + c, 0x08 + d, \
90 0x0C + a, 0x0C + b, 0x0C + c, 0x0C + d)
91
92#define MAKE8888(dstfmt, r, g, b, a) \
93 (((r << dstfmt->Rshift) & dstfmt->Rmask) | \
94 ((g << dstfmt->Gshift) & dstfmt->Gmask) | \
95 ((b << dstfmt->Bshift) & dstfmt->Bmask) | \
96 ((a << dstfmt->Ashift) & dstfmt->Amask))
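/*
 * Example (illustrative): with a typical ARGB8888 destination
 * (Ashift 24, Rshift 16, Gshift 8, Bshift 0),
 * MAKE8888(dstfmt, 0x12, 0x34, 0x56, 0xFF) evaluates to 0xFF123456.
 * The shifts and masks always come from dstfmt, so the same macro works
 * for any 8888-style layout.
 */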
97
98/*
99 * Data Stream Touch...Altivec cache prefetching.
100 *
101 * Don't use this on a G5...however, the speed boost is very significant
102 * on a G4.
103 */
104#define DST_CHAN_SRC 1
105#define DST_CHAN_DEST 2
106
107// macro to set DST control word value...
108#define DST_CTRL(size, count, stride) \
109 (((size) << 24) | ((count) << 16) | (stride))
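/*
 * For example, DST_CTRL(2, 32, 1024) -- the value used by the prefetching
 * blitter below -- packs a block size of 2 (in 16-byte units), a block
 * count of 32 and a stride of 1024 bytes into one control word:
 *
 *     (2 << 24) | (32 << 16) | 1024 == 0x02200400
 */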
110
111#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
112 ? vec_lvsl(0, src) \
113 : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
114
115// Calculate the permute vector used for 32->32 swizzling
116static vector unsigned char calc_swizzle32(const SDL_PixelFormatDetails *srcfmt, const SDL_PixelFormatDetails *dstfmt)
117{
    /*
     * We have to assume that any bits not used by the color channels
     * are alpha, and that alpha is one complete byte, since some formats
     * leave alpha with a zero mask but we should still swizzle those bits.
     */
123 // ARGB
124 static const SDL_PixelFormatDetails default_pixel_format = {
125 SDL_PIXELFORMAT_ARGB8888, 0, 0, { 0, 0 }, 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, 8, 8, 8, 8, 16, 8, 0, 24
126 };
127 const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
128 0x04, 0x04, 0x04, 0x04,
129 0x08, 0x08, 0x08, 0x08,
130 0x0C, 0x0C, 0x0C,
131 0x0C);
132 vector unsigned char vswiz;
133 vector unsigned int srcvec;
134 Uint32 rmask, gmask, bmask, amask;
135
136 if (!srcfmt) {
137 srcfmt = &default_pixel_format;
138 }
139 if (!dstfmt) {
140 dstfmt = &default_pixel_format;
141 }
142
143#define RESHIFT(X) (3 - ((X) >> 3))
144 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
145 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
146 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
147
148 // Use zero for alpha if either surface doesn't have alpha
149 if (dstfmt->Amask) {
150 amask =
151 ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
152 } else {
153 amask =
154 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
155 0xFFFFFFFF);
156 }
157#undef RESHIFT
158
159 ((unsigned int *)(char *)&srcvec)[0] = (rmask | gmask | bmask | amask);
160 vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
161 return (vswiz);
162}
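/*
 * Worked example (big-endian lane numbering; see reorder_ppc64le_vec
 * below for the little-endian fix-up): swizzling ARGB8888 to ABGR8888
 * gives rmask|gmask|bmask|amask == 0x00030201, i.e. destination bytes
 * {A,B,G,R} are fetched from source bytes {0,3,2,1}.  vec_splat repeats
 * that 4-byte pattern across the vector and adding 'plus' offsets it by
 * 0, 4, 8 and 12, producing a vec_perm control vector that swizzles four
 * pixels at once:
 *
 *     { 0,3,2,1,  4,7,6,5,  8,11,10,9,  12,15,14,13 }
 */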
163
164#if SDL_BYTEORDER == SDL_LIL_ENDIAN
165// reorder bytes for PowerPC little endian
166static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
167{
    /* The result vector of calc_swizzle32 reorders bytes using vec_perm.
       The LE transformation for vec_perm implicitly assumes that the
       permutation is being used to reorder vector elements, not to
       reorder bytes within those elements. Unfortunately, the resulting
       order is not the expected one on PowerPC little-endian when the
       first two vector parameters of vec_perm are not of type
       'vector char'. This is because numbering the lanes from the left
       (BE) versus from the right (LE) produces a different interpretation
       of which lanes are odd and which are even.
       Refer to Fedora bug 1392465.
    */
179
180 const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL(
181 0x01, 0x00, 0x03, 0x02,
182 0x05, 0x04, 0x07, 0x06,
183 0x09, 0x08, 0x0B, 0x0A,
184 0x0D, 0x0C, 0x0F, 0x0E);
185
186 vector unsigned char vswiz_ppc64le;
187 vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder);
188 return (vswiz_ppc64le);
189}
190#endif
191
192static void Blit_XRGB8888_RGB565(SDL_BlitInfo *info);
193static void Blit_XRGB8888_RGB565Altivec(SDL_BlitInfo *info)
194{
195 int height = info->dst_h;
196 Uint8 *src = (Uint8 *)info->src;
197 int srcskip = info->src_skip;
198 Uint8 *dst = (Uint8 *)info->dst;
199 int dstskip = info->dst_skip;
200 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
201 vector unsigned char valpha = vec_splat_u8(0);
202 vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
203 vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
204 0x00, 0x0a, 0x00, 0x0e,
205 0x00, 0x12, 0x00, 0x16,
206 0x00, 0x1a, 0x00, 0x1e);
207 vector unsigned short v1 = vec_splat_u16(1);
208 vector unsigned short v3 = vec_splat_u16(3);
209 vector unsigned short v3f =
210 VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
211 0x003f, 0x003f, 0x003f, 0x003f);
212 vector unsigned short vfc =
213 VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
214 0x00fc, 0x00fc, 0x00fc, 0x00fc);
    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-8); // 0xF8F8 per element; shifted to 0xF800 below
216 vf800 = vec_sl(vf800, vec_splat_u16(8));
217
218 while (height--) {
219 vector unsigned char valigner;
220 vector unsigned char voverflow;
221 vector unsigned char vsrc;
222
223 int width = info->dst_w;
224 int extrawidth;
225
226 // do scalar until we can align...
227#define ONE_PIXEL_BLEND(condition, widthvar) \
228 while (condition) { \
229 Uint32 Pixel; \
230 unsigned sR, sG, sB, sA; \
231 DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
232 sR, sG, sB, sA); \
233 *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
234 ((sG << 3) & 0x000007E0) | \
235 ((sB >> 3) & 0x0000001F)); \
236 dst += 2; \
237 src += 4; \
238 widthvar--; \
239 }
240
241 ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
242
243 // After all that work, here's the vector part!
244 extrawidth = (width % 8); // trailing unaligned stores
245 width -= extrawidth;
246 vsrc = vec_ld(0, src);
247 valigner = VEC_ALIGNER(src);
248
249 while (width) {
250 vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
251 vector unsigned int vsrc1, vsrc2;
252 vector unsigned char vdst;
253
254 voverflow = vec_ld(15, src);
255 vsrc = vec_perm(vsrc, voverflow, valigner);
256 vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
257 src += 16;
258 vsrc = voverflow;
259 voverflow = vec_ld(15, src);
260 vsrc = vec_perm(vsrc, voverflow, valigner);
261 vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
262 // 1555
263 vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
264 vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
265 vgpixel = vec_and(vgpixel, vfc);
266 vgpixel = vec_sl(vgpixel, v3);
267 vrpixel = vec_sl(vpixel, v1);
268 vrpixel = vec_and(vrpixel, vf800);
269 vbpixel = vec_and(vpixel, v3f);
270 vdst =
271 vec_or((vector unsigned char)vrpixel,
272 (vector unsigned char)vgpixel);
273 // 565
274 vdst = vec_or(vdst, (vector unsigned char)vbpixel);
275 vec_st(vdst, 0, dst);
276
277 width -= 8;
278 src += 16;
279 dst += 16;
280 vsrc = voverflow;
281 }
282
283 SDL_assert(width == 0);
284
        // now do the scalar tail: any pixels that didn't fit in the vector loop
286 ONE_PIXEL_BLEND((extrawidth), extrawidth);
287#undef ONE_PIXEL_BLEND
288
289 src += srcskip; // move to next row, accounting for pitch.
290 dst += dstskip;
291 }
292}
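/*
 * Note: every AltiVec row loop in this file follows roughly the same
 * skeleton, sketched here for reference only:
 *
 *     while (dst not 16-byte aligned && width) { blit one pixel; } // scalar prologue
 *     extrawidth = width % N;                                      // N = pixels per vector iteration (8 or 4 here)
 *     width -= extrawidth;
 *     while (width) { vec_ld / vec_perm / vec_st; width -= N; }    // vector body
 *     while (extrawidth--) { blit one pixel; }                     // scalar epilogue
 *
 * VEC_ALIGNER plus the extra vec_ld(15, src) ("voverflow") handle a src
 * pointer that is not itself 16-byte aligned.
 */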
293
294static void Blit_RGB565_32Altivec(SDL_BlitInfo *info)
295{
296 int height = info->dst_h;
297 Uint8 *src = (Uint8 *)info->src;
298 int srcskip = info->src_skip;
299 Uint8 *dst = (Uint8 *)info->dst;
300 int dstskip = info->dst_skip;
301 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
302 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
303 unsigned alpha;
304 vector unsigned char valpha;
305 vector unsigned char vpermute;
306 vector unsigned short vf800;
307 vector unsigned int v8 = vec_splat_u32(8);
308 vector unsigned int v16 = vec_add(v8, v8);
309 vector unsigned short v2 = vec_splat_u16(2);
310 vector unsigned short v3 = vec_splat_u16(3);
311 /*
312 0x10 - 0x1f is the alpha
313 0x00 - 0x0e evens are the red
314 0x01 - 0x0f odds are zero
315 */
316 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
317 0x10, 0x02, 0x01, 0x01,
318 0x10, 0x04, 0x01, 0x01,
319 0x10, 0x06, 0x01,
320 0x01);
321 vector unsigned char vredalpha2 =
322 (vector unsigned char)(vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16)));
323 /*
324 0x00 - 0x0f is ARxx ARxx ARxx ARxx
       0x11 - 0x1f odds are blue
326 */
327 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
328 0x04, 0x05, 0x06, 0x13,
329 0x08, 0x09, 0x0a, 0x15,
330 0x0c, 0x0d, 0x0e, 0x17);
331 vector unsigned char vblue2 =
332 (vector unsigned char)(vec_add((vector unsigned int)vblue1, v8));
333 /*
334 0x00 - 0x0f is ARxB ARxB ARxB ARxB
       0x10 - 0x1e evens are green
336 */
337 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
338 0x04, 0x05, 0x12, 0x07,
339 0x08, 0x09, 0x14, 0x0b,
340 0x0c, 0x0d, 0x16, 0x0f);
341 vector unsigned char vgreen2 =
342 (vector unsigned char)(vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8)));
343
344 SDL_assert(srcfmt->bytes_per_pixel == 2);
345 SDL_assert(dstfmt->bytes_per_pixel == 4);
346
    vf800 = (vector unsigned short)vec_splat_u8(-8); // 0xF8F8; shifted to 0xF800 below
348 vf800 = vec_sl(vf800, vec_splat_u16(8));
349
350 if (dstfmt->Amask && info->a) {
351 ((unsigned char *)&valpha)[0] = alpha = info->a;
352 valpha = vec_splat(valpha, 0);
353 } else {
354 alpha = 0;
355 valpha = vec_splat_u8(0);
356 }
357
358 vpermute = calc_swizzle32(NULL, dstfmt);
359 while (height--) {
360 vector unsigned char valigner;
361 vector unsigned char voverflow;
362 vector unsigned char vsrc;
363
364 int width = info->dst_w;
365 int extrawidth;
366
367 // do scalar until we can align...
368#define ONE_PIXEL_BLEND(condition, widthvar) \
369 while (condition) { \
370 unsigned sR, sG, sB; \
371 unsigned short Pixel = *((unsigned short *)src); \
372 sR = (Pixel >> 8) & 0xf8; \
373 sG = (Pixel >> 3) & 0xfc; \
374 sB = (Pixel << 3) & 0xf8; \
375 ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
376 src += 2; \
377 dst += 4; \
378 widthvar--; \
379 }
380 ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
381
382 // After all that work, here's the vector part!
383 extrawidth = (width % 8); // trailing unaligned stores
384 width -= extrawidth;
385 vsrc = vec_ld(0, src);
386 valigner = VEC_ALIGNER(src);
387
388 while (width) {
389 vector unsigned short vR, vG, vB;
390 vector unsigned char vdst1, vdst2;
391
392 voverflow = vec_ld(15, src);
393 vsrc = vec_perm(vsrc, voverflow, valigner);
394
395 vR = vec_and((vector unsigned short)vsrc, vf800);
396 vB = vec_sl((vector unsigned short)vsrc, v3);
397 vG = vec_sl(vB, v2);
398
399 vdst1 =
400 (vector unsigned char)vec_perm((vector unsigned char)vR,
401 valpha, vredalpha1);
402 vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
403 vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
404 vdst1 = vec_perm(vdst1, valpha, vpermute);
405 vec_st(vdst1, 0, dst);
406
407 vdst2 =
408 (vector unsigned char)vec_perm((vector unsigned char)vR,
409 valpha, vredalpha2);
410 vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
411 vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
412 vdst2 = vec_perm(vdst2, valpha, vpermute);
413 vec_st(vdst2, 16, dst);
414
415 width -= 8;
416 dst += 32;
417 src += 16;
418 vsrc = voverflow;
419 }
420
421 SDL_assert(width == 0);
422
        // now do the scalar tail: any pixels that didn't fit in the vector loop
424 ONE_PIXEL_BLEND((extrawidth), extrawidth);
425#undef ONE_PIXEL_BLEND
426
427 src += srcskip; // move to next row, accounting for pitch.
428 dst += dstskip;
429 }
430}
431
432static void Blit_RGB555_32Altivec(SDL_BlitInfo *info)
433{
434 int height = info->dst_h;
435 Uint8 *src = (Uint8 *)info->src;
436 int srcskip = info->src_skip;
437 Uint8 *dst = (Uint8 *)info->dst;
438 int dstskip = info->dst_skip;
439 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
440 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
441 unsigned alpha;
442 vector unsigned char valpha;
443 vector unsigned char vpermute;
444 vector unsigned short vf800;
445 vector unsigned int v8 = vec_splat_u32(8);
446 vector unsigned int v16 = vec_add(v8, v8);
447 vector unsigned short v1 = vec_splat_u16(1);
448 vector unsigned short v3 = vec_splat_u16(3);
449 /*
450 0x10 - 0x1f is the alpha
451 0x00 - 0x0e evens are the red
452 0x01 - 0x0f odds are zero
453 */
454 vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
455 0x10, 0x02, 0x01, 0x01,
456 0x10, 0x04, 0x01, 0x01,
457 0x10, 0x06, 0x01,
458 0x01);
459 vector unsigned char vredalpha2 =
460 (vector unsigned char)(vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16)));
461 /*
462 0x00 - 0x0f is ARxx ARxx ARxx ARxx
       0x11 - 0x1f odds are blue
464 */
465 vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
466 0x04, 0x05, 0x06, 0x13,
467 0x08, 0x09, 0x0a, 0x15,
468 0x0c, 0x0d, 0x0e, 0x17);
469 vector unsigned char vblue2 =
470 (vector unsigned char)(vec_add((vector unsigned int)vblue1, v8));
471 /*
472 0x00 - 0x0f is ARxB ARxB ARxB ARxB
       0x10 - 0x1e evens are green
474 */
475 vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
476 0x04, 0x05, 0x12, 0x07,
477 0x08, 0x09, 0x14, 0x0b,
478 0x0c, 0x0d, 0x16, 0x0f);
479 vector unsigned char vgreen2 =
480 (vector unsigned char)(vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8)));
481
482 SDL_assert(srcfmt->bytes_per_pixel == 2);
483 SDL_assert(dstfmt->bytes_per_pixel == 4);
484
    vf800 = (vector unsigned short)vec_splat_u8(-8); // 0xF8F8; shifted to 0xF800 below
486 vf800 = vec_sl(vf800, vec_splat_u16(8));
487
488 if (dstfmt->Amask && info->a) {
489 ((unsigned char *)&valpha)[0] = alpha = info->a;
490 valpha = vec_splat(valpha, 0);
491 } else {
492 alpha = 0;
493 valpha = vec_splat_u8(0);
494 }
495
496 vpermute = calc_swizzle32(NULL, dstfmt);
497 while (height--) {
498 vector unsigned char valigner;
499 vector unsigned char voverflow;
500 vector unsigned char vsrc;
501
502 int width = info->dst_w;
503 int extrawidth;
504
505 // do scalar until we can align...
506#define ONE_PIXEL_BLEND(condition, widthvar) \
507 while (condition) { \
508 unsigned sR, sG, sB; \
509 unsigned short Pixel = *((unsigned short *)src); \
510 sR = (Pixel >> 7) & 0xf8; \
511 sG = (Pixel >> 2) & 0xf8; \
512 sB = (Pixel << 3) & 0xf8; \
513 ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
514 src += 2; \
515 dst += 4; \
516 widthvar--; \
517 }
518 ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
519
520 // After all that work, here's the vector part!
521 extrawidth = (width % 8); // trailing unaligned stores
522 width -= extrawidth;
523 vsrc = vec_ld(0, src);
524 valigner = VEC_ALIGNER(src);
525
526 while (width) {
527 vector unsigned short vR, vG, vB;
528 vector unsigned char vdst1, vdst2;
529
530 voverflow = vec_ld(15, src);
531 vsrc = vec_perm(vsrc, voverflow, valigner);
532
533 vR = vec_and(vec_sl((vector unsigned short)vsrc, v1), vf800);
534 vB = vec_sl((vector unsigned short)vsrc, v3);
535 vG = vec_sl(vB, v3);
536
537 vdst1 =
538 (vector unsigned char)vec_perm((vector unsigned char)vR,
539 valpha, vredalpha1);
540 vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
541 vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
542 vdst1 = vec_perm(vdst1, valpha, vpermute);
543 vec_st(vdst1, 0, dst);
544
545 vdst2 =
546 (vector unsigned char)vec_perm((vector unsigned char)vR,
547 valpha, vredalpha2);
548 vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
549 vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
550 vdst2 = vec_perm(vdst2, valpha, vpermute);
551 vec_st(vdst2, 16, dst);
552
553 width -= 8;
554 dst += 32;
555 src += 16;
556 vsrc = voverflow;
557 }
558
559 SDL_assert(width == 0);
560
        // now do the scalar tail: any pixels that didn't fit in the vector loop
562 ONE_PIXEL_BLEND((extrawidth), extrawidth);
563#undef ONE_PIXEL_BLEND
564
565 src += srcskip; // move to next row, accounting for pitch.
566 dst += dstskip;
567 }
568}
569
570static void BlitNtoNKey(SDL_BlitInfo *info);
571static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
572static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
573{
574 int height = info->dst_h;
575 Uint32 *srcp = (Uint32 *)info->src;
576 int srcskip = info->src_skip / 4;
577 Uint32 *dstp = (Uint32 *)info->dst;
578 int dstskip = info->dst_skip / 4;
579 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
580 int srcbpp = srcfmt->bytes_per_pixel;
581 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
582 int dstbpp = dstfmt->bytes_per_pixel;
583 int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
584 unsigned alpha = dstfmt->Amask ? info->a : 0;
585 Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
586 Uint32 ckey = info->colorkey;
587 vector unsigned int valpha;
588 vector unsigned char vpermute;
589 vector unsigned char vzero;
590 vector unsigned int vckey;
591 vector unsigned int vrgbmask;
592 vpermute = calc_swizzle32(srcfmt, dstfmt);
593 if (info->dst_w < 16) {
594 if (copy_alpha) {
595 BlitNtoNKeyCopyAlpha(info);
596 } else {
597 BlitNtoNKey(info);
598 }
599 return;
600 }
601 vzero = vec_splat_u8(0);
602 if (alpha) {
603 ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
604 valpha =
605 (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
606 } else {
607 valpha = (vector unsigned int)vzero;
608 }
609 ckey &= rgbmask;
610 ((unsigned int *)(char *)&vckey)[0] = ckey;
611 vckey = vec_splat(vckey, 0);
612 ((unsigned int *)(char *)&vrgbmask)[0] = rgbmask;
613 vrgbmask = vec_splat(vrgbmask, 0);
614
615#if SDL_BYTEORDER == SDL_LIL_ENDIAN
616 // reorder bytes for PowerPC little endian
617 vpermute = reorder_ppc64le_vec(vpermute);
618#endif
619
620 while (height--) {
621#define ONE_PIXEL_BLEND(condition, widthvar) \
622 if (copy_alpha) { \
623 while (condition) { \
624 Uint32 Pixel; \
625 unsigned sR, sG, sB, sA; \
626 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
627 sR, sG, sB, sA); \
628 if ((Pixel & rgbmask) != ckey) { \
629 ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
630 sR, sG, sB, sA); \
631 } \
632 dstp = (Uint32 *)(((Uint8 *)dstp) + dstbpp); \
633 srcp = (Uint32 *)(((Uint8 *)srcp) + srcbpp); \
634 widthvar--; \
635 } \
636 } else { \
637 while (condition) { \
638 Uint32 Pixel; \
639 unsigned sR, sG, sB; \
640 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
641 if (Pixel != ckey) { \
642 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
643 ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
644 sR, sG, sB, alpha); \
645 } \
646 dstp = (Uint32 *)(((Uint8 *)dstp) + dstbpp); \
647 srcp = (Uint32 *)(((Uint8 *)srcp) + srcbpp); \
648 widthvar--; \
649 } \
650 }
651 int width = info->dst_w;
652 ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
653 SDL_assert(width > 0);
654 if (width > 0) {
655 int extrawidth = (width % 4);
656 vector unsigned char valigner = VEC_ALIGNER(srcp);
657 vector unsigned int vs = vec_ld(0, srcp);
658 width -= extrawidth;
659 SDL_assert(width >= 4);
660 while (width) {
661 vector unsigned char vsel;
662 vector unsigned int vd;
663 vector unsigned int voverflow = vec_ld(15, srcp);
664 // load the source vec
665 vs = vec_perm(vs, voverflow, valigner);
666 // vsel is set for items that match the key
667 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
                vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); // compare the masked pixel against the masked key
669 // permute the src vec to the dest format
670 vs = vec_perm(vs, valpha, vpermute);
671 // load the destination vec
672 vd = vec_ld(0, dstp);
                // select between source and dest into vd
674 vd = (vector unsigned int)vec_sel((vector unsigned char)vs,
675 (vector unsigned char)vd,
676 vsel);
677
678 vec_st(vd, 0, dstp);
679 srcp += 4;
680 width -= 4;
681 dstp += 4;
682 vs = voverflow;
683 }
684 ONE_PIXEL_BLEND((extrawidth), extrawidth);
685#undef ONE_PIXEL_BLEND
686 srcp += srcskip;
687 dstp += dstskip;
688 }
689 }
690}
691
692// Altivec code to swizzle one 32-bit surface to a different 32-bit format.
693// Use this on a G5
694static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
695{
696 int height = info->dst_h;
697 Uint32 *src = (Uint32 *)info->src;
698 int srcskip = info->src_skip / 4;
699 Uint32 *dst = (Uint32 *)info->dst;
700 int dstskip = info->dst_skip / 4;
701 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
702 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
703 vector unsigned int vzero = vec_splat_u32(0);
704 vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
705 if (dstfmt->Amask && !srcfmt->Amask) {
706 if (info->a) {
707 vector unsigned char valpha;
708 ((unsigned char *)&valpha)[0] = info->a;
709 vzero = (vector unsigned int)vec_splat(valpha, 0);
710 }
711 }
712
713 SDL_assert(srcfmt->bytes_per_pixel == 4);
714 SDL_assert(dstfmt->bytes_per_pixel == 4);
715
716#if SDL_BYTEORDER == SDL_LIL_ENDIAN
717 // reorder bytes for PowerPC little endian
718 vpermute = reorder_ppc64le_vec(vpermute);
719#endif
720
721 while (height--) {
722 vector unsigned char valigner;
723 vector unsigned int vbits;
724 vector unsigned int voverflow;
725 Uint32 bits;
726 Uint8 r, g, b, a;
727
728 int width = info->dst_w;
729 int extrawidth;
730
731 // do scalar until we can align...
732 while ((UNALIGNED_PTR(dst)) && (width)) {
733 bits = *(src++);
734 RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
735 if (!srcfmt->Amask)
736 a = info->a;
737 *(dst++) = MAKE8888(dstfmt, r, g, b, a);
738 width--;
739 }
740
741 // After all that work, here's the vector part!
742 extrawidth = (width % 4);
743 width -= extrawidth;
744 valigner = VEC_ALIGNER(src);
745 vbits = vec_ld(0, src);
746
747 while (width) {
748 voverflow = vec_ld(15, src);
749 src += 4;
750 width -= 4;
751 vbits = vec_perm(vbits, voverflow, valigner); // src is ready.
752 vbits = vec_perm(vbits, vzero, vpermute); // swizzle it.
753 vec_st(vbits, 0, dst); // store it back out.
754 dst += 4;
755 vbits = voverflow;
756 }
757
758 SDL_assert(width == 0);
759
760 // cover pixels at the end of the row that didn't fit in 16 bytes.
761 while (extrawidth) {
            bits = *(src++); // at most 3 pixels remain; don't bother with prefetch.
763 RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
764 if (!srcfmt->Amask)
765 a = info->a;
766 *(dst++) = MAKE8888(dstfmt, r, g, b, a);
767 extrawidth--;
768 }
769
770 src += srcskip;
771 dst += dstskip;
772 }
773}
774
775// Altivec code to swizzle one 32-bit surface to a different 32-bit format.
776// Use this on a G4
777static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
778{
779 const int scalar_dst_lead = sizeof(Uint32) * 4;
780 const int vector_dst_lead = sizeof(Uint32) * 16;
781
782 int height = info->dst_h;
783 Uint32 *src = (Uint32 *)info->src;
784 int srcskip = info->src_skip / 4;
785 Uint32 *dst = (Uint32 *)info->dst;
786 int dstskip = info->dst_skip / 4;
787 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
788 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
789 vector unsigned int vzero = vec_splat_u32(0);
790 vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
791 if (dstfmt->Amask && !srcfmt->Amask) {
792 if (info->a) {
793 vector unsigned char valpha;
794 ((unsigned char *)&valpha)[0] = info->a;
795 vzero = (vector unsigned int)vec_splat(valpha, 0);
796 }
797 }
798
799 SDL_assert(srcfmt->bytes_per_pixel == 4);
800 SDL_assert(dstfmt->bytes_per_pixel == 4);
801
802#if SDL_BYTEORDER == SDL_LIL_ENDIAN
803 // reorder bytes for PowerPC little endian
804 vpermute = reorder_ppc64le_vec(vpermute);
805#endif
806
807 while (height--) {
808 vector unsigned char valigner;
809 vector unsigned int vbits;
810 vector unsigned int voverflow;
811 Uint32 bits;
812 Uint8 r, g, b, a;
813
814 int width = info->dst_w;
815 int extrawidth;
816
817 // do scalar until we can align...
818 while ((UNALIGNED_PTR(dst)) && (width)) {
819 vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
820 DST_CHAN_SRC);
821 vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
822 DST_CHAN_DEST);
823 bits = *(src++);
824 RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
825 if (!srcfmt->Amask)
826 a = info->a;
827 *(dst++) = MAKE8888(dstfmt, r, g, b, a);
828 width--;
829 }
830
831 // After all that work, here's the vector part!
832 extrawidth = (width % 4);
833 width -= extrawidth;
834 valigner = VEC_ALIGNER(src);
835 vbits = vec_ld(0, src);
836
837 while (width) {
838 vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
839 DST_CHAN_SRC);
840 vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
841 DST_CHAN_DEST);
842 voverflow = vec_ld(15, src);
843 src += 4;
844 width -= 4;
845 vbits = vec_perm(vbits, voverflow, valigner); // src is ready.
846 vbits = vec_perm(vbits, vzero, vpermute); // swizzle it.
847 vec_st(vbits, 0, dst); // store it back out.
848 dst += 4;
849 vbits = voverflow;
850 }
851
852 SDL_assert(width == 0);
853
854 // cover pixels at the end of the row that didn't fit in 16 bytes.
855 while (extrawidth) {
            bits = *(src++); // at most 3 pixels remain; don't bother with prefetch.
857 RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
858 if (!srcfmt->Amask)
859 a = info->a;
860 *(dst++) = MAKE8888(dstfmt, r, g, b, a);
861 extrawidth--;
862 }
863
864 src += srcskip;
865 dst += dstskip;
866 }
867
868 vec_dss(DST_CHAN_SRC);
869 vec_dss(DST_CHAN_DEST);
870}
871
872static Uint32 GetBlitFeatures(void)
873{
874 static Uint32 features = ~0u;
875 if (features == ~0u) {
876 features = (0
877 // Feature 1 is has-MMX
878 | ((SDL_HasMMX()) ? BLIT_FEATURE_HAS_MMX : 0)
879 // Feature 2 is has-AltiVec
880 | ((SDL_HasAltiVec()) ? BLIT_FEATURE_HAS_ALTIVEC : 0)
881 // Feature 4 is dont-use-prefetch
882 // !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4.
883 | ((GetL3CacheSize() == 0) ? BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH : 0));
884 }
885 return features;
886}
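/*
 * Illustrative use of the feature mask (the bits are checked against each
 * candidate blitter's required features when a blitter is picked):
 *
 *     if (GetBlitFeatures() & BLIT_FEATURE_HAS_ALTIVEC) {
 *         // an AltiVec-specific blitter may be chosen
 *     }
 */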
887
888#ifdef __MWERKS__
889#pragma altivec_model off
890#endif
891#else
892// Feature 1 is has-MMX
893#define GetBlitFeatures() ((SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0))
894#endif
895
896// This is now endian dependent
897#if SDL_BYTEORDER == SDL_LIL_ENDIAN
898#define HI 1
899#define LO 0
900#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
901#define HI 0
902#define LO 1
903#endif
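/*
 * HI and LO are the array offsets of the high-order and low-order halves
 * of a wider value in memory.  The *_TWO macros below use them to pack
 * two 16-bit destination pixels into one 32-bit store in the right order,
 * and RGB565_32 uses them to pick apart the two bytes of a 16-bit source
 * pixel.  For example, on a little-endian CPU the first pixel of a pair
 * must land in the low half of the combined Uint32, hence LO == 0 so that
 * src[LO] (the first pixel) supplies the low half.
 */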
904
905// Special optimized blit for RGB 8-8-8 --> RGB 5-5-5
906#define RGB888_RGB555(dst, src) \
907 { \
908 *(Uint16 *)(dst) = (Uint16)((((*src) & 0x00F80000) >> 9) | \
909 (((*src) & 0x0000F800) >> 6) | \
910 (((*src) & 0x000000F8) >> 3)); \
911 }
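/*
 * Worked example (illustrative): for the XRGB8888 pixel 0x00FF8040
 * (R=0xFF, G=0x80, B=0x40) the three terms are 0x7C00, 0x0200 and 0x0008,
 * so the stored RGB555 value is 0x7E08 (R=0x1F, G=0x10, B=0x08).
 */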
912#ifndef USE_DUFFS_LOOP
913#define RGB888_RGB555_TWO(dst, src) \
914 { \
915 *(Uint32 *)(dst) = (((((src[HI]) & 0x00F80000) >> 9) | \
916 (((src[HI]) & 0x0000F800) >> 6) | \
917 (((src[HI]) & 0x000000F8) >> 3)) \
918 << 16) | \
919 (((src[LO]) & 0x00F80000) >> 9) | \
920 (((src[LO]) & 0x0000F800) >> 6) | \
921 (((src[LO]) & 0x000000F8) >> 3); \
922 }
923#endif
924static void Blit_XRGB8888_RGB555(SDL_BlitInfo *info)
925{
926#ifndef USE_DUFFS_LOOP
927 int c;
928#endif
929 int width, height;
930 Uint32 *src;
931 Uint16 *dst;
932 int srcskip, dstskip;
933
934 // Set up some basic variables
935 width = info->dst_w;
936 height = info->dst_h;
937 src = (Uint32 *)info->src;
938 srcskip = info->src_skip / 4;
939 dst = (Uint16 *)info->dst;
940 dstskip = info->dst_skip / 2;
941
942#ifdef USE_DUFFS_LOOP
943 while (height--) {
944 /* *INDENT-OFF* */ // clang-format off
945 DUFFS_LOOP(
946 RGB888_RGB555(dst, src);
947 ++src;
948 ++dst;
949 , width);
950 /* *INDENT-ON* */ // clang-format on
951 src += srcskip;
952 dst += dstskip;
953 }
954#else
955 // Memory align at 4-byte boundary, if necessary
956 if ((long)dst & 0x03) {
957 // Don't do anything if width is 0
958 if (width == 0) {
959 return;
960 }
961 --width;
962
963 while (height--) {
964 // Perform copy alignment
965 RGB888_RGB555(dst, src);
966 ++src;
967 ++dst;
968
969 // Copy in 4 pixel chunks
970 for (c = width / 4; c; --c) {
971 RGB888_RGB555_TWO(dst, src);
972 src += 2;
973 dst += 2;
974 RGB888_RGB555_TWO(dst, src);
975 src += 2;
976 dst += 2;
977 }
978 // Get any leftovers
979 switch (width & 3) {
980 case 3:
981 RGB888_RGB555(dst, src);
982 ++src;
983 ++dst;
984 SDL_FALLTHROUGH;
985 case 2:
986 RGB888_RGB555_TWO(dst, src);
987 src += 2;
988 dst += 2;
989 break;
990 case 1:
991 RGB888_RGB555(dst, src);
992 ++src;
993 ++dst;
994 break;
995 }
996 src += srcskip;
997 dst += dstskip;
998 }
999 } else {
1000 while (height--) {
1001 // Copy in 4 pixel chunks
1002 for (c = width / 4; c; --c) {
1003 RGB888_RGB555_TWO(dst, src);
1004 src += 2;
1005 dst += 2;
1006 RGB888_RGB555_TWO(dst, src);
1007 src += 2;
1008 dst += 2;
1009 }
1010 // Get any leftovers
1011 switch (width & 3) {
1012 case 3:
1013 RGB888_RGB555(dst, src);
1014 ++src;
1015 ++dst;
1016 SDL_FALLTHROUGH;
1017 case 2:
1018 RGB888_RGB555_TWO(dst, src);
1019 src += 2;
1020 dst += 2;
1021 break;
1022 case 1:
1023 RGB888_RGB555(dst, src);
1024 ++src;
1025 ++dst;
1026 break;
1027 }
1028 src += srcskip;
1029 dst += dstskip;
1030 }
1031 }
1032#endif // USE_DUFFS_LOOP
1033}
1034
1035// Special optimized blit for RGB 8-8-8 --> RGB 5-6-5
1036#define RGB888_RGB565(dst, src) \
1037 { \
1038 *(Uint16 *)(dst) = (Uint16)((((*src) & 0x00F80000) >> 8) | \
1039 (((*src) & 0x0000FC00) >> 5) | \
1040 (((*src) & 0x000000F8) >> 3)); \
1041 }
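/*
 * Worked example (illustrative): the same XRGB8888 pixel 0x00FF8040 packs
 * to RGB565 as 0xF800 | 0x0400 | 0x0008 == 0xFC08 (R=0x1F, G=0x20, B=0x08);
 * note that green keeps 6 bits here instead of 5.
 */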
1042#ifndef USE_DUFFS_LOOP
1043#define RGB888_RGB565_TWO(dst, src) \
1044 { \
1045 *(Uint32 *)(dst) = (((((src[HI]) & 0x00F80000) >> 8) | \
1046 (((src[HI]) & 0x0000FC00) >> 5) | \
1047 (((src[HI]) & 0x000000F8) >> 3)) \
1048 << 16) | \
1049 (((src[LO]) & 0x00F80000) >> 8) | \
1050 (((src[LO]) & 0x0000FC00) >> 5) | \
1051 (((src[LO]) & 0x000000F8) >> 3); \
1052 }
1053#endif
1054static void Blit_XRGB8888_RGB565(SDL_BlitInfo *info)
1055{
1056#ifndef USE_DUFFS_LOOP
1057 int c;
1058#endif
1059 int width, height;
1060 Uint32 *src;
1061 Uint16 *dst;
1062 int srcskip, dstskip;
1063
1064 // Set up some basic variables
1065 width = info->dst_w;
1066 height = info->dst_h;
1067 src = (Uint32 *)info->src;
1068 srcskip = info->src_skip / 4;
1069 dst = (Uint16 *)info->dst;
1070 dstskip = info->dst_skip / 2;
1071
1072#ifdef USE_DUFFS_LOOP
1073 while (height--) {
1074 /* *INDENT-OFF* */ // clang-format off
1075 DUFFS_LOOP(
1076 RGB888_RGB565(dst, src);
1077 ++src;
1078 ++dst;
1079 , width);
1080 /* *INDENT-ON* */ // clang-format on
1081 src += srcskip;
1082 dst += dstskip;
1083 }
1084#else
1085 // Memory align at 4-byte boundary, if necessary
1086 if ((long)dst & 0x03) {
1087 // Don't do anything if width is 0
1088 if (width == 0) {
1089 return;
1090 }
1091 --width;
1092
1093 while (height--) {
1094 // Perform copy alignment
1095 RGB888_RGB565(dst, src);
1096 ++src;
1097 ++dst;
1098
1099 // Copy in 4 pixel chunks
1100 for (c = width / 4; c; --c) {
1101 RGB888_RGB565_TWO(dst, src);
1102 src += 2;
1103 dst += 2;
1104 RGB888_RGB565_TWO(dst, src);
1105 src += 2;
1106 dst += 2;
1107 }
1108 // Get any leftovers
1109 switch (width & 3) {
1110 case 3:
1111 RGB888_RGB565(dst, src);
1112 ++src;
1113 ++dst;
1114 SDL_FALLTHROUGH;
1115 case 2:
1116 RGB888_RGB565_TWO(dst, src);
1117 src += 2;
1118 dst += 2;
1119 break;
1120 case 1:
1121 RGB888_RGB565(dst, src);
1122 ++src;
1123 ++dst;
1124 break;
1125 }
1126 src += srcskip;
1127 dst += dstskip;
1128 }
1129 } else {
1130 while (height--) {
1131 // Copy in 4 pixel chunks
1132 for (c = width / 4; c; --c) {
1133 RGB888_RGB565_TWO(dst, src);
1134 src += 2;
1135 dst += 2;
1136 RGB888_RGB565_TWO(dst, src);
1137 src += 2;
1138 dst += 2;
1139 }
1140 // Get any leftovers
1141 switch (width & 3) {
1142 case 3:
1143 RGB888_RGB565(dst, src);
1144 ++src;
1145 ++dst;
1146 SDL_FALLTHROUGH;
1147 case 2:
1148 RGB888_RGB565_TWO(dst, src);
1149 src += 2;
1150 dst += 2;
1151 break;
1152 case 1:
1153 RGB888_RGB565(dst, src);
1154 ++src;
1155 ++dst;
1156 break;
1157 }
1158 src += srcskip;
1159 dst += dstskip;
1160 }
1161 }
1162#endif // USE_DUFFS_LOOP
1163}
1164
1165#ifdef SDL_HAVE_BLIT_N_RGB565
1166
1167// Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces
1168#define RGB565_32(dst, src, map) (map[src[LO] * 2] + map[src[HI] * 2 + 1])
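/*
 * Each 512-entry table below holds two 32-bit partial pixels per possible
 * byte value: even entries are indexed by the low byte of the 565 pixel
 * (blue plus the low green bits), odd entries by the high byte (red plus
 * the high green bits), with the opaque alpha constant folded into one of
 * the two halves; RGB565_32 simply adds them.  For example, pure red
 * 0xF800 has low byte 0x00 and high byte 0xF8, so with the ARGB8888 table
 * below the result is
 *     RGB565_ARGB8888_LUT[0] + RGB565_ARGB8888_LUT[0xF8 * 2 + 1]
 *     == 0x00000000 + 0xffff0000, i.e. opaque red.
 */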
1169static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
1170{
1171#ifndef USE_DUFFS_LOOP
1172 int c;
1173#endif
1174 int width, height;
1175 Uint8 *src;
1176 Uint32 *dst;
1177 int srcskip, dstskip;
1178
1179 // Set up some basic variables
1180 width = info->dst_w;
1181 height = info->dst_h;
1182 src = info->src;
1183 srcskip = info->src_skip;
1184 dst = (Uint32 *)info->dst;
1185 dstskip = info->dst_skip / 4;
1186
1187#ifdef USE_DUFFS_LOOP
1188 while (height--) {
1189 /* *INDENT-OFF* */ // clang-format off
1190 DUFFS_LOOP(
1191 {
1192 *dst++ = RGB565_32(dst, src, map);
1193 src += 2;
1194 },
1195 width);
1196 /* *INDENT-ON* */ // clang-format on
1197 src += srcskip;
1198 dst += dstskip;
1199 }
1200#else
1201 while (height--) {
1202 // Copy in 4 pixel chunks
1203 for (c = width / 4; c; --c) {
1204 *dst++ = RGB565_32(dst, src, map);
1205 src += 2;
1206 *dst++ = RGB565_32(dst, src, map);
1207 src += 2;
1208 *dst++ = RGB565_32(dst, src, map);
1209 src += 2;
1210 *dst++ = RGB565_32(dst, src, map);
1211 src += 2;
1212 }
1213 // Get any leftovers
1214 switch (width & 3) {
1215 case 3:
1216 *dst++ = RGB565_32(dst, src, map);
1217 src += 2;
1218 SDL_FALLTHROUGH;
1219 case 2:
1220 *dst++ = RGB565_32(dst, src, map);
1221 src += 2;
1222 SDL_FALLTHROUGH;
1223 case 1:
1224 *dst++ = RGB565_32(dst, src, map);
1225 src += 2;
1226 break;
1227 }
1228 src += srcskip;
1229 dst += dstskip;
1230 }
1231#endif // USE_DUFFS_LOOP
1232}
1233
1234/* *INDENT-OFF* */ // clang-format off
1235
1236// Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8
1237static const Uint32 RGB565_ARGB8888_LUT[512] = {
1238 0x00000000, 0xff000000, 0x00000008, 0xff002000,
1239 0x00000010, 0xff004000, 0x00000018, 0xff006100,
1240 0x00000020, 0xff008100, 0x00000029, 0xff00a100,
1241 0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
1242 0x00000041, 0xff080000, 0x0000004a, 0xff082000,
1243 0x00000052, 0xff084000, 0x0000005a, 0xff086100,
1244 0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
1245 0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
1246 0x00000083, 0xff100000, 0x0000008b, 0xff102000,
1247 0x00000094, 0xff104000, 0x0000009c, 0xff106100,
1248 0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
1249 0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
1250 0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
1251 0x000000d5, 0xff184000, 0x000000de, 0xff186100,
1252 0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
1253 0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
1254 0x00000400, 0xff200000, 0x00000408, 0xff202000,
1255 0x00000410, 0xff204000, 0x00000418, 0xff206100,
1256 0x00000420, 0xff208100, 0x00000429, 0xff20a100,
1257 0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
1258 0x00000441, 0xff290000, 0x0000044a, 0xff292000,
1259 0x00000452, 0xff294000, 0x0000045a, 0xff296100,
1260 0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
1261 0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
1262 0x00000483, 0xff310000, 0x0000048b, 0xff312000,
1263 0x00000494, 0xff314000, 0x0000049c, 0xff316100,
1264 0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
1265 0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
1266 0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
1267 0x000004d5, 0xff394000, 0x000004de, 0xff396100,
1268 0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
1269 0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
1270 0x00000800, 0xff410000, 0x00000808, 0xff412000,
1271 0x00000810, 0xff414000, 0x00000818, 0xff416100,
1272 0x00000820, 0xff418100, 0x00000829, 0xff41a100,
1273 0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
1274 0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
1275 0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
1276 0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
1277 0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
1278 0x00000883, 0xff520000, 0x0000088b, 0xff522000,
1279 0x00000894, 0xff524000, 0x0000089c, 0xff526100,
1280 0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
1281 0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
1282 0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
1283 0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
1284 0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
1285 0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
1286 0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
1287 0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
1288 0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
1289 0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
1290 0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
1291 0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
1292 0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
1293 0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
1294 0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
1295 0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
1296 0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
1297 0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
1298 0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
1299 0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
1300 0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
1301 0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
1302 0x00001000, 0xff830000, 0x00001008, 0xff832000,
1303 0x00001010, 0xff834000, 0x00001018, 0xff836100,
1304 0x00001020, 0xff838100, 0x00001029, 0xff83a100,
1305 0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
1306 0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
1307 0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
1308 0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
1309 0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
1310 0x00001083, 0xff940000, 0x0000108b, 0xff942000,
1311 0x00001094, 0xff944000, 0x0000109c, 0xff946100,
1312 0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
1313 0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
1314 0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
1315 0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
1316 0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
1317 0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
1318 0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
1319 0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
1320 0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
1321 0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
1322 0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
1323 0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
1324 0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
1325 0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
1326 0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
1327 0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
1328 0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
1329 0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
1330 0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
1331 0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
1332 0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
1333 0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
1334 0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
1335 0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
1336 0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
1337 0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
1338 0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
1339 0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
1340 0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
1341 0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
1342 0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
1343 0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
1344 0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
1345 0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
1346 0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
1347 0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
1348 0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
1349 0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
1350 0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
1351 0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
1352 0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
1353 0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
1354 0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
1355 0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
1356 0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
1357 0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
1358 0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
1359 0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
1360 0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
1361 0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
1362 0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
1363 0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
1364 0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
1365 0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
1366};
1367
1368static void Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
1369{
1370 Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
1371}
1372
1373// Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8
1374static const Uint32 RGB565_ABGR8888_LUT[512] = {
1375 0xff000000, 0x00000000, 0xff080000, 0x00002000,
1376 0xff100000, 0x00004000, 0xff180000, 0x00006100,
1377 0xff200000, 0x00008100, 0xff290000, 0x0000a100,
1378 0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
1379 0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
1380 0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
1381 0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
1382 0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
1383 0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
1384 0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
1385 0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
1386 0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
1387 0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
1388 0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
1389 0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
1390 0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
1391 0xff000400, 0x00000020, 0xff080400, 0x00002020,
1392 0xff100400, 0x00004020, 0xff180400, 0x00006120,
1393 0xff200400, 0x00008120, 0xff290400, 0x0000a120,
1394 0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
1395 0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
1396 0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
1397 0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
1398 0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
1399 0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
1400 0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
1401 0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
1402 0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
1403 0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
1404 0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
1405 0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
1406 0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
1407 0xff000800, 0x00000041, 0xff080800, 0x00002041,
1408 0xff100800, 0x00004041, 0xff180800, 0x00006141,
1409 0xff200800, 0x00008141, 0xff290800, 0x0000a141,
1410 0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
1411 0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
1412 0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
1413 0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
1414 0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
1415 0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
1416 0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
1417 0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
1418 0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
1419 0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
1420 0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
1421 0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
1422 0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
1423 0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
1424 0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
1425 0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
1426 0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
1427 0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
1428 0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
1429 0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
1430 0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
1431 0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
1432 0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
1433 0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
1434 0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
1435 0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
1436 0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
1437 0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
1438 0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
1439 0xff001000, 0x00000083, 0xff081000, 0x00002083,
1440 0xff101000, 0x00004083, 0xff181000, 0x00006183,
1441 0xff201000, 0x00008183, 0xff291000, 0x0000a183,
1442 0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
1443 0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
1444 0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
1445 0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
1446 0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
1447 0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
1448 0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
1449 0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
1450 0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
1451 0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
1452 0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
1453 0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
1454 0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
1455 0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
1456 0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
1457 0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
1458 0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
1459 0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
1460 0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
1461 0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
1462 0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
1463 0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
1464 0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
1465 0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
1466 0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
1467 0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
1468 0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
1469 0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
1470 0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
1471 0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
1472 0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
1473 0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
1474 0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
1475 0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
1476 0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
1477 0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
1478 0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
1479 0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
1480 0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
1481 0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
1482 0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
1483 0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
1484 0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
1485 0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
1486 0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
1487 0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
1488 0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
1489 0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
1490 0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
1491 0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
1492 0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
1493 0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
1494 0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
1495 0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
1496 0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
1497 0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
1498 0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
1499 0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
1500 0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
1501 0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
1502 0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
1503};
1504
1505static void Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
1506{
1507 Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
1508}
1509
1510// Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8
1511static const Uint32 RGB565_RGBA8888_LUT[512] = {
1512 0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
1513 0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
1514 0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
1515 0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
1516 0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
1517 0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
1518 0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
1519 0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
1520 0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
1521 0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
1522 0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
1523 0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
1524 0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
1525 0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
1526 0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
1527 0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
1528 0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
1529 0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
1530 0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
1531 0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
1532 0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
1533 0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
1534 0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
1535 0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
1536 0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
1537 0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
1538 0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
1539 0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
1540 0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
1541 0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
1542 0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
1543 0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
1544 0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
1545 0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
1546 0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
1547 0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
1548 0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
1549 0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
1550 0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
1551 0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
1552 0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
1553 0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
1554 0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
1555 0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
1556 0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
1557 0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
1558 0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
1559 0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
1560 0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
1561 0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
1562 0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
1563 0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
1564 0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
1565 0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
1566 0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
1567 0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
1568 0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
1569 0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
1570 0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
1571 0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
1572 0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
1573 0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
1574 0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
1575 0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
1576 0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
1577 0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
1578 0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
1579 0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
1580 0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
1581 0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
1582 0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
1583 0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
1584 0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
1585 0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
1586 0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
1587 0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
1588 0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
1589 0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
1590 0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
1591 0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
1592 0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
1593 0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
1594 0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
1595 0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
1596 0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
1597 0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
1598 0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
1599 0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
1600 0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
1601 0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
1602 0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
1603 0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
1604 0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
1605 0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
1606 0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
1607 0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
1608 0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
1609 0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
1610 0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
1611 0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
1612 0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
1613 0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
1614 0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
1615 0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
1616 0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
1617 0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
1618 0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
1619 0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
1620 0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
1621 0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
1622 0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
1623 0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
1624 0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
1625 0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
1626 0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
1627 0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
1628 0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
1629 0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
1630 0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
1631 0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
1632 0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
1633 0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
1634 0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
1635 0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
1636 0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
1637 0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
1638 0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
1639 0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
1640};
1641
1642static void Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
1643{
1644 Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
1645}
1646
1647// Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8
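// Like the other RGB565 lookup tables in this file, this table stores two
// Uint32 entries per byte value: entry [2*b] is the contribution of the low
// byte of a 16-bit RGB565 pixel and entry [2*b + 1] the contribution of the
// high byte; Blit_RGB565_32() combines the pair to expand each pixel to
// 32 bits without any per-pixel shifting or masking.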
1648static const Uint32 RGB565_BGRA8888_LUT[512] = {
1649 0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
1650 0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
1651 0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
1652 0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
1653 0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
1654 0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
1655 0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
1656 0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
1657 0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
1658 0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
1659 0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
1660 0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
1661 0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
1662 0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
1663 0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
1664 0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
1665 0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
1666 0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
1667 0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
1668 0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
1669 0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
1670 0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
1671 0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
1672 0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
1673 0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
1674 0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
1675 0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
1676 0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
1677 0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
1678 0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
1679 0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
1680 0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
1681 0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
1682 0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
1683 0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
1684 0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
1685 0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
1686 0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
1687 0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
1688 0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
1689 0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
1690 0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
1691 0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
1692 0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
1693 0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
1694 0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
1695 0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
1696 0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
1697 0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
1698 0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
1699 0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
1700 0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
1701 0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
1702 0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
1703 0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
1704 0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
1705 0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
1706 0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
1707 0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
1708 0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
1709 0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
1710 0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
1711 0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
1712 0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
1713 0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
1714 0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
1715 0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
1716 0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
1717 0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
1718 0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
1719 0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
1720 0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
1721 0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
1722 0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
1723 0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
1724 0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
1725 0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
1726 0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
1727 0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
1728 0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
1729 0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
1730 0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
1731 0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
1732 0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
1733 0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
1734 0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
1735 0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
1736 0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
1737 0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
1738 0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
1739 0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
1740 0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
1741 0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
1742 0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
1743 0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
1744 0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
1745 0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
1746 0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
1747 0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
1748 0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
1749 0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
1750 0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
1751 0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
1752 0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
1753 0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
1754 0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
1755 0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
1756 0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
1757 0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
1758 0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
1759 0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
1760 0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
1761 0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
1762 0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
1763 0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
1764 0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
1765 0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
1766 0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
1767 0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
1768 0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
1769 0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
1770 0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
1771 0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
1772 0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
1773 0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
1774 0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
1775 0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
1776 0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
1777};
1778
1779static void Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
1780{
1781 Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
1782}
1783
1784/* *INDENT-ON* */ // clang-format on
1785
1786#endif // SDL_HAVE_BLIT_N_RGB565
1787
// Blits 16-bit RGB<->RGBA where both surfaces have the same R, G and B fields
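// For example, blitting XRGB1555 to ARGB1555 takes the SET_ALPHA branch below
// and ORs a constant alpha (rescaled to the destination's Abits) into each
// copied pixel, while ARGB1555 to XRGB1555 takes the NO_ALPHA branch and
// simply masks the alpha bits off.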
1789static void Blit2to2MaskAlpha(SDL_BlitInfo *info)
1790{
1791 int width = info->dst_w;
1792 int height = info->dst_h;
1793 Uint16 *src = (Uint16 *)info->src;
1794 int srcskip = info->src_skip;
1795 Uint16 *dst = (Uint16 *)info->dst;
1796 int dstskip = info->dst_skip;
1797 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
1798 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
1799
1800 if (dstfmt->Amask) {
1801 // RGB->RGBA, SET_ALPHA
1802 Uint16 mask = ((Uint32)info->a >> (8 - dstfmt->Abits)) << dstfmt->Ashift;
1803
1804 while (height--) {
1805 /* *INDENT-OFF* */ // clang-format off
1806 DUFFS_LOOP_TRIVIAL(
1807 {
1808 *dst = *src | mask;
1809 ++dst;
1810 ++src;
1811 },
1812 width);
1813 /* *INDENT-ON* */ // clang-format on
1814 src = (Uint16 *)((Uint8 *)src + srcskip);
1815 dst = (Uint16 *)((Uint8 *)dst + dstskip);
1816 }
1817 } else {
1818 // RGBA->RGB, NO_ALPHA
1819 Uint16 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
1820
1821 while (height--) {
1822 /* *INDENT-OFF* */ // clang-format off
1823 DUFFS_LOOP_TRIVIAL(
1824 {
1825 *dst = *src & mask;
1826 ++dst;
1827 ++src;
1828 },
1829 width);
1830 /* *INDENT-ON* */ // clang-format on
1831 src = (Uint16 *)((Uint8 *)src + srcskip);
1832 dst = (Uint16 *)((Uint8 *)dst + dstskip);
1833 }
1834 }
1835}
1836
// Blits 32-bit RGB<->RGBA where both surfaces have the same R, G and B fields
1838static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
1839{
1840 int width = info->dst_w;
1841 int height = info->dst_h;
1842 Uint32 *src = (Uint32 *)info->src;
1843 int srcskip = info->src_skip;
1844 Uint32 *dst = (Uint32 *)info->dst;
1845 int dstskip = info->dst_skip;
1846 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
1847 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
1848
1849 if (dstfmt->Amask) {
1850 // RGB->RGBA, SET_ALPHA
1851 Uint32 mask = ((Uint32)info->a >> (8 - dstfmt->Abits)) << dstfmt->Ashift;
1852
1853 while (height--) {
1854 /* *INDENT-OFF* */ // clang-format off
1855 DUFFS_LOOP_TRIVIAL(
1856 {
1857 *dst = *src | mask;
1858 ++dst;
1859 ++src;
1860 },
1861 width);
1862 /* *INDENT-ON* */ // clang-format on
1863 src = (Uint32 *)((Uint8 *)src + srcskip);
1864 dst = (Uint32 *)((Uint8 *)dst + dstskip);
1865 }
1866 } else {
1867 // RGBA->RGB, NO_ALPHA
1868 Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
1869
1870 while (height--) {
1871 /* *INDENT-OFF* */ // clang-format off
1872 DUFFS_LOOP_TRIVIAL(
1873 {
1874 *dst = *src & mask;
1875 ++dst;
1876 ++src;
1877 },
1878 width);
1879 /* *INDENT-ON* */ // clang-format on
1880 src = (Uint32 *)((Uint8 *)src + srcskip);
1881 dst = (Uint32 *)((Uint8 *)dst + dstskip);
1882 }
1883 }
1884}
1885
// Computes the byte permutation that maps srcfmt to dstfmt, optionally overwriting the alpha channel
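// For example, on a little-endian machine, mapping ARGB8888 to ABGR8888 yields
// p0=2, p1=1, p2=0, p3=3, i.e. dst[0]=src[2], dst[1]=src[1], dst[2]=src[0],
// dst[3]=src[3]: the red and blue bytes swap while green and alpha stay put.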
1887static void get_permutation(const SDL_PixelFormatDetails *srcfmt, const SDL_PixelFormatDetails *dstfmt,
1888 int *_p0, int *_p1, int *_p2, int *_p3, int *_alpha_channel)
1889{
1890 int alpha_channel = 0, p0, p1, p2, p3;
1891#if SDL_BYTEORDER == SDL_LIL_ENDIAN
1892 int Pixel = 0x04030201; // identity permutation
1893#else
1894 int Pixel = 0x01020304; // identity permutation
1895 int srcbpp = srcfmt->bytes_per_pixel;
1896 int dstbpp = dstfmt->bytes_per_pixel;
1897#endif
1898
1899 if (srcfmt->Amask) {
1900 RGBA_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2, p3);
1901 } else {
1902 RGB_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2);
1903 p3 = 0;
1904 }
1905
1906 if (dstfmt->Amask) {
1907 if (srcfmt->Amask) {
1908 PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, p3);
1909 } else {
1910 PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, 0);
1911 }
1912 } else {
1913 PIXEL_FROM_RGB(Pixel, dstfmt, p0, p1, p2);
1914 }
1915
1916#if SDL_BYTEORDER == SDL_LIL_ENDIAN
1917 p0 = Pixel & 0xFF;
1918 p1 = (Pixel >> 8) & 0xFF;
1919 p2 = (Pixel >> 16) & 0xFF;
1920 p3 = (Pixel >> 24) & 0xFF;
1921#else
1922 p3 = Pixel & 0xFF;
1923 p2 = (Pixel >> 8) & 0xFF;
1924 p1 = (Pixel >> 16) & 0xFF;
1925 p0 = (Pixel >> 24) & 0xFF;
1926#endif
1927
1928 if (p0 == 0) {
1929 p0 = 1;
1930 alpha_channel = 0;
1931 } else if (p1 == 0) {
1932 p1 = 1;
1933 alpha_channel = 1;
1934 } else if (p2 == 0) {
1935 p2 = 1;
1936 alpha_channel = 2;
1937 } else if (p3 == 0) {
1938 p3 = 1;
1939 alpha_channel = 3;
1940 }
1941
1942#if SDL_BYTEORDER == SDL_LIL_ENDIAN
1943#else
1944 if (srcbpp == 3 && dstbpp == 4) {
1945 if (p0 != 1) {
1946 p0--;
1947 }
1948 if (p1 != 1) {
1949 p1--;
1950 }
1951 if (p2 != 1) {
1952 p2--;
1953 }
1954 if (p3 != 1) {
1955 p3--;
1956 }
1957 } else if (srcbpp == 4 && dstbpp == 3) {
1958 p0 = p1;
1959 p1 = p2;
1960 p2 = p3;
1961 }
1962#endif
1963 *_p0 = p0 - 1;
1964 *_p1 = p1 - 1;
1965 *_p2 = p2 - 1;
1966 *_p3 = p3 - 1;
1967
1968 if (_alpha_channel) {
1969 *_alpha_channel = alpha_channel;
1970 }
1971}
1972
1973static void BlitNtoN(SDL_BlitInfo *info)
1974{
1975 int width = info->dst_w;
1976 int height = info->dst_h;
1977 Uint8 *src = info->src;
1978 int srcskip = info->src_skip;
1979 Uint8 *dst = info->dst;
1980 int dstskip = info->dst_skip;
1981 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
1982 int srcbpp = srcfmt->bytes_per_pixel;
1983 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
1984 int dstbpp = dstfmt->bytes_per_pixel;
1985 unsigned alpha = dstfmt->Amask ? info->a : 0;
1986
1987#if HAVE_FAST_WRITE_INT8
1988 // Blit with permutation: 4->4
1989 if (srcbpp == 4 && dstbpp == 4 &&
1990 !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
1991 !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {
1992
1993 // Find the appropriate permutation
1994 int alpha_channel, p0, p1, p2, p3;
1995 get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
1996
1997 while (height--) {
1998 /* *INDENT-OFF* */ // clang-format off
1999 DUFFS_LOOP(
2000 {
2001 dst[0] = src[p0];
2002 dst[1] = src[p1];
2003 dst[2] = src[p2];
2004 dst[3] = src[p3];
2005 dst[alpha_channel] = (Uint8)alpha;
2006 src += 4;
2007 dst += 4;
2008 }, width);
2009 /* *INDENT-ON* */ // clang-format on
2010 src += srcskip;
2011 dst += dstskip;
2012 }
2013 return;
2014 }
2015#endif
2016
2017 // Blit with permutation: 4->3
2018 if (srcbpp == 4 && dstbpp == 3 &&
2019 !SDL_ISPIXELFORMAT_10BIT(srcfmt->format)) {
2020
2021 // Find the appropriate permutation
2022 int p0, p1, p2, p3;
2023 get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
2024
2025 while (height--) {
2026 /* *INDENT-OFF* */ // clang-format off
2027 DUFFS_LOOP(
2028 {
2029 dst[0] = src[p0];
2030 dst[1] = src[p1];
2031 dst[2] = src[p2];
2032 src += 4;
2033 dst += 3;
2034 }, width);
2035 /* *INDENT-ON* */ // clang-format on
2036 src += srcskip;
2037 dst += dstskip;
2038 }
2039 return;
2040 }
2041
2042#if HAVE_FAST_WRITE_INT8
2043 // Blit with permutation: 3->4
2044 if (srcbpp == 3 && dstbpp == 4 &&
2045 !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {
2046
2047 // Find the appropriate permutation
2048 int alpha_channel, p0, p1, p2, p3;
2049 get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
2050
2051 while (height--) {
2052 /* *INDENT-OFF* */ // clang-format off
2053 DUFFS_LOOP(
2054 {
2055 dst[0] = src[p0];
2056 dst[1] = src[p1];
2057 dst[2] = src[p2];
2058 dst[3] = src[p3];
2059 dst[alpha_channel] = (Uint8)alpha;
2060 src += 3;
2061 dst += 4;
2062 }, width);
2063 /* *INDENT-ON* */ // clang-format on
2064 src += srcskip;
2065 dst += dstskip;
2066 }
2067 return;
2068 }
2069#endif
2070
2071 while (height--) {
2072 /* *INDENT-OFF* */ // clang-format off
2073 DUFFS_LOOP(
2074 {
2075 Uint32 Pixel;
2076 unsigned sR;
2077 unsigned sG;
2078 unsigned sB;
2079 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
2080 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
2081 dst += dstbpp;
2082 src += srcbpp;
2083 },
2084 width);
2085 /* *INDENT-ON* */ // clang-format on
2086 src += srcskip;
2087 dst += dstskip;
2088 }
2089}
2090
2091static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
2092{
2093 int width = info->dst_w;
2094 int height = info->dst_h;
2095 Uint8 *src = info->src;
2096 int srcskip = info->src_skip;
2097 Uint8 *dst = info->dst;
2098 int dstskip = info->dst_skip;
2099 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
2100 int srcbpp = srcfmt->bytes_per_pixel;
2101 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
2102 int dstbpp = dstfmt->bytes_per_pixel;
2103 int c;
2104
2105#if HAVE_FAST_WRITE_INT8
2106 // Blit with permutation: 4->4
2107 if (srcbpp == 4 && dstbpp == 4 &&
2108 !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
2109 !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {
2110
2111 // Find the appropriate permutation
2112 int p0, p1, p2, p3;
2113 get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
2114
2115 while (height--) {
2116 /* *INDENT-OFF* */ // clang-format off
2117 DUFFS_LOOP(
2118 {
2119 dst[0] = src[p0];
2120 dst[1] = src[p1];
2121 dst[2] = src[p2];
2122 dst[3] = src[p3];
2123 src += 4;
2124 dst += 4;
2125 }, width);
2126 /* *INDENT-ON* */ // clang-format on
2127 src += srcskip;
2128 dst += dstskip;
2129 }
2130 return;
2131 }
2132#endif
2133
2134 while (height--) {
2135 for (c = width; c; --c) {
2136 Uint32 Pixel;
2137 unsigned sR, sG, sB, sA;
2138 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
2139 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
2140 dst += dstbpp;
2141 src += srcbpp;
2142 }
2143 src += srcskip;
2144 dst += dstskip;
2145 }
2146}
2147
2148static void Blit2to2Key(SDL_BlitInfo *info)
2149{
2150 int width = info->dst_w;
2151 int height = info->dst_h;
2152 Uint16 *srcp = (Uint16 *)info->src;
2153 int srcskip = info->src_skip;
2154 Uint16 *dstp = (Uint16 *)info->dst;
2155 int dstskip = info->dst_skip;
2156 Uint32 ckey = info->colorkey;
2157 Uint32 rgbmask = ~info->src_fmt->Amask;
2158
2159 // Set up some basic variables
2160 srcskip /= 2;
2161 dstskip /= 2;
2162 ckey &= rgbmask;
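    // The colorkey is compared with the source alpha bits masked off, so a
    // pixel matches the key regardless of its alpha value.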
2163
2164 while (height--) {
2165 /* *INDENT-OFF* */ // clang-format off
2166 DUFFS_LOOP_TRIVIAL(
2167 {
2168 if ( (*srcp & rgbmask) != ckey ) {
2169 *dstp = *srcp;
2170 }
2171 dstp++;
2172 srcp++;
2173 },
2174 width);
2175 /* *INDENT-ON* */ // clang-format on
2176 srcp += srcskip;
2177 dstp += dstskip;
2178 }
2179}
2180
2181static void BlitNtoNKey(SDL_BlitInfo *info)
2182{
2183 int width = info->dst_w;
2184 int height = info->dst_h;
2185 Uint8 *src = info->src;
2186 int srcskip = info->src_skip;
2187 Uint8 *dst = info->dst;
2188 int dstskip = info->dst_skip;
2189 Uint32 ckey = info->colorkey;
2190 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
2191 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
2192 int srcbpp = srcfmt->bytes_per_pixel;
2193 int dstbpp = dstfmt->bytes_per_pixel;
2194 unsigned alpha = dstfmt->Amask ? info->a : 0;
2195 Uint32 rgbmask = ~srcfmt->Amask;
2196 int sfmt = srcfmt->format;
2197 int dfmt = dstfmt->format;
2198
2199 // Set up some basic variables
2200 ckey &= rgbmask;
2201
2202 // BPP 4, same rgb
2203 if (srcbpp == 4 && dstbpp == 4 && srcfmt->Rmask == dstfmt->Rmask && srcfmt->Gmask == dstfmt->Gmask && srcfmt->Bmask == dstfmt->Bmask) {
2204 Uint32 *src32 = (Uint32 *)src;
2205 Uint32 *dst32 = (Uint32 *)dst;
2206
2207 if (dstfmt->Amask) {
2208 // RGB->RGBA, SET_ALPHA
2209 Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
2210 while (height--) {
2211 /* *INDENT-OFF* */ // clang-format off
2212 DUFFS_LOOP_TRIVIAL(
2213 {
2214 if ((*src32 & rgbmask) != ckey) {
2215 *dst32 = *src32 | mask;
2216 }
2217 ++dst32;
2218 ++src32;
2219 }, width);
2220 /* *INDENT-ON* */ // clang-format on
2221 src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
2222 dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
2223 }
2224 return;
2225 } else {
2226 // RGBA->RGB, NO_ALPHA
2227 Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
2228 while (height--) {
2229 /* *INDENT-OFF* */ // clang-format off
2230 DUFFS_LOOP_TRIVIAL(
2231 {
2232 if ((*src32 & rgbmask) != ckey) {
2233 *dst32 = *src32 & mask;
2234 }
2235 ++dst32;
2236 ++src32;
2237 }, width);
2238 /* *INDENT-ON* */ // clang-format on
2239 src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
2240 dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
2241 }
2242 return;
2243 }
2244 }
2245
2246#if HAVE_FAST_WRITE_INT8
2247 // Blit with permutation: 4->4
2248 if (srcbpp == 4 && dstbpp == 4 &&
2249 !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
2250 !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {
2251
2252 // Find the appropriate permutation
2253 int alpha_channel, p0, p1, p2, p3;
2254 get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
2255
2256 while (height--) {
2257 /* *INDENT-OFF* */ // clang-format off
2258 DUFFS_LOOP(
2259 {
2260 Uint32 *src32 = (Uint32*)src;
2261
2262 if ((*src32 & rgbmask) != ckey) {
2263 dst[0] = src[p0];
2264 dst[1] = src[p1];
2265 dst[2] = src[p2];
2266 dst[3] = src[p3];
2267 dst[alpha_channel] = (Uint8)alpha;
2268 }
2269 src += 4;
2270 dst += 4;
2271 }, width);
2272 /* *INDENT-ON* */ // clang-format on
2273 src += srcskip;
2274 dst += dstskip;
2275 }
2276 return;
2277 }
2278#endif
2279
2280 // BPP 3, same rgb triplet
2281 if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) ||
2282 (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {
2283
2284#if SDL_BYTEORDER == SDL_LIL_ENDIAN
2285 Uint8 k0 = ckey & 0xFF;
2286 Uint8 k1 = (ckey >> 8) & 0xFF;
2287 Uint8 k2 = (ckey >> 16) & 0xFF;
2288#else
2289 Uint8 k0 = (ckey >> 16) & 0xFF;
2290 Uint8 k1 = (ckey >> 8) & 0xFF;
2291 Uint8 k2 = ckey & 0xFF;
2292#endif
2293
2294 while (height--) {
2295 /* *INDENT-OFF* */ // clang-format off
2296 DUFFS_LOOP(
2297 {
2298 Uint8 s0 = src[0];
2299 Uint8 s1 = src[1];
2300 Uint8 s2 = src[2];
2301
2302 if (k0 != s0 || k1 != s1 || k2 != s2) {
2303 dst[0] = s0;
2304 dst[1] = s1;
2305 dst[2] = s2;
2306 }
2307 src += 3;
2308 dst += 3;
2309 },
2310 width);
2311 /* *INDENT-ON* */ // clang-format on
2312 src += srcskip;
2313 dst += dstskip;
2314 }
2315 return;
2316 }
2317
2318 // BPP 3, inversed rgb triplet
2319 if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_BGR24) ||
2320 (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_RGB24)) {
2321
2322#if SDL_BYTEORDER == SDL_LIL_ENDIAN
2323 Uint8 k0 = ckey & 0xFF;
2324 Uint8 k1 = (ckey >> 8) & 0xFF;
2325 Uint8 k2 = (ckey >> 16) & 0xFF;
2326#else
2327 Uint8 k0 = (ckey >> 16) & 0xFF;
2328 Uint8 k1 = (ckey >> 8) & 0xFF;
2329 Uint8 k2 = ckey & 0xFF;
2330#endif
2331
2332 while (height--) {
2333 /* *INDENT-OFF* */ // clang-format off
2334 DUFFS_LOOP(
2335 {
2336 Uint8 s0 = src[0];
2337 Uint8 s1 = src[1];
2338 Uint8 s2 = src[2];
2339 if (k0 != s0 || k1 != s1 || k2 != s2) {
2340 // Inversed RGB
2341 dst[0] = s2;
2342 dst[1] = s1;
2343 dst[2] = s0;
2344 }
2345 src += 3;
2346 dst += 3;
2347 },
2348 width);
2349 /* *INDENT-ON* */ // clang-format on
2350 src += srcskip;
2351 dst += dstskip;
2352 }
2353 return;
2354 }
2355
2356 // Blit with permutation: 4->3
2357 if (srcbpp == 4 && dstbpp == 3 &&
2358 !SDL_ISPIXELFORMAT_10BIT(srcfmt->format)) {
2359
2360 // Find the appropriate permutation
2361 int p0, p1, p2, p3;
2362 get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
2363
2364 while (height--) {
2365 /* *INDENT-OFF* */ // clang-format off
2366 DUFFS_LOOP(
2367 {
2368 Uint32 *src32 = (Uint32*)src;
2369 if ((*src32 & rgbmask) != ckey) {
2370 dst[0] = src[p0];
2371 dst[1] = src[p1];
2372 dst[2] = src[p2];
2373 }
2374 src += 4;
2375 dst += 3;
2376 }, width);
2377 /* *INDENT-ON* */ // clang-format on
2378 src += srcskip;
2379 dst += dstskip;
2380 }
2381 return;
2382 }
2383
2384#if HAVE_FAST_WRITE_INT8
2385 // Blit with permutation: 3->4
2386 if (srcbpp == 3 && dstbpp == 4 &&
2387 !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {
2388
2389#if SDL_BYTEORDER == SDL_LIL_ENDIAN
2390 Uint8 k0 = ckey & 0xFF;
2391 Uint8 k1 = (ckey >> 8) & 0xFF;
2392 Uint8 k2 = (ckey >> 16) & 0xFF;
2393#else
2394 Uint8 k0 = (ckey >> 16) & 0xFF;
2395 Uint8 k1 = (ckey >> 8) & 0xFF;
2396 Uint8 k2 = ckey & 0xFF;
2397#endif
2398
2399 // Find the appropriate permutation
2400 int alpha_channel, p0, p1, p2, p3;
2401 get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);
2402
2403 while (height--) {
2404 /* *INDENT-OFF* */ // clang-format off
2405 DUFFS_LOOP(
2406 {
2407 Uint8 s0 = src[0];
2408 Uint8 s1 = src[1];
2409 Uint8 s2 = src[2];
2410
2411 if (k0 != s0 || k1 != s1 || k2 != s2) {
2412 dst[0] = src[p0];
2413 dst[1] = src[p1];
2414 dst[2] = src[p2];
2415 dst[3] = src[p3];
2416 dst[alpha_channel] = (Uint8)alpha;
2417 }
2418 src += 3;
2419 dst += 4;
2420 }, width);
2421 /* *INDENT-ON* */ // clang-format on
2422 src += srcskip;
2423 dst += dstskip;
2424 }
2425 return;
2426 }
2427#endif
2428
2429 while (height--) {
2430 /* *INDENT-OFF* */ // clang-format off
2431 DUFFS_LOOP(
2432 {
2433 Uint32 Pixel;
2434 unsigned sR;
2435 unsigned sG;
2436 unsigned sB;
2437 RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
2438 if ( (Pixel & rgbmask) != ckey ) {
2439 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
2440 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
2441 }
2442 dst += dstbpp;
2443 src += srcbpp;
2444 },
2445 width);
2446 /* *INDENT-ON* */ // clang-format on
2447 src += srcskip;
2448 dst += dstskip;
2449 }
2450}
2451
2452static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
2453{
2454 int width = info->dst_w;
2455 int height = info->dst_h;
2456 Uint8 *src = info->src;
2457 int srcskip = info->src_skip;
2458 Uint8 *dst = info->dst;
2459 int dstskip = info->dst_skip;
2460 Uint32 ckey = info->colorkey;
2461 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
2462 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
2463 Uint32 rgbmask = ~srcfmt->Amask;
2464
2465 Uint8 srcbpp;
2466 Uint8 dstbpp;
2467 Uint32 Pixel;
2468 unsigned sR, sG, sB, sA;
2469
2470 // Set up some basic variables
2471 srcbpp = srcfmt->bytes_per_pixel;
2472 dstbpp = dstfmt->bytes_per_pixel;
2473 ckey &= rgbmask;
2474
    // Fast path: identical source/destination formats with an Amask, 32 bpp; the loop is vectorized, ~10x faster
2476 if (srcfmt->format == dstfmt->format) {
2477
2478 if (srcfmt->format == SDL_PIXELFORMAT_ARGB8888 ||
2479 srcfmt->format == SDL_PIXELFORMAT_ABGR8888 ||
2480 srcfmt->format == SDL_PIXELFORMAT_BGRA8888 ||
2481 srcfmt->format == SDL_PIXELFORMAT_RGBA8888) {
2482
2483 Uint32 *src32 = (Uint32 *)src;
2484 Uint32 *dst32 = (Uint32 *)dst;
2485 while (height--) {
2486 /* *INDENT-OFF* */ // clang-format off
2487 DUFFS_LOOP_TRIVIAL(
2488 {
2489 if ((*src32 & rgbmask) != ckey) {
2490 *dst32 = *src32;
2491 }
2492 ++src32;
2493 ++dst32;
2494 },
2495 width);
2496 /* *INDENT-ON* */ // clang-format on
2497 src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
2498 dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
2499 }
2500 }
2501 return;
2502 }
2503
2504#if HAVE_FAST_WRITE_INT8
2505 // Blit with permutation: 4->4
2506 if (srcbpp == 4 && dstbpp == 4 &&
2507 !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
2508 !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {
2509
2510 // Find the appropriate permutation
2511 int p0, p1, p2, p3;
2512 get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
2513
2514 while (height--) {
2515 /* *INDENT-OFF* */ // clang-format off
2516 DUFFS_LOOP(
2517 {
2518 Uint32 *src32 = (Uint32*)src;
2519 if ((*src32 & rgbmask) != ckey) {
2520 dst[0] = src[p0];
2521 dst[1] = src[p1];
2522 dst[2] = src[p2];
2523 dst[3] = src[p3];
2524 }
2525 src += 4;
2526 dst += 4;
2527 }, width);
2528 /* *INDENT-ON* */ // clang-format on
2529 src += srcskip;
2530 dst += dstskip;
2531 }
2532 return;
2533 }
2534#endif
2535
2536 while (height--) {
2537 /* *INDENT-OFF* */ // clang-format off
2538 DUFFS_LOOP(
2539 {
2540 DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
2541 if ( (Pixel & rgbmask) != ckey ) {
2542 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
2543 }
2544 dst += dstbpp;
2545 src += srcbpp;
2546 },
2547 width);
2548 /* *INDENT-ON* */ // clang-format on
2549 src += srcskip;
2550 dst += dstskip;
2551 }
2552}
2553
2554// Convert between two 8888 pixels with differing formats.
2555#define SWIZZLE_8888_SRC_ALPHA(src, dst, srcfmt, dstfmt) \
2556 do { \
2557 dst = (((src >> srcfmt->Rshift) & 0xFF) << dstfmt->Rshift) | \
2558 (((src >> srcfmt->Gshift) & 0xFF) << dstfmt->Gshift) | \
2559 (((src >> srcfmt->Bshift) & 0xFF) << dstfmt->Bshift) | \
2560 (((src >> srcfmt->Ashift) & 0xFF) << dstfmt->Ashift); \
2561 } while (0)
2562
2563#define SWIZZLE_8888_DST_ALPHA(src, dst, srcfmt, dstfmt, dstAmask) \
2564 do { \
2565 dst = (((src >> srcfmt->Rshift) & 0xFF) << dstfmt->Rshift) | \
2566 (((src >> srcfmt->Gshift) & 0xFF) << dstfmt->Gshift) | \
2567 (((src >> srcfmt->Bshift) & 0xFF) << dstfmt->Bshift) | \
2568 dstAmask; \
2569 } while (0)
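// For example, with srcfmt = ARGB8888 and dstfmt = ABGR8888,
// SWIZZLE_8888_SRC_ALPHA turns 0xAARRGGBB into 0xAABBGGRR; the _DST_ALPHA
// variant performs the same swizzle but substitutes dstAmask (0xFF000000 in
// that case, i.e. fully opaque) for the source alpha byte.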
2570
2571#if defined(SDL_SSE4_1_INTRINSICS) || defined(SDL_AVX2_INTRINSICS) || (defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8))
2572static void Get8888AlphaMaskAndShift(const SDL_PixelFormatDetails *fmt, Uint32 *mask, Uint32 *shift)
2573{
2574 if (fmt->Amask) {
2575 *mask = fmt->Amask;
2576 *shift = fmt->Ashift;
2577 } else {
2578 *mask = ~(fmt->Rmask | fmt->Gmask | fmt->Bmask);
2579 switch (*mask) {
2580 case 0x000000FF:
2581 *shift = 0;
2582 break;
2583 case 0x0000FF00:
2584 *shift = 8;
2585 break;
2586 case 0x00FF0000:
2587 *shift = 16;
2588 break;
2589 case 0xFF000000:
2590 *shift = 24;
2591 break;
2592 default:
2593 // Should never happen
2594 *shift = 0;
2595 break;
2596 }
2597 }
2598}
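// For formats without an alpha channel (e.g. XRGB8888), the mask and shift
// computed above describe the unused padding byte, so the swizzle blitters can
// treat every 8888 layout uniformly and simply force that byte to 0xFF when
// fill_alpha is set.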
#endif // Get8888AlphaMaskAndShift, shared by the SSE4.1, AVX2 and NEON Blit8888to8888PixelSwizzle implementations
2600
2601#ifdef SDL_SSE4_1_INTRINSICS
2602
2603static void SDL_TARGETING("sse4.1") Blit8888to8888PixelSwizzleSSE41(SDL_BlitInfo *info)
2604{
2605 int width = info->dst_w;
2606 int height = info->dst_h;
2607 Uint8 *src = info->src;
2608 int srcskip = info->src_skip;
2609 Uint8 *dst = info->dst;
2610 int dstskip = info->dst_skip;
2611 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
2612 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
2613 bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
2614 Uint32 srcAmask, srcAshift;
2615 Uint32 dstAmask, dstAshift;
2616
2617 Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
2618 Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);
2619
2620 // The byte offsets for the start of each pixel
2621 const __m128i mask_offsets = _mm_set_epi8(
2622 12, 12, 12, 12, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0);
2623
2624 const __m128i convert_mask = _mm_add_epi32(
2625 _mm_set1_epi32(
2626 ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
2627 ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
2628 ((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
2629 ((srcAshift >> 3) << dstAshift)),
2630 mask_offsets);
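    // Each byte of convert_mask is now the index, within the same source
    // pixel, of the source byte that should land at that destination byte:
    // srcfmt->Xshift / 8 selects the source byte for channel X, and
    // mask_offsets adds the per-pixel base so _mm_shuffle_epi8 can rearrange
    // four pixels at once.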
2631
2632 const __m128i alpha_fill_mask = _mm_set1_epi32((int)dstAmask);
2633
2634 while (height--) {
2635 int i = 0;
2636
2637 for (; i + 4 <= width; i += 4) {
2638 // Load 4 src pixels
2639 __m128i src128 = _mm_loadu_si128((__m128i *)src);
2640
2641 // Convert to dst format
2642 src128 = _mm_shuffle_epi8(src128, convert_mask);
2643
2644 if (fill_alpha) {
2645 // Set the alpha channels of src to 255
2646 src128 = _mm_or_si128(src128, alpha_fill_mask);
2647 }
2648
2649 // Save the result
2650 _mm_storeu_si128((__m128i *)dst, src128);
2651
2652 src += 16;
2653 dst += 16;
2654 }
2655
2656 for (; i < width; ++i) {
2657 Uint32 src32 = *(Uint32 *)src;
2658 Uint32 dst32;
2659 if (fill_alpha) {
2660 SWIZZLE_8888_DST_ALPHA(src32, dst32, srcfmt, dstfmt, dstAmask);
2661 } else {
2662 SWIZZLE_8888_SRC_ALPHA(src32, dst32, srcfmt, dstfmt);
2663 }
2664 *(Uint32 *)dst = dst32;
2665 src += 4;
2666 dst += 4;
2667 }
2668
2669 src += srcskip;
2670 dst += dstskip;
2671 }
2672}
2673
2674#endif
2675
2676#ifdef SDL_AVX2_INTRINSICS
2677
2678static void SDL_TARGETING("avx2") Blit8888to8888PixelSwizzleAVX2(SDL_BlitInfo *info)
2679{
2680 int width = info->dst_w;
2681 int height = info->dst_h;
2682 Uint8 *src = info->src;
2683 int srcskip = info->src_skip;
2684 Uint8 *dst = info->dst;
2685 int dstskip = info->dst_skip;
2686 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
2687 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
2688 bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
2689 Uint32 srcAmask, srcAshift;
2690 Uint32 dstAmask, dstAshift;
2691
2692 Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
2693 Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);
2694
2695 // The byte offsets for the start of each pixel
2696 const __m256i mask_offsets = _mm256_set_epi8(
2697 28, 28, 28, 28, 24, 24, 24, 24, 20, 20, 20, 20, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0);
2698
2699 const __m256i convert_mask = _mm256_add_epi32(
2700 _mm256_set1_epi32(
2701 ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
2702 ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
2703 ((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
2704 ((srcAshift >> 3) << dstAshift)),
2705 mask_offsets);
2706
2707 const __m256i alpha_fill_mask = _mm256_set1_epi32((int)dstAmask);
2708
2709 while (height--) {
2710 int i = 0;
2711
2712 for (; i + 8 <= width; i += 8) {
2713 // Load 8 src pixels
2714 __m256i src256 = _mm256_loadu_si256((__m256i *)src);
2715
2716 // Convert to dst format
2717 src256 = _mm256_shuffle_epi8(src256, convert_mask);
2718
2719 if (fill_alpha) {
2720 // Set the alpha channels of src to 255
2721 src256 = _mm256_or_si256(src256, alpha_fill_mask);
2722 }
2723
2724 // Save the result
2725 _mm256_storeu_si256((__m256i *)dst, src256);
2726
2727 src += 32;
2728 dst += 32;
2729 }
2730
2731 for (; i < width; ++i) {
2732 Uint32 src32 = *(Uint32 *)src;
2733 Uint32 dst32;
2734 if (fill_alpha) {
2735 SWIZZLE_8888_DST_ALPHA(src32, dst32, srcfmt, dstfmt, dstAmask);
2736 } else {
2737 SWIZZLE_8888_SRC_ALPHA(src32, dst32, srcfmt, dstfmt);
2738 }
2739 *(Uint32 *)dst = dst32;
2740 src += 4;
2741 dst += 4;
2742 }
2743
2744 src += srcskip;
2745 dst += dstskip;
2746 }
2747}
2748
2749#endif
2750
2751#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8)
2752
2753static void Blit8888to8888PixelSwizzleNEON(SDL_BlitInfo *info)
2754{
2755 int width = info->dst_w;
2756 int height = info->dst_h;
2757 Uint8 *src = info->src;
2758 int srcskip = info->src_skip;
2759 Uint8 *dst = info->dst;
2760 int dstskip = info->dst_skip;
2761 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
2762 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
2763 bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
2764 Uint32 srcAmask, srcAshift;
2765 Uint32 dstAmask, dstAshift;
2766
2767 Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
2768 Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);
2769
2770 // The byte offsets for the start of each pixel
2771 const uint8x16_t mask_offsets = vreinterpretq_u8_u64(vcombine_u64(
2772 vcreate_u64(0x0404040400000000), vcreate_u64(0x0c0c0c0c08080808)));
2773
2774 const uint8x16_t convert_mask = vreinterpretq_u8_u32(vaddq_u32(
2775 vreinterpretq_u32_u8(mask_offsets),
2776 vdupq_n_u32(
2777 ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
2778 ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
2779 ((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
2780 ((srcAshift >> 3) << dstAshift))));
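    // vqtbl1q_u8 below consumes each byte of convert_mask as a source-byte
    // index, playing the same role as _mm_shuffle_epi8/_mm256_shuffle_epi8 in
    // the SSE4.1 and AVX2 blitters above.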
2781
2782 const uint8x16_t alpha_fill_mask = vreinterpretq_u8_u32(vdupq_n_u32(dstAmask));
2783
2784 while (height--) {
2785 int i = 0;
2786
2787 for (; i + 4 <= width; i += 4) {
2788 // Load 4 src pixels
2789 uint8x16_t src128 = vld1q_u8(src);
2790
2791 // Convert to dst format
2792 src128 = vqtbl1q_u8(src128, convert_mask);
2793
2794 if (fill_alpha) {
2795 // Set the alpha channels of src to 255
2796 src128 = vorrq_u8(src128, alpha_fill_mask);
2797 }
2798
2799 // Save the result
2800 vst1q_u8(dst, src128);
2801
2802 src += 16;
2803 dst += 16;
2804 }
2805
2806 // Process 1 pixel per iteration, max 3 iterations, same calculations as above
2807 for (; i < width; ++i) {
            // The top 32 bits of src32 are not used
2809 uint8x8_t src32 = vreinterpret_u8_u32(vld1_dup_u32((Uint32*)src));
2810
2811 // Convert to dst format
2812 src32 = vtbl1_u8(src32, vget_low_u8(convert_mask));
2813
2814 if (fill_alpha) {
2815 // Set the alpha channels of src to 255
2816 src32 = vorr_u8(src32, vget_low_u8(alpha_fill_mask));
2817 }
2818
2819 // Save the result, only low 32-bits
2820 vst1_lane_u32((Uint32*)dst, vreinterpret_u32_u8(src32), 0);
2821
2822 src += 4;
2823 dst += 4;
2824 }
2825
2826 src += srcskip;
2827 dst += dstskip;
2828 }
2829}
2830
2831#endif
2832
// Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, R, G and B stored in the same byte positions in both formats
2834static void Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo *info)
2835{
2836 int width = info->dst_w;
2837 int height = info->dst_h;
2838 Uint8 *src = info->src;
2839 int srcskip = info->src_skip;
2840 Uint8 *dst = info->dst;
2841 int dstskip = info->dst_skip;
2842 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
2843 int srcbpp = srcfmt->bytes_per_pixel;
2844 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
2845 int dstbpp = dstfmt->bytes_per_pixel;
2846
2847 if (dstfmt->Amask) {
2848 // SET_ALPHA
2849 Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
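        // A destination with an Amask is necessarily 4 bytes per pixel here,
        // so each output pixel can be written as a single Uint32.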
2850#if SDL_BYTEORDER == SDL_LIL_ENDIAN
2851 int i0 = 0, i1 = 1, i2 = 2;
2852#else
2853 int i0 = srcbpp - 1 - 0;
2854 int i1 = srcbpp - 1 - 1;
2855 int i2 = srcbpp - 1 - 2;
2856#endif
2857 while (height--) {
2858 /* *INDENT-OFF* */ // clang-format off
2859 DUFFS_LOOP(
2860 {
2861 Uint32 *dst32 = (Uint32*)dst;
2862 Uint8 s0 = src[i0];
2863 Uint8 s1 = src[i1];
2864 Uint8 s2 = src[i2];
2865 *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
2866 dst += 4;
2867 src += srcbpp;
2868 }, width);
2869 /* *INDENT-ON* */ // clang-format on
2870 src += srcskip;
2871 dst += dstskip;
2872 }
2873 } else {
2874 // NO_ALPHA
2875#if SDL_BYTEORDER == SDL_LIL_ENDIAN
2876 int i0 = 0, i1 = 1, i2 = 2;
2877 int j0 = 0, j1 = 1, j2 = 2;
2878#else
2879 int i0 = srcbpp - 1 - 0;
2880 int i1 = srcbpp - 1 - 1;
2881 int i2 = srcbpp - 1 - 2;
2882 int j0 = dstbpp - 1 - 0;
2883 int j1 = dstbpp - 1 - 1;
2884 int j2 = dstbpp - 1 - 2;
2885#endif
2886 while (height--) {
2887 /* *INDENT-OFF* */ // clang-format off
2888 DUFFS_LOOP(
2889 {
2890 Uint8 s0 = src[i0];
2891 Uint8 s1 = src[i1];
2892 Uint8 s2 = src[i2];
2893 dst[j0] = s0;
2894 dst[j1] = s1;
2895 dst[j2] = s2;
2896 dst += dstbpp;
2897 src += srcbpp;
2898 }, width);
2899 /* *INDENT-ON* */ // clang-format on
2900 src += srcskip;
2901 dst += dstskip;
2902 }
2903 }
2904}
2905
// Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, R, G and B stored in opposite byte order in the two formats
2907static void Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo *info)
2908{
2909 int width = info->dst_w;
2910 int height = info->dst_h;
2911 Uint8 *src = info->src;
2912 int srcskip = info->src_skip;
2913 Uint8 *dst = info->dst;
2914 int dstskip = info->dst_skip;
2915 const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
2916 int srcbpp = srcfmt->bytes_per_pixel;
2917 const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
2918 int dstbpp = dstfmt->bytes_per_pixel;
2919
2920 if (dstfmt->Amask) {
2921 if (srcfmt->Amask) {
2922 // COPY_ALPHA
            // Used only to swap ABGR8888 <-> ARGB8888
2924 while (height--) {
2925#if SDL_BYTEORDER == SDL_LIL_ENDIAN
2926 int i0 = 0, i1 = 1, i2 = 2, i3 = 3;
2927#else
2928 int i0 = 3, i1 = 2, i2 = 1, i3 = 0;
2929#endif
2930 /* *INDENT-OFF* */ // clang-format off
2931 DUFFS_LOOP(
2932 {
2933 Uint32 *dst32 = (Uint32*)dst;
2934 Uint8 s0 = src[i0];
2935 Uint8 s1 = src[i1];
2936 Uint8 s2 = src[i2];
2937 Uint32 alphashift = ((Uint32)src[i3]) << dstfmt->Ashift;
2938 // inversed, compared to Blit_3or4_to_3or4__same_rgb
2939 *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
2940 dst += 4;
2941 src += 4;
2942 }, width);
2943 /* *INDENT-ON* */ // clang-format on
2944 src += srcskip;
2945 dst += dstskip;
2946 }
2947 } else {
2948 // SET_ALPHA
2949 Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
2950#if SDL_BYTEORDER == SDL_LIL_ENDIAN
2951 int i0 = 0, i1 = 1, i2 = 2;
2952#else
2953 int i0 = srcbpp - 1 - 0;
2954 int i1 = srcbpp - 1 - 1;
2955 int i2 = srcbpp - 1 - 2;
2956#endif
2957 while (height--) {
2958 /* *INDENT-OFF* */ // clang-format off
2959 DUFFS_LOOP(
2960 {
2961 Uint32 *dst32 = (Uint32*)dst;
2962 Uint8 s0 = src[i0];
2963 Uint8 s1 = src[i1];
2964 Uint8 s2 = src[i2];
2965 // inversed, compared to Blit_3or4_to_3or4__same_rgb
2966 *dst32 = (s0 << 16) | (s1 << 8) | (s2) | mask;
2967 dst += 4;
2968 src += srcbpp;
2969 }, width);
2970 /* *INDENT-ON* */ // clang-format on
2971 src += srcskip;
2972 dst += dstskip;
2973 }
2974 }
2975 } else {
2976 // NO_ALPHA
2977#if SDL_BYTEORDER == SDL_LIL_ENDIAN
2978 int i0 = 0, i1 = 1, i2 = 2;
2979 int j0 = 2, j1 = 1, j2 = 0;
2980#else
2981 int i0 = srcbpp - 1 - 0;
2982 int i1 = srcbpp - 1 - 1;
2983 int i2 = srcbpp - 1 - 2;
2984 int j0 = dstbpp - 1 - 2;
2985 int j1 = dstbpp - 1 - 1;
2986 int j2 = dstbpp - 1 - 0;
2987#endif
2988 while (height--) {
2989 /* *INDENT-OFF* */ // clang-format off
2990 DUFFS_LOOP(
2991 {
2992 Uint8 s0 = src[i0];
2993 Uint8 s1 = src[i1];
2994 Uint8 s2 = src[i2];
2995 // inversed, compared to Blit_3or4_to_3or4__same_rgb
2996 dst[j0] = s0;
2997 dst[j1] = s1;
2998 dst[j2] = s2;
2999 dst += dstbpp;
3000 src += srcbpp;
3001 }, width);
3002 /* *INDENT-ON* */ // clang-format on
3003 src += srcskip;
3004 dst += dstskip;
3005 }
3006 }
3007}
3008
3009// Normal N to N optimized blitters
3010#define NO_ALPHA 1
3011#define SET_ALPHA 2
3012#define COPY_ALPHA 4
3013struct blit_table
3014{
3015 Uint32 srcR, srcG, srcB;
3016 int dstbpp;
3017 Uint32 dstR, dstG, dstB;
3018 Uint32 blit_features;
3019 SDL_BlitFunc blitfunc;
    Uint32 alpha; // bitwise OR of NO_ALPHA, SET_ALPHA, COPY_ALPHA
3021};
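// Each table below is scanned in order by SDL_CalculateBlitN(): the first
// entry whose source/destination masks, destination bpp, alpha requirements
// and blit_features all match is used, and the final all-zero entry serves as
// the BlitNtoN catch-all.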
3022static const struct blit_table normal_blit_1[] = {
3023 // Default for 8-bit RGB source, never optimized
3024 { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 }
3025};
3026
3027static const struct blit_table normal_blit_2[] = {
3028#ifdef SDL_ALTIVEC_BLITTERS
3029 // has-altivec
3030 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
3031 BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
3032 { 0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
3033 BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
3034#endif
3035#ifdef SDL_HAVE_BLIT_N_RGB565
3036 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
3037 0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
3038 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
3039 0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
3040 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
3041 0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
3042 { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
3043 0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
3044#endif
3045 // Default for 16-bit RGB source, used if no other blitter matches
3046 { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 }
3047};
3048
3049static const struct blit_table normal_blit_3[] = {
3050 // 3->4 with same rgb triplet
3051 { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
3052 0, Blit_3or4_to_3or4__same_rgb,
3053#if HAVE_FAST_WRITE_INT8
3054 NO_ALPHA |
3055#endif
3056 SET_ALPHA },
3057 { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
3058 0, Blit_3or4_to_3or4__same_rgb,
3059#if HAVE_FAST_WRITE_INT8
3060 NO_ALPHA |
3061#endif
3062 SET_ALPHA },
3063 // 3->4 with inversed rgb triplet
3064 { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
3065 0, Blit_3or4_to_3or4__inversed_rgb,
3066#if HAVE_FAST_WRITE_INT8
3067 NO_ALPHA |
3068#endif
3069 SET_ALPHA },
3070 { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
3071 0, Blit_3or4_to_3or4__inversed_rgb,
3072#if HAVE_FAST_WRITE_INT8
3073 NO_ALPHA |
3074#endif
3075 SET_ALPHA },
3076 // 3->3 to switch RGB 24 <-> BGR 24
3077 { 0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
3078 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
3079 { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
3080 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
3081 // Default for 24-bit RGB source, never optimized
3082 { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 }
3083};
3084
3085static const struct blit_table normal_blit_4[] = {
3086#ifdef SDL_ALTIVEC_BLITTERS
3087 // has-altivec | dont-use-prefetch
3088 { 0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
3089 BLIT_FEATURE_HAS_ALTIVEC | BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
3090 // has-altivec
3091 { 0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
3092 BLIT_FEATURE_HAS_ALTIVEC, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
3093 // has-altivec
3094 { 0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
3095 BLIT_FEATURE_HAS_ALTIVEC, Blit_XRGB8888_RGB565Altivec, NO_ALPHA },
3096#endif
3097 // 4->3 with same rgb triplet
3098 { 0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
3099 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA },
3100 { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
3101 0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA },
3102 // 4->3 with inversed rgb triplet
3103 { 0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
3104 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA },
3105 { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
3106 0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA },
3107 // 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888
3108 { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
3109 0, Blit_3or4_to_3or4__inversed_rgb,
3110#if HAVE_FAST_WRITE_INT8
3111 NO_ALPHA |
3112#endif
3113 SET_ALPHA | COPY_ALPHA },
3114 { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
3115 0, Blit_3or4_to_3or4__inversed_rgb,
3116#if HAVE_FAST_WRITE_INT8
3117 NO_ALPHA |
3118#endif
3119 SET_ALPHA | COPY_ALPHA },
    // XRGB8888 to RGB565 and RGB555
3121 { 0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
3122 0, Blit_XRGB8888_RGB565, NO_ALPHA },
3123 { 0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
3124 0, Blit_XRGB8888_RGB555, NO_ALPHA },
3125 // Default for 32-bit RGB source, used if no other blitter matches
3126 { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 }
3127};
3128
3129static const struct blit_table *const normal_blit[] = {
3130 normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
3131};
3132
3133// Mask matches table, or table entry is zero
3134#define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
3135
3136SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface *surface)
3137{
3138 const SDL_PixelFormatDetails *srcfmt;
3139 const SDL_PixelFormatDetails *dstfmt;
3140 const struct blit_table *table;
3141 int which;
3142 SDL_BlitFunc blitfun;
3143
3144 // Set up data for choosing the blit
3145 srcfmt = surface->fmt;
3146 dstfmt = surface->map.info.dst_fmt;
3147
    // We don't support destinations with fewer than 8 bits per pixel
3149 if (dstfmt->bits_per_pixel < 8) {
3150 return NULL;
3151 }
3152
3153 switch (surface->map.info.flags & ~SDL_COPY_RLE_MASK) {
3154 case 0:
3155 if (SDL_PIXELLAYOUT(srcfmt->format) == SDL_PACKEDLAYOUT_8888 &&
3156 SDL_PIXELLAYOUT(dstfmt->format) == SDL_PACKEDLAYOUT_8888) {
3157#ifdef SDL_AVX2_INTRINSICS
3158 if (SDL_HasAVX2()) {
3159 return Blit8888to8888PixelSwizzleAVX2;
3160 }
3161#endif
3162#ifdef SDL_SSE4_1_INTRINSICS
3163 if (SDL_HasSSE41()) {
3164 return Blit8888to8888PixelSwizzleSSE41;
3165 }
3166#endif
3167#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8)
3168 return Blit8888to8888PixelSwizzleNEON;
3169#endif
3170 }
3171
3172 blitfun = NULL;
3173 if (dstfmt->bits_per_pixel > 8) {
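            // Decide how the blitter must treat alpha: NO_ALPHA when the
            // destination has no alpha channel, SET_ALPHA when only the
            // destination has one (it must be filled in), COPY_ALPHA when
            // both have one (it is copied through).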
3174 Uint32 a_need = NO_ALPHA;
3175 if (dstfmt->Amask) {
3176 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
3177 }
3178 if (srcfmt->bytes_per_pixel > 0 &&
3179 srcfmt->bytes_per_pixel <= SDL_arraysize(normal_blit)) {
3180 table = normal_blit[srcfmt->bytes_per_pixel - 1];
3181 for (which = 0; table[which].dstbpp; ++which) {
3182 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
3183 MASKOK(srcfmt->Gmask, table[which].srcG) &&
3184 MASKOK(srcfmt->Bmask, table[which].srcB) &&
3185 MASKOK(dstfmt->Rmask, table[which].dstR) &&
3186 MASKOK(dstfmt->Gmask, table[which].dstG) &&
3187 MASKOK(dstfmt->Bmask, table[which].dstB) &&
3188 dstfmt->bytes_per_pixel == table[which].dstbpp &&
3189 (a_need & table[which].alpha) == a_need &&
3190 ((table[which].blit_features & GetBlitFeatures()) ==
3191 table[which].blit_features)) {
3192 break;
3193 }
3194 }
3195 blitfun = table[which].blitfunc;
3196 }
3197
3198 if (blitfun == BlitNtoN) { // default C fallback catch-all. Slow!
3199 if (srcfmt->bytes_per_pixel == dstfmt->bytes_per_pixel &&
3200 srcfmt->Rmask == dstfmt->Rmask &&
3201 srcfmt->Gmask == dstfmt->Gmask &&
3202 srcfmt->Bmask == dstfmt->Bmask) {
3203 if (a_need == COPY_ALPHA) {
3204 if (srcfmt->Amask == dstfmt->Amask) {
3205 // Fastpath C fallback: RGBA<->RGBA blit with matching RGBA
3206 blitfun = SDL_BlitCopy;
3207 } else {
3208 blitfun = BlitNtoNCopyAlpha;
3209 }
3210 } else {
3211 if (srcfmt->bytes_per_pixel == 4) {
3212 // Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB
3213 blitfun = Blit4to4MaskAlpha;
3214 } else if (srcfmt->bytes_per_pixel == 2) {
3215 // Fastpath C fallback: 16bit RGB<->RGBA blit with matching RGB
3216 blitfun = Blit2to2MaskAlpha;
3217 }
3218 }
3219 } else if (a_need == COPY_ALPHA) {
3220 blitfun = BlitNtoNCopyAlpha;
3221 }
3222 }
3223 }
3224 return blitfun;
3225
3226 case SDL_COPY_COLORKEY:
    /* Colorkey blit: we don't have many options here, mostly because
       RLE is the preferred fast way to deal with colorkeyed surfaces.
       If a particular case turns out to be useful we'll add it. */
3230
3231 if (srcfmt->bytes_per_pixel == 2 && surface->map.identity != 0) {
3232 return Blit2to2Key;
3233 } else {
3234#ifdef SDL_ALTIVEC_BLITTERS
3235 if ((srcfmt->bytes_per_pixel == 4) && (dstfmt->bytes_per_pixel == 4) && SDL_HasAltiVec()) {
3236 return Blit32to32KeyAltivec;
3237 } else
3238#endif
3239 if (srcfmt->Amask && dstfmt->Amask) {
3240 return BlitNtoNKeyCopyAlpha;
3241 } else {
3242 return BlitNtoNKey;
3243 }
3244 }
3245 }
3246
3247 return NULL;
3248}
3249
3250#endif // SDL_HAVE_BLIT_N
3251