1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "../SDL_internal.h"
22
23#ifndef SDL_blit_h_
24#define SDL_blit_h_
25
26#include "SDL_cpuinfo.h"
27#include "SDL_endian.h"
28#include "SDL_surface.h"
29
/* The pixman ARM blitters are 32-bit only, so make sure they are switched
   off when the target is 64-bit ARM. */
#if defined(__aarch64__) || defined(_M_ARM64)
#  undef SDL_ARM_SIMD_BLITTERS
#  undef SDL_ARM_NEON_BLITTERS
#endif
35
36/* Table to do pixel byte expansion */
37extern Uint8* SDL_expand_byte[9];
38
/* SDL blit copy flags */

/* bits 0-1: colour / alpha modulation */
#define SDL_COPY_MODULATE_COLOR     0x00000001
#define SDL_COPY_MODULATE_ALPHA     0x00000002

/* bits 4-7: blend modes */
#define SDL_COPY_BLEND              0x00000010
#define SDL_COPY_ADD                0x00000020
#define SDL_COPY_MOD                0x00000040
#define SDL_COPY_MUL                0x00000080

/* bits 8-9 */
#define SDL_COPY_COLORKEY           0x00000100
#define SDL_COPY_NEAREST            0x00000200

/* bits 12-14: RLE related flags */
#define SDL_COPY_RLE_DESIRED        0x00001000
#define SDL_COPY_RLE_COLORKEY       0x00002000
#define SDL_COPY_RLE_ALPHAKEY       0x00004000

/* Union of every RLE related flag */
#define SDL_COPY_RLE_MASK \
    (SDL_COPY_RLE_DESIRED | SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY)
52
/* SDL blit CPU flags: SDL_CPU_ANY is zero, every other value is a
   distinct single bit. */
#define SDL_CPU_ANY                 0x00000000
#define SDL_CPU_MMX                 0x00000001
#define SDL_CPU_3DNOW               0x00000002
#define SDL_CPU_SSE                 0x00000004
#define SDL_CPU_SSE2                0x00000008
#define SDL_CPU_ALTIVEC_PREFETCH    0x00000010
#define SDL_CPU_ALTIVEC_NOPREFETCH  0x00000020
61
62typedef struct
63{
64 Uint8 *src;
65 int src_w, src_h;
66 int src_pitch;
67 int src_skip;
68 Uint8 *dst;
69 int dst_w, dst_h;
70 int dst_pitch;
71 int dst_skip;
72 SDL_PixelFormat *src_fmt;
73 SDL_PixelFormat *dst_fmt;
74 Uint8 *table;
75 int flags;
76 Uint32 colorkey;
77 Uint8 r, g, b, a;
78} SDL_BlitInfo;
79
80typedef void (*SDL_BlitFunc) (SDL_BlitInfo *info);
81
82
83typedef struct
84{
85 Uint32 src_format;
86 Uint32 dst_format;
87 int flags;
88 int cpu;
89 SDL_BlitFunc func;
90} SDL_BlitFuncEntry;
91
92/* Blit mapping definition */
93typedef struct SDL_BlitMap
94{
95 SDL_Surface *dst;
96 int identity;
97 SDL_blit blit;
98 void *data;
99 SDL_BlitInfo info;
100
101 /* the version count matches the destination; mismatch indicates
102 an invalid mapping */
103 Uint32 dst_palette_version;
104 Uint32 src_palette_version;
105} SDL_BlitMap;
106
107/* Functions found in SDL_blit.c */
108extern int SDL_CalculateBlit(SDL_Surface * surface);
109
110/* Functions found in SDL_blit_*.c */
111extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface * surface);
112extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface * surface);
113extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface * surface);
114extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface * surface);
115
/*
 * Useful macros for blitting routines
 */

/* Declare variable `v` of type `t`, requesting alignment `a` through the
   compiler-specific syntax when one is known (GCC-style attribute or MSVC
   __declspec); other compilers get a plain declaration. */
#ifdef __GNUC__
#  define DECLARE_ALIGNED(t,v,a)  t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#  define DECLARE_ALIGNED(t,v,a)  __declspec(align(a)) t v
#else
#  define DECLARE_ALIGNED(t,v,a)  t v
#endif
127
/*
 * Split a packed pixel into R, G and B byte values using the masks, shifts
 * and loss values of the given pixel format.
 *
 *   Pixel    packed pixel value (evaluated three times - avoid side effects)
 *   fmt      pointer to a format providing R/G/B mask, shift and loss fields
 *   r, g, b  lvalues receiving the values looked up in SDL_expand_byte
 *
 * All arguments are parenthesized so that expressions such as `a | b` or
 * `&format` can be passed without precedence surprises.
 */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
{ \
    (r) = SDL_expand_byte[(fmt)->Rloss][(((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift)]; \
    (g) = SDL_expand_byte[(fmt)->Gloss][(((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift)]; \
    (b) = SDL_expand_byte[(fmt)->Bloss][(((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift)]; \
}
/*
 * Fixed-format variants of RGB_FROM_PIXEL: the masks and shifts are
 * hard-coded for the layout named in the macro.  `Pixel` is evaluated three
 * times; `r`, `g`, `b` are lvalues that receive the channel values.
 * Arguments are parenthesized so expressions can be passed safely.
 */

/* 5-6-5 layout: red in bits 11-15, green in bits 5-10, blue in bits 0-4 */
#define RGB_FROM_RGB565(Pixel, r, g, b) \
{ \
    (r) = SDL_expand_byte[3][(((Pixel)&0xF800)>>11)]; \
    (g) = SDL_expand_byte[2][(((Pixel)&0x07E0)>>5)]; \
    (b) = SDL_expand_byte[3][((Pixel)&0x001F)]; \
}
/* 5-5-5 layout: red in bits 10-14, green in bits 5-9, blue in bits 0-4 */
#define RGB_FROM_RGB555(Pixel, r, g, b) \
{ \
    (r) = SDL_expand_byte[3][(((Pixel)&0x7C00)>>10)]; \
    (g) = SDL_expand_byte[3][(((Pixel)&0x03E0)>>5)]; \
    (b) = SDL_expand_byte[3][((Pixel)&0x001F)]; \
}
/* 8-8-8 layout: red in bits 16-23, green in bits 8-15, blue in bits 0-7 */
#define RGB_FROM_RGB888(Pixel, r, g, b) \
{ \
    (r) = (((Pixel)&0xFF0000)>>16); \
    (g) = (((Pixel)&0xFF00)>>8); \
    (b) = ((Pixel)&0xFF); \
}
/*
 * Read one pixel that is `bpp` bytes wide from `buf` into `Pixel`.
 *
 *   bpp 1, 2, 4  the value is loaded through a Uint8/Uint16/Uint32 pointer
 *   bpp 3        the three bytes are combined by hand, in the order selected
 *                by SDL_BYTEORDER
 *   other        Pixel is set to 0
 *
 * The scratch pointer used for the 3-byte case has a name that is unlikely
 * to collide with the caller's identifiers, so passing a variable called
 * `B` as `buf` works.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
do { \
    switch (bpp) { \
        case 1: \
            (Pixel) = *((Uint8 *)(buf)); \
            break; \
 \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
            break; \
 \
        case 3: { \
            Uint8 *_bytes = (Uint8 *)(buf); \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (Pixel) = _bytes[0] + (_bytes[1] << 8) + (_bytes[2] << 16); \
            } else { \
                (Pixel) = (_bytes[0] << 16) + (_bytes[1] << 8) + _bytes[2]; \
            } \
        } \
        break; \
 \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
            break; \
 \
        default: \
            (Pixel) = 0; /* stop gcc complaints */ \
            break; \
    } \
} while (0)
183
/*
 * Read one pixel that is `bpp` bytes wide from `buf` and split it into
 * R, G and B.
 *
 *   bpp 1, 2, 4  the raw value is stored in `Pixel` and then decoded with
 *                RGB_FROM_PIXEL
 *   bpp 3        `Pixel` is set to 0 and each channel is read directly from
 *                the byte selected by the format's shift and SDL_BYTEORDER
 *   other        Pixel, r, g and b are all set to 0
 *
 * `fmt` is a pointer to the pixel format; `Pixel`, `r`, `g`, `b` are
 * lvalues.  Arguments are parenthesized, and the 3-byte path addresses
 * `buf` as bytes just like the other paths do.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
do { \
    switch (bpp) { \
        case 1: \
            (Pixel) = *((Uint8 *)(buf)); \
            RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b)); \
            break; \
 \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
            RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b)); \
            break; \
 \
        case 3: { \
            (Pixel) = 0; \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (r) = *((Uint8 *)(buf)+(fmt)->Rshift/8); \
                (g) = *((Uint8 *)(buf)+(fmt)->Gshift/8); \
                (b) = *((Uint8 *)(buf)+(fmt)->Bshift/8); \
            } else { \
                (r) = *((Uint8 *)(buf)+2-(fmt)->Rshift/8); \
                (g) = *((Uint8 *)(buf)+2-(fmt)->Gshift/8); \
                (b) = *((Uint8 *)(buf)+2-(fmt)->Bshift/8); \
            } \
        } \
        break; \
 \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
            RGB_FROM_PIXEL((Pixel), (fmt), (r), (g), (b)); \
            break; \
 \
        default: \
            /* stop gcc complaints */ \
            (Pixel) = 0; \
            (r) = (g) = (b) = 0; \
            break; \
    } \
} while (0)
223
/*
 * Pack R, G and B values into a pixel of the given format.
 *
 *   Pixel    lvalue receiving the packed value
 *   fmt      pointer to a format providing R/G/B loss and shift plus Amask
 *   r, g, b  channel values; each is shifted right by its loss and then
 *            left by its shift
 *
 * The format's alpha mask is OR-ed in, so every alpha bit of the result is
 * set.  Channels are widened to Uint32 before the left shift so that a
 * shift into bit 31 does not overflow a signed int, and all arguments are
 * parenthesized.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
{ \
    (Pixel) = ((Uint32)((r)>>(fmt)->Rloss)<<(fmt)->Rshift)| \
              ((Uint32)((g)>>(fmt)->Gloss)<<(fmt)->Gshift)| \
              ((Uint32)((b)>>(fmt)->Bloss)<<(fmt)->Bshift)| \
              (fmt)->Amask; \
}
/*
 * Fixed-format packers: combine r, g and b into `Pixel` using the layout
 * named in the macro.  Arguments are parenthesized so expressions can be
 * passed safely.
 */

/* 5-6-5: drops the low 3/2/3 bits of r/g/b */
#define RGB565_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = (((r)>>3)<<11)|(((g)>>2)<<5)|((b)>>3); \
}
/* 5-5-5: drops the low 3 bits of every channel */
#define RGB555_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = (((r)>>3)<<10)|(((g)>>3)<<5)|((b)>>3); \
}
/* 8-8-8: red in bits 16-23, green in bits 8-15, blue in bits 0-7 */
#define RGB888_FROM_RGB(Pixel, r, g, b) \
{ \
    (Pixel) = ((r)<<16)|((g)<<8)|(b); \
}
/*
 * Pack four 8-bit channels into a 32-bit pixel; the macro name gives the
 * channel order from the most significant byte down to the least
 * significant one.
 *
 * Each channel is converted to Uint32 before shifting, so moving a value
 * of 0x80 or more into the top byte does not overflow a signed int, and all
 * arguments are parenthesized.
 */
#define ARGB8888_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)(a)<<24)|((Uint32)(r)<<16)|((Uint32)(g)<<8)|(Uint32)(b); \
}
#define RGBA8888_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)(r)<<24)|((Uint32)(g)<<16)|((Uint32)(b)<<8)|(Uint32)(a); \
}
#define ABGR8888_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)(a)<<24)|((Uint32)(b)<<16)|((Uint32)(g)<<8)|(Uint32)(r); \
}
#define BGRA8888_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)(b)<<24)|((Uint32)(g)<<16)|((Uint32)(r)<<8)|(Uint32)(a); \
}
/*
 * Pack 8-bit r, g, b, a into a 2-10-10-10 pixel (alpha in bits 30-31, red
 * in bits 20-29, green in bits 10-19, blue in bits 0-9).
 *
 * NOTE: r, g, b and a must be modifiable lvalues and are OVERWRITTEN with
 * the rescaled values: each non-zero colour becomes (value << 2) | 3 and
 * alpha becomes (a * 3) / 255.  They therefore need to be wider than 8 bits
 * to hold the 10-bit results.
 *
 * The final shifts are done on Uint32 so that moving alpha into bits 30-31
 * does not overflow a signed int.
 */
#define ARGB2101010_FROM_RGBA(Pixel, r, g, b, a) \
{ \
    (r) = (r) ? (((r) << 2) | 0x3) : 0; \
    (g) = (g) ? (((g) << 2) | 0x3) : 0; \
    (b) = (b) ? (((b) << 2) | 0x3) : 0; \
    (a) = ((a) * 3) / 255; \
    (Pixel) = ((Uint32)(a)<<30)|((Uint32)(r)<<20)|((Uint32)(g)<<10)|(Uint32)(b); \
}
/*
 * Pack r, g, b for the given format and store the pixel, which is `bpp`
 * bytes wide, at `buf`.
 *
 *   bpp 1, 2, 4  the pixel is built with PIXEL_FROM_RGB and written through
 *                a Uint8/Uint16/Uint32 pointer
 *   bpp 3        each channel is written to the byte selected by the
 *                format's shift and SDL_BYTEORDER
 *   other        nothing is written
 *
 * The scratch variable is called `_pixel`, as in ASSEMBLE_RGBA (the former
 * `_Pixel` is an identifier reserved for the implementation).  `fmt` is
 * parenthesized and the 3-byte path addresses `buf` as bytes.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
{ \
    switch (bpp) { \
        case 1: { \
            Uint8 _pixel; \
 \
            PIXEL_FROM_RGB(_pixel, (fmt), (r), (g), (b)); \
            *((Uint8 *)(buf)) = _pixel; \
        } \
        break; \
 \
        case 2: { \
            Uint16 _pixel; \
 \
            PIXEL_FROM_RGB(_pixel, (fmt), (r), (g), (b)); \
            *((Uint16 *)(buf)) = _pixel; \
        } \
        break; \
 \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((Uint8 *)(buf)+(fmt)->Rshift/8) = (r); \
                *((Uint8 *)(buf)+(fmt)->Gshift/8) = (g); \
                *((Uint8 *)(buf)+(fmt)->Bshift/8) = (b); \
            } else { \
                *((Uint8 *)(buf)+2-(fmt)->Rshift/8) = (r); \
                *((Uint8 *)(buf)+2-(fmt)->Gshift/8) = (g); \
                *((Uint8 *)(buf)+2-(fmt)->Bshift/8) = (b); \
            } \
        } \
        break; \
 \
        case 4: { \
            Uint32 _pixel; \
 \
            PIXEL_FROM_RGB(_pixel, (fmt), (r), (g), (b)); \
            *((Uint32 *)(buf)) = _pixel; \
        } \
        break; \
 \
        default: \
            break; \
    } \
}
309
/*
 * Split a packed pixel into R, G, B and A byte values using the masks,
 * shifts and loss values of the given pixel format.
 *
 *   Pixel       packed pixel value (evaluated four times)
 *   fmt         pointer to the pixel format
 *   r, g, b, a  lvalues receiving the values looked up in SDL_expand_byte
 *
 * Arguments are parenthesized so that expressions can be passed safely.
 */
/* FIXME: Should we rescale alpha into 0..255 here? */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
{ \
    (r) = SDL_expand_byte[(fmt)->Rloss][(((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift)]; \
    (g) = SDL_expand_byte[(fmt)->Gloss][(((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift)]; \
    (b) = SDL_expand_byte[(fmt)->Bloss][(((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift)]; \
    (a) = SDL_expand_byte[(fmt)->Aloss][(((Pixel)&(fmt)->Amask)>>(fmt)->Ashift)]; \
}
/*
 * Split a packed pixel into R, G, B and A using only the masks and shifts
 * of the format; unlike RGBA_FROM_PIXEL no expansion table is consulted.
 * `Pixel` is evaluated four times; `fmt` is a pointer to the format.
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
{ \
    (r) = ((Pixel)&(fmt)->Rmask)>>(fmt)->Rshift; \
    (g) = ((Pixel)&(fmt)->Gmask)>>(fmt)->Gshift; \
    (b) = ((Pixel)&(fmt)->Bmask)>>(fmt)->Bshift; \
    (a) = ((Pixel)&(fmt)->Amask)>>(fmt)->Ashift; \
}
/*
 * Fixed-layout unpackers for 32-bit pixels; the macro name gives the
 * channel order from the most significant byte down to the least
 * significant one.  `Pixel` is evaluated four times and is parenthesized
 * so that expressions can be passed safely.
 */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
{ \
    (r) = ((Pixel)>>24); \
    (g) = (((Pixel)>>16)&0xFF); \
    (b) = (((Pixel)>>8)&0xFF); \
    (a) = ((Pixel)&0xFF); \
}
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
{ \
    (r) = (((Pixel)>>16)&0xFF); \
    (g) = (((Pixel)>>8)&0xFF); \
    (b) = ((Pixel)&0xFF); \
    (a) = ((Pixel)>>24); \
}
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
{ \
    (r) = ((Pixel)&0xFF); \
    (g) = (((Pixel)>>8)&0xFF); \
    (b) = (((Pixel)>>16)&0xFF); \
    (a) = ((Pixel)>>24); \
}
#define RGBA_FROM_BGRA8888(Pixel, r, g, b, a) \
{ \
    (r) = (((Pixel)>>8)&0xFF); \
    (g) = (((Pixel)>>16)&0xFF); \
    (b) = ((Pixel)>>24); \
    (a) = ((Pixel)&0xFF); \
}
/*
 * Unpack a 2-10-10-10 pixel into 8-bit channels: the top 8 bits of each
 * 10-bit colour field are kept, and the 2-bit alpha (bits 30-31) is
 * expanded through SDL_expand_byte[6].  `Pixel` is evaluated four times and
 * is parenthesized so that expressions can be passed safely.
 */
#define RGBA_FROM_ARGB2101010(Pixel, r, g, b, a) \
{ \
    (r) = (((Pixel)>>22)&0xFF); \
    (g) = (((Pixel)>>12)&0xFF); \
    (b) = (((Pixel)>>2)&0xFF); \
    (a) = SDL_expand_byte[6][((Pixel)>>30)]; \
}
/*
 * Read one pixel that is `bpp` bytes wide from `buf` and split it into
 * R, G, B and A.
 *
 *   bpp 1, 2, 4  the raw value is stored in `Pixel` and then decoded with
 *                RGBA_FROM_PIXEL
 *   bpp 3        `Pixel` is set to 0, the colours are read from the bytes
 *                selected by the format's shifts and SDL_BYTEORDER, and
 *                alpha is set to 0xFF
 *   other        Pixel, r, g, b and a are all set to 0
 *
 * `fmt` is a pointer to the pixel format; `Pixel`, `r`, `g`, `b`, `a` are
 * lvalues.  Arguments are parenthesized, and the 3-byte path addresses
 * `buf` as bytes just like the other paths do.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
do { \
    switch (bpp) { \
        case 1: \
            (Pixel) = *((Uint8 *)(buf)); \
            RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a)); \
            break; \
 \
        case 2: \
            (Pixel) = *((Uint16 *)(buf)); \
            RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a)); \
            break; \
 \
        case 3: { \
            (Pixel) = 0; \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (r) = *((Uint8 *)(buf)+(fmt)->Rshift/8); \
                (g) = *((Uint8 *)(buf)+(fmt)->Gshift/8); \
                (b) = *((Uint8 *)(buf)+(fmt)->Bshift/8); \
            } else { \
                (r) = *((Uint8 *)(buf)+2-(fmt)->Rshift/8); \
                (g) = *((Uint8 *)(buf)+2-(fmt)->Gshift/8); \
                (b) = *((Uint8 *)(buf)+2-(fmt)->Bshift/8); \
            } \
            (a) = 0xFF; \
        } \
        break; \
 \
        case 4: \
            (Pixel) = *((Uint32 *)(buf)); \
            RGBA_FROM_PIXEL((Pixel), (fmt), (r), (g), (b), (a)); \
            break; \
 \
        default: \
            /* stop gcc complaints */ \
            (Pixel) = 0; \
            (r) = (g) = (b) = (a) = 0; \
            break; \
    } \
} while (0)
400
/*
 * Pack R, G, B and A values into a pixel of the given format: each channel
 * is shifted right by its loss and then left by its shift.
 *
 *   Pixel       lvalue receiving the packed value
 *   fmt         pointer to the pixel format
 *   r, g, b, a  channel values
 *
 * Channels are widened to Uint32 before the left shift so that a shift into
 * bit 31 does not overflow a signed int, and all arguments are
 * parenthesized.
 */
/* FIXME: this isn't correct, especially for Alpha (maximum != 255) */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
{ \
    (Pixel) = ((Uint32)((r)>>(fmt)->Rloss)<<(fmt)->Rshift)| \
              ((Uint32)((g)>>(fmt)->Gloss)<<(fmt)->Gshift)| \
              ((Uint32)((b)>>(fmt)->Bloss)<<(fmt)->Bshift)| \
              ((Uint32)((a)>>(fmt)->Aloss)<<(fmt)->Ashift); \
}
/*
 * Pack r, g, b, a for the given format and store the pixel, which is `bpp`
 * bytes wide, at `buf`.
 *
 *   bpp 1, 2, 4  the pixel is built with PIXEL_FROM_RGBA and written
 *                through a Uint8/Uint16/Uint32 pointer
 *   bpp 3        only r, g and b are written, each to the byte selected by
 *                the format's shift and SDL_BYTEORDER; `a` is not stored
 *   other        nothing is written
 *
 * `fmt` is parenthesized and the 3-byte path addresses `buf` as bytes.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
{ \
    switch (bpp) { \
        case 1: { \
            Uint8 _pixel; \
 \
            PIXEL_FROM_RGBA(_pixel, (fmt), (r), (g), (b), (a)); \
            *((Uint8 *)(buf)) = _pixel; \
        } \
        break; \
 \
        case 2: { \
            Uint16 _pixel; \
 \
            PIXEL_FROM_RGBA(_pixel, (fmt), (r), (g), (b), (a)); \
            *((Uint16 *)(buf)) = _pixel; \
        } \
        break; \
 \
        case 3: { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                *((Uint8 *)(buf)+(fmt)->Rshift/8) = (r); \
                *((Uint8 *)(buf)+(fmt)->Gshift/8) = (g); \
                *((Uint8 *)(buf)+(fmt)->Bshift/8) = (b); \
            } else { \
                *((Uint8 *)(buf)+2-(fmt)->Rshift/8) = (r); \
                *((Uint8 *)(buf)+2-(fmt)->Gshift/8) = (g); \
                *((Uint8 *)(buf)+2-(fmt)->Bshift/8) = (b); \
            } \
        } \
        break; \
 \
        case 4: { \
            Uint32 _pixel; \
 \
            PIXEL_FROM_RGBA(_pixel, (fmt), (r), (g), (b), (a)); \
            *((Uint32 *)(buf)) = _pixel; \
        } \
        break; \
 \
        default: \
            break; \
    } \
}
450
/*
 * Blend the RGB values of two pixels with an alpha value.
 *
 * For every channel: d = d + ((s - d) * A) / 255, computed in int and
 * stored back as Uint8.
 *
 *   sR, sG, sB  source channel values
 *   A           alpha; 0 leaves the destination unchanged, 255 replaces it
 *               with the source
 *   dR, dG, dB  destination lvalues, read and then overwritten
 *
 * Arguments are parenthesized so that expressions (for example a sum passed
 * as A) are converted and multiplied as a whole.
 */
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB) \
do { \
    (dR) = (Uint8)((((int)((sR)-(dR))*(int)(A))/255)+(dR)); \
    (dG) = (Uint8)((((int)((sG)-(dG))*(int)(A))/255)+(dG)); \
    (dB) = (Uint8)((((int)((sB)-(dB))*(int)(A))/255)+(dB)); \
} while(0)
458
459
/*
 * Blend the RGBA values of two pixels.
 *
 * Colour channels: d = d + ((s - d) * sA) / 255.
 * Alpha:           dA = sA + dA - (sA * dA) / 255.
 * Results are stored back as Uint8.
 *
 *   sR, sG, sB, sA  source channel values
 *   dR, dG, dB, dA  destination lvalues, read and then overwritten
 *
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
do { \
    (dR) = (Uint8)((((int)((sR)-(dR))*(int)(sA))/255)+(dR)); \
    (dG) = (Uint8)((((int)((sG)-(dG))*(int)(sA))/255)+(dG)); \
    (dB) = (Uint8)((((int)((sB)-(dB))*(int)(sA))/255)+(dB)); \
    (dA) = (Uint8)((int)(sA)+(dA)-((int)(sA)*(dA))/255); \
} while(0)
468
469
/* This is a very useful loop for optimizing blitters.  It is enabled on
   every compiler except Visual C++ 7 (_MSC_VER 1300), whose optimizer has
   a bug when compiling this code. */
#if !defined(_MSC_VER) || (_MSC_VER != 1300)
#define USE_DUFFS_LOOP
#endif
476#ifdef USE_DUFFS_LOOP
477
/*
 * 8-times unrolled loop (Duff's device): executes `pixel_copy_increment`
 * exactly `width` times.
 *
 * The switch jumps into the middle of the unrolled body to handle the
 * width % 8 leftover first; the do/while then runs the remaining full
 * groups of eight.
 *
 * A width of zero or less executes nothing, matching the plain-loop
 * version used when USE_DUFFS_LOOP is not defined (without the guard a
 * width of 0 would run the statement eight times).  `width` is evaluated
 * twice and is parenthesized so that expressions can be passed safely.
 * The counter `n` is visible to `pixel_copy_increment`.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width) \
{ int n = ((width)+7)/8; \
    if (n > 0) { \
        switch ((width) & 7) { \
        case 0: do { pixel_copy_increment; /* fallthrough */ \
        case 7:      pixel_copy_increment; /* fallthrough */ \
        case 6:      pixel_copy_increment; /* fallthrough */ \
        case 5:      pixel_copy_increment; /* fallthrough */ \
        case 4:      pixel_copy_increment; /* fallthrough */ \
        case 3:      pixel_copy_increment; /* fallthrough */ \
        case 2:      pixel_copy_increment; /* fallthrough */ \
        case 1:      pixel_copy_increment; \
                } while ( --n > 0 ); \
        } \
    } \
}
493
/*
 * 4-times unrolled loop (Duff's device): executes `pixel_copy_increment`
 * exactly `width` times.
 *
 * The switch handles the width % 4 leftover by jumping into the unrolled
 * body; the do/while then runs the remaining full groups of four.
 *
 * A width of zero or less executes nothing, matching the plain-loop
 * version used when USE_DUFFS_LOOP is not defined.  `width` is evaluated
 * twice and is parenthesized so that expressions can be passed safely.
 * The counter `n` is visible to `pixel_copy_increment`.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width) \
{ int n = ((width)+3)/4; \
    if (n > 0) { \
        switch ((width) & 3) { \
        case 0: do { pixel_copy_increment; /* fallthrough */ \
        case 3:      pixel_copy_increment; /* fallthrough */ \
        case 2:      pixel_copy_increment; /* fallthrough */ \
        case 1:      pixel_copy_increment; \
                } while (--n > 0); \
        } \
    } \
}
505
/* The default loop is simply the 8-times unrolled variant */
#define DUFFS_LOOP(copy_stmt, count) \
    DUFFS_LOOP8(copy_stmt, count)
509
/*
 * Special version of Duff's device for even more optimization.
 *
 * The caller supplies three statements that process 1, 2 and 4 pixels
 * respectively.  The low three bits of the width are consumed with at most
 * one use of each of the 1-, 2- and 4-pixel statements; what remains is a
 * multiple of eight and is handled by running the 4-pixel statement twice
 * per iteration.
 *
 * A width of zero or less executes nothing, matching the plain-loop
 * version used when USE_DUFFS_LOOP is not defined (without the guard a
 * negative width would still run some of the statements).  The counter
 * `n` is visible to the statements.
 */
#define DUFFS_LOOP_124(pixel_copy_increment1, \
                       pixel_copy_increment2, \
                       pixel_copy_increment4, width) \
{ int n = (width); \
    if (n > 0) { \
        if (n & 1) { \
            pixel_copy_increment1; n -= 1; \
        } \
        if (n & 2) { \
            pixel_copy_increment2; n -= 2; \
        } \
        if (n & 4) { \
            pixel_copy_increment4; n -= 4; \
        } \
        if (n) { \
            n /= 8; \
            do { \
                pixel_copy_increment4; \
                pixel_copy_increment4; \
            } while (--n > 0); \
        } \
    } \
}
532
533#else
534
/* Plain counted loop, used when Duff's device is not enabled: runs the
   statement `count` times and does nothing for a count of zero or less. */
#define DUFFS_LOOP(copy_stmt, count) \
{ int n; \
    for (n = (count); 0 < n; --n) { \
        copy_stmt; \
    } \
}
/* The unrolled variants all fall back to the plain loop */
#define DUFFS_LOOP8(copy_stmt, count) \
    DUFFS_LOOP(copy_stmt, count)
#define DUFFS_LOOP4(copy_stmt, count) \
    DUFFS_LOOP(copy_stmt, count)
/* Only the single-pixel statement is used; the 2- and 4-pixel ones are
   ignored. */
#define DUFFS_LOOP_124(copy_stmt1, \
                       copy_stmt2, \
                       copy_stmt4, count) \
    DUFFS_LOOP(copy_stmt1, count)
550
551#endif /* USE_DUFFS_LOOP */
552
/* Keep Visual C++ (6.0 in particular) from printing out stupid warnings:
   warning 4550 is disabled on Microsoft compilers. */
#if defined(_MSC_VER)
#if (_MSC_VER >= 600)
#pragma warning(disable: 4550)
#endif
#endif
557
558#endif /* SDL_blit_h_ */
559
560/* vi: set ts=4 sw=4 expandtab: */
561