1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22
23#ifndef SDL_blit_h_
24#define SDL_blit_h_
25
26// Table to do pixel byte expansion
27extern const Uint8 *SDL_expand_byte[9];
28extern const Uint16 SDL_expand_byte_10[];
29
// SDL blit copy flags.
// Every flag is a distinct bit; each *_MASK macro is the OR of one related group.

// Modulation
#define SDL_COPY_MODULATE_COLOR 0x0001
#define SDL_COPY_MODULATE_ALPHA 0x0002
#define SDL_COPY_MODULATE_MASK \
    (SDL_COPY_MODULATE_COLOR | SDL_COPY_MODULATE_ALPHA)

// Blend modes
#define SDL_COPY_BLEND               0x0010
#define SDL_COPY_BLEND_PREMULTIPLIED 0x0020
#define SDL_COPY_ADD                 0x0040
#define SDL_COPY_ADD_PREMULTIPLIED   0x0080
#define SDL_COPY_MOD                 0x0100
#define SDL_COPY_MUL                 0x0200
#define SDL_COPY_BLEND_MASK                                   \
    (SDL_COPY_BLEND | SDL_COPY_BLEND_PREMULTIPLIED |          \
     SDL_COPY_ADD | SDL_COPY_ADD_PREMULTIPLIED |              \
     SDL_COPY_MOD | SDL_COPY_MUL)

// Miscellaneous
#define SDL_COPY_COLORKEY 0x0400
#define SDL_COPY_NEAREST  0x0800

// RLE
#define SDL_COPY_RLE_DESIRED  0x1000
#define SDL_COPY_RLE_COLORKEY 0x2000
#define SDL_COPY_RLE_ALPHAKEY 0x4000
#define SDL_COPY_RLE_MASK \
    (SDL_COPY_RLE_DESIRED | SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY)
47
// SDL blit CPU flags (one bit per feature; SDL_CPU_ANY is the empty set)
#define SDL_CPU_ANY                0x0000
#define SDL_CPU_MMX                0x0001
#define SDL_CPU_SSE                0x0002
#define SDL_CPU_SSE2               0x0004
#define SDL_CPU_ALTIVEC_PREFETCH   0x0008
#define SDL_CPU_ALTIVEC_NOPREFETCH 0x0010
55
56typedef struct
57{
58 SDL_Surface *src_surface;
59 Uint8 *src;
60 int src_w, src_h;
61 int src_pitch;
62 int src_skip;
63 SDL_Surface *dst_surface;
64 Uint8 *dst;
65 int dst_w, dst_h;
66 int dst_pitch;
67 int dst_skip;
68 const SDL_PixelFormatDetails *src_fmt;
69 const SDL_Palette *src_pal;
70 const SDL_PixelFormatDetails *dst_fmt;
71 const SDL_Palette *dst_pal;
72 Uint8 *table;
73 SDL_HashTable *palette_map;
74 int flags;
75 Uint32 colorkey;
76 Uint8 r, g, b, a;
77} SDL_BlitInfo;
78
79typedef void (*SDL_BlitFunc)(SDL_BlitInfo *info);
80
81typedef struct
82{
83 SDL_PixelFormat src_format;
84 SDL_PixelFormat dst_format;
85 int flags;
86 unsigned int cpu;
87 SDL_BlitFunc func;
88} SDL_BlitFuncEntry;
89
90typedef bool (SDLCALL *SDL_Blit) (struct SDL_Surface *src, const SDL_Rect *srcrect, struct SDL_Surface *dst, const SDL_Rect *dstrect);
91
92// Blit mapping definition
93typedef struct SDL_BlitMap
94{
95 int identity;
96 SDL_Blit blit;
97 void *data;
98 SDL_BlitInfo info;
99
100 /* the version count matches the destination; mismatch indicates
101 an invalid mapping */
102 Uint32 dst_palette_version;
103 Uint32 src_palette_version;
104} SDL_BlitMap;
105
106// Functions found in SDL_blit.c
107extern bool SDL_CalculateBlit(SDL_Surface *surface, SDL_Surface *dst);
108
109/* Functions found in SDL_blit_*.c */
110extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface *surface);
111extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface *surface);
112extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface *surface);
113extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
114
115/*
116 * Useful macros for blitting routines
117 */
118
// DECLARE_ALIGNED(t, v, a): declare variable `v` of type `t`, requesting
// alignment `a` through the compiler-specific syntax when one is available.
// On other compilers this is a plain declaration with no alignment request.
#if defined(__GNUC__)
#define DECLARE_ALIGNED(t, v, a) t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(t, v, a) __declspec(align(a)) t v
#else
#define DECLARE_ALIGNED(t, v, a) t v
#endif
126
/*
 * Split a packed pixel value into its R, G and B components.
 * Each channel is masked and shifted out of Pixel using the masks and shifts
 * in fmt, then widened through SDL_expand_byte[<channel bit count>].
 *
 * Pixel: packed pixel value (any integer expression).
 * fmt:   pointer expression with R/G/B mask, shift and bits members.
 * r,g,b: lvalues receiving the expanded channel values.
 *
 * Pixel and fmt are parenthesized so expression arguments such as `a | b`
 * or `&details` expand correctly.
 */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)                                             \
    {                                                                                   \
        r = SDL_expand_byte[(fmt)->Rbits][(((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift)]; \
        g = SDL_expand_byte[(fmt)->Gbits][(((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift)]; \
        b = SDL_expand_byte[(fmt)->Bbits][(((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift)]; \
    }
/*
 * Decode a 16-bit RGB565 pixel (R in bits 11-15, G in bits 5-10, B in bits
 * 0-4) into r, g, b, widening each channel through SDL_expand_byte.
 * Pixel is parenthesized so expression arguments expand correctly.
 */
#define RGB_FROM_RGB565(Pixel, r, g, b)                       \
    {                                                         \
        r = SDL_expand_byte[5][(((Pixel) & 0xF800) >> 11)];   \
        g = SDL_expand_byte[6][(((Pixel) & 0x07E0) >> 5)];    \
        b = SDL_expand_byte[5][((Pixel) & 0x001F)];           \
    }
/*
 * Decode a 15-bit RGB555 pixel (R in bits 10-14, G in bits 5-9, B in bits
 * 0-4) into r, g, b, widening each channel through SDL_expand_byte.
 */
#define RGB_FROM_RGB555(Pixel, r, g, b)                       \
    {                                                         \
        r = SDL_expand_byte[5][(((Pixel) & 0x7C00) >> 10)];   \
        g = SDL_expand_byte[5][(((Pixel) & 0x03E0) >> 5)];    \
        b = SDL_expand_byte[5][((Pixel) & 0x001F)];           \
    }
/*
 * Decode an XRGB8888 pixel: R from bits 16-23, G from bits 8-15, B from bits
 * 0-7. The top byte is ignored.
 * Pixel is parenthesized so expression arguments expand correctly.
 */
#define RGB_FROM_XRGB8888(Pixel, r, g, b)      \
    {                                          \
        r = (((Pixel) & 0xFF0000) >> 16);      \
        g = (((Pixel) & 0xFF00) >> 8);         \
        b = ((Pixel) & 0xFF);                  \
    }
/*
 * Load the raw value of one pixel that is `bpp` bytes wide from `buf` into
 * Pixel. 1-, 2- and 4-byte pixels are read with a single typed load; 3-byte
 * pixels are assembled from individual bytes according to SDL_BYTEORDER.
 * Any other bpp stores 0.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)                          \
    do {                                                             \
        switch (bpp) {                                               \
        case 1:                                                      \
            Pixel = *((Uint8 *)(buf));                               \
            break;                                                   \
        case 2:                                                      \
            Pixel = *((Uint16 *)(buf));                              \
            break;                                                   \
        case 3:                                                      \
        {                                                            \
            Uint8 *B = (Uint8 *)(buf);                               \
            Pixel = (SDL_BYTEORDER == SDL_LIL_ENDIAN)                \
                        ? (B[0] | (B[1] << 8) | (B[2] << 16))        \
                        : ((B[0] << 16) | (B[1] << 8) | B[2]);       \
        } break;                                                     \
        case 4:                                                      \
            Pixel = *((Uint32 *)(buf));                              \
            break;                                                   \
        default:                                                     \
            Pixel = 0; /* stop gcc complaints */                     \
            break;                                                   \
        }                                                            \
    } while (0)
182
/*
 * Read one pixel of `bpp` bytes from `buf` and split it into r, g and b.
 *
 * For 1-, 2- and 4-byte pixels the raw value is stored in Pixel and decoded
 * with RGB_FROM_PIXEL. For 3-byte pixels Pixel is set to 0 and each channel
 * is read as the byte selected by its shift in fmt (mirrored when the host is
 * not little-endian); `buf` must be a byte pointer for that case because it
 * is indexed without a cast. Any other bpp sets everything to 0.
 *
 * fmt is parenthesized (and forwarded parenthesized) so pointer expressions
 * such as `&details` expand correctly.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)            \
    do {                                                       \
        switch (bpp) {                                         \
        case 1:                                                \
            Pixel = *((Uint8 *)(buf));                         \
            RGB_FROM_PIXEL(Pixel, (fmt), r, g, b);             \
            break;                                             \
                                                               \
        case 2:                                                \
            Pixel = *((Uint16 *)(buf));                        \
            RGB_FROM_PIXEL(Pixel, (fmt), r, g, b);             \
            break;                                             \
                                                               \
        case 3:                                                \
        {                                                      \
            Pixel = 0;                                         \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {             \
                r = *((buf) + (fmt)->Rshift / 8);              \
                g = *((buf) + (fmt)->Gshift / 8);              \
                b = *((buf) + (fmt)->Bshift / 8);              \
            } else {                                           \
                r = *((buf) + 2 - (fmt)->Rshift / 8);          \
                g = *((buf) + 2 - (fmt)->Gshift / 8);          \
                b = *((buf) + 2 - (fmt)->Bshift / 8);          \
            }                                                  \
        } break;                                               \
                                                               \
        case 4:                                                \
            Pixel = *((Uint32 *)(buf));                        \
            RGB_FROM_PIXEL(Pixel, (fmt), r, g, b);             \
            break;                                             \
                                                               \
        default:                                               \
            /* stop gcc complaints */                          \
            Pixel = 0;                                         \
            r = g = b = 0;                                     \
            break;                                             \
        }                                                      \
    } while (0)
222
/*
 * Assemble R-G-B values into a pixel of the specified format.
 * Each 8-bit channel is reduced to the format's bit count by dropping low
 * bits, shifted into position, and the format's alpha mask is OR-ed in (so
 * any alpha channel comes out fully set).
 *
 * All arguments are parenthesized, and channels are converted to Uint32
 * before shifting so a shift into bit 31 is well defined even when the
 * caller passes Uint8/int values.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b)                                  \
    {                                                                        \
        Pixel = (((Uint32)(r) >> (8 - (fmt)->Rbits)) << (fmt)->Rshift) |     \
                (((Uint32)(g) >> (8 - (fmt)->Gbits)) << (fmt)->Gshift) |     \
                (((Uint32)(b) >> (8 - (fmt)->Bbits)) << (fmt)->Bshift) |     \
                (fmt)->Amask;                                                \
    }
/*
 * Fixed-format packers: build a pixel from 8-bit r, g, b by dropping the low
 * bits of each channel. Channel arguments are parenthesized so expressions
 * such as `v & 0xFF` expand correctly.
 */

/* 8-bit RGB332: 3 bits red, 3 bits green, 2 bits blue. */
#define RGB332_FROM_RGB(Pixel, r, g, b)                                       \
    {                                                                         \
        Pixel = (Uint8)((((r) >> 5) << 5) | (((g) >> 5) << 2) | ((b) >> 6));  \
    }
/* 16-bit RGB565: 5 bits red, 6 bits green, 5 bits blue. */
#define RGB565_FROM_RGB(Pixel, r, g, b)                                        \
    {                                                                          \
        Pixel = (Uint16)((((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3)); \
    }
/* 15-bit RGB555: 5 bits per channel. */
#define RGB555_FROM_RGB(Pixel, r, g, b)                                        \
    {                                                                          \
        Pixel = (Uint16)((((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3)); \
    }
/* XRGB8888: full 8 bits per channel, top byte left clear. */
#define XRGB8888_FROM_RGB(Pixel, r, g, b)             \
    {                                                 \
        Pixel = ((r) << 16) | ((g) << 8) | (b);       \
    }
/*
 * 32-bit packers: place four 8-bit channels into a pixel in the byte order
 * named by the macro (most significant channel first).
 *
 * Each channel is parenthesized and converted to Uint32 before shifting, so
 * expression arguments expand correctly and shifting a value >= 0x80 by 24
 * does not overflow a promoted signed int.
 */
#define ARGB8888_FROM_RGBA(Pixel, r, g, b, a)                          \
    {                                                                  \
        Pixel = ((Uint32)(a) << 24) | ((Uint32)(r) << 16) |            \
                ((Uint32)(g) << 8) | (Uint32)(b);                      \
    }
#define RGBA8888_FROM_RGBA(Pixel, r, g, b, a)                          \
    {                                                                  \
        Pixel = ((Uint32)(r) << 24) | ((Uint32)(g) << 16) |            \
                ((Uint32)(b) << 8) | (Uint32)(a);                      \
    }
#define ABGR8888_FROM_RGBA(Pixel, r, g, b, a)                          \
    {                                                                  \
        Pixel = ((Uint32)(a) << 24) | ((Uint32)(b) << 16) |            \
                ((Uint32)(g) << 8) | (Uint32)(r);                      \
    }
#define BGRA8888_FROM_RGBA(Pixel, r, g, b, a)                          \
    {                                                                  \
        Pixel = ((Uint32)(b) << 24) | ((Uint32)(g) << 16) |            \
                ((Uint32)(r) << 8) | (Uint32)(a);                      \
    }
/*
 * Pack 8-bit r, g, b, a into an ARGB2101010 pixel (2-bit alpha in bits
 * 30-31, then 10-bit R, G, B).
 *
 * NOTE: r, g, b and a must be modifiable lvalues wide enough for 10 bits;
 * they are overwritten with the converted values (non-zero colors become
 * (c << 2) | 3, alpha becomes a * 3 / 255).
 *
 * The converted values are cast to Uint32 before shifting so that placing
 * alpha at bit 30 is well defined for signed or narrow argument types.
 */
#define ARGB2101010_FROM_RGBA(Pixel, r, g, b, a)                       \
    {                                                                  \
        r = r ? ((r << 2) | 0x3) : 0;                                  \
        g = g ? ((g << 2) | 0x3) : 0;                                  \
        b = b ? ((b << 2) | 0x3) : 0;                                  \
        a = (a * 3) / 255;                                             \
        Pixel = ((Uint32)(a) << 30) | ((Uint32)(r) << 20) |            \
                ((Uint32)(g) << 10) | (Uint32)(b);                     \
    }
/*
 * Pack floating-point r, g, b, a into an ARGB2101010 pixel.
 * Each input is clamped to [0, 1] and scaled (colors to 0..1023, alpha to
 * 0..3), then rounded and placed at bits 20/10/0 (R/G/B) and 30 (A).
 * NOTE: r, g, b and a are overwritten with their scaled values.
 */
#define ARGB2101010_FROM_RGBAFLOAT(Pixel, r, g, b, a)   \
    {                                                   \
        r = SDL_clamp(r, 0.0f, 1.0f) * 1023.0f;         \
        g = SDL_clamp(g, 0.0f, 1.0f) * 1023.0f;         \
        b = SDL_clamp(b, 0.0f, 1.0f) * 1023.0f;         \
        a = SDL_clamp(a, 0.0f, 1.0f) * 3.0f;            \
        Pixel = (((Uint32)SDL_roundf(a)) << 30)         \
              | (((Uint32)SDL_roundf(r)) << 20)         \
              | (((Uint32)SDL_roundf(g)) << 10)         \
              | (Uint32)SDL_roundf(b);                  \
    }
/*
 * Pack 8-bit r, g, b, a into an ABGR2101010 pixel (2-bit alpha in bits
 * 30-31, then 10-bit B, G, R).
 *
 * NOTE: r, g, b and a must be modifiable lvalues wide enough for 10 bits;
 * they are overwritten with the converted values (non-zero colors become
 * (c << 2) | 3, alpha becomes a * 3 / 255).
 *
 * The converted values are cast to Uint32 before shifting so that placing
 * alpha at bit 30 is well defined for signed or narrow argument types.
 */
#define ABGR2101010_FROM_RGBA(Pixel, r, g, b, a)                       \
    {                                                                  \
        r = r ? ((r << 2) | 0x3) : 0;                                  \
        g = g ? ((g << 2) | 0x3) : 0;                                  \
        b = b ? ((b << 2) | 0x3) : 0;                                  \
        a = (a * 3) / 255;                                             \
        Pixel = ((Uint32)(a) << 30) | ((Uint32)(b) << 20) |            \
                ((Uint32)(g) << 10) | (Uint32)(r);                     \
    }
/*
 * Pack floating-point r, g, b, a into an ABGR2101010 pixel.
 * Each input is clamped to [0, 1] and scaled (colors to 0..1023, alpha to
 * 0..3), then rounded and placed at bits 0/10/20 (R/G/B) and 30 (A).
 * NOTE: r, g, b and a are overwritten with their scaled values.
 */
#define ABGR2101010_FROM_RGBAFLOAT(Pixel, r, g, b, a)   \
    {                                                   \
        r = SDL_clamp(r, 0.0f, 1.0f) * 1023.0f;         \
        g = SDL_clamp(g, 0.0f, 1.0f) * 1023.0f;         \
        b = SDL_clamp(b, 0.0f, 1.0f) * 1023.0f;         \
        a = SDL_clamp(a, 0.0f, 1.0f) * 3.0f;            \
        Pixel = (((Uint32)SDL_roundf(a)) << 30)         \
              | (((Uint32)SDL_roundf(b)) << 20)         \
              | (((Uint32)SDL_roundf(g)) << 10)         \
              | (Uint32)SDL_roundf(r);                  \
    }
/*
 * Build a pixel from r, g, b and store it at `buf` as a `bpp`-byte value.
 *
 * 1-, 2- and 4-byte pixels are packed with PIXEL_FROM_RGB and stored with a
 * single typed write. 3-byte pixels are written channel by channel at the
 * byte selected by each shift in fmt (mirrored when the host is not
 * little-endian); `buf` must be a byte pointer for that case. Any other bpp
 * writes nothing.
 *
 * fmt and the channel values are parenthesized (and forwarded
 * parenthesized) so expression arguments expand correctly.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b)                   \
    {                                                          \
        switch (bpp) {                                         \
        case 1:                                                \
        {                                                      \
            Uint8 _pixel;                                      \
                                                               \
            PIXEL_FROM_RGB(_pixel, (fmt), (r), (g), (b));      \
            *((Uint8 *)(buf)) = _pixel;                        \
        } break;                                               \
                                                               \
        case 2:                                                \
        {                                                      \
            Uint16 _pixel;                                     \
                                                               \
            PIXEL_FROM_RGB(_pixel, (fmt), (r), (g), (b));      \
            *((Uint16 *)(buf)) = _pixel;                       \
        } break;                                               \
                                                               \
        case 3:                                                \
        {                                                      \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {             \
                *((buf) + (fmt)->Rshift / 8) = (r);            \
                *((buf) + (fmt)->Gshift / 8) = (g);            \
                *((buf) + (fmt)->Bshift / 8) = (b);            \
            } else {                                           \
                *((buf) + 2 - (fmt)->Rshift / 8) = (r);        \
                *((buf) + 2 - (fmt)->Gshift / 8) = (g);        \
                *((buf) + 2 - (fmt)->Bshift / 8) = (b);        \
            }                                                  \
        } break;                                               \
                                                               \
        case 4:                                                \
        {                                                      \
            Uint32 _pixel;                                     \
                                                               \
            PIXEL_FROM_RGB(_pixel, (fmt), (r), (g), (b));      \
            *((Uint32 *)(buf)) = _pixel;                       \
        } break;                                               \
                                                               \
        default:                                               \
            /* unsupported pixel size: nothing is written */   \
            break;                                             \
        }                                                      \
    }
342
// FIXME: Should we rescale alpha into 0..255 here?
/*
 * Split a packed pixel value into R, G, B and A components.
 * Each channel is masked and shifted out of Pixel using the masks and shifts
 * in fmt, then widened through SDL_expand_byte[<channel bit count>].
 * Pixel and fmt are parenthesized so expression arguments expand correctly.
 */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a)                                         \
    {                                                                                   \
        r = SDL_expand_byte[(fmt)->Rbits][(((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift)]; \
        g = SDL_expand_byte[(fmt)->Gbits][(((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift)]; \
        b = SDL_expand_byte[(fmt)->Bbits][(((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift)]; \
        a = SDL_expand_byte[(fmt)->Abits][(((Pixel) & (fmt)->Amask) >> (fmt)->Ashift)]; \
    }
/*
 * Split a pixel into R, G, B and A using only the masks and shifts in fmt
 * (no SDL_expand_byte lookup), i.e. for formats whose channels are already
 * 8 bits wide.
 * Pixel and fmt are parenthesized so expression arguments expand correctly.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a)            \
    {                                                     \
        r = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift;    \
        g = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift;    \
        b = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift;    \
        a = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift;    \
    }
/*
 * Fixed-layout 32-bit unpackers: extract four 8-bit channels from a pixel
 * whose byte order (most significant first) is named by the macro.
 * The channel in the top byte is taken with a plain `>> 24`, so Pixel is
 * expected to be an unsigned 32-bit value.
 * Pixel is parenthesized so expression arguments expand correctly.
 */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a)  \
    {                                          \
        r = ((Pixel) >> 24);                   \
        g = (((Pixel) >> 16) & 0xFF);          \
        b = (((Pixel) >> 8) & 0xFF);           \
        a = ((Pixel) & 0xFF);                  \
    }
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a)  \
    {                                          \
        r = (((Pixel) >> 16) & 0xFF);          \
        g = (((Pixel) >> 8) & 0xFF);           \
        b = ((Pixel) & 0xFF);                  \
        a = ((Pixel) >> 24);                   \
    }
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a)  \
    {                                          \
        r = ((Pixel) & 0xFF);                  \
        g = (((Pixel) >> 8) & 0xFF);           \
        b = (((Pixel) >> 16) & 0xFF);          \
        a = ((Pixel) >> 24);                   \
    }
#define RGBA_FROM_BGRA8888(Pixel, r, g, b, a)  \
    {                                          \
        r = (((Pixel) >> 8) & 0xFF);           \
        g = (((Pixel) >> 16) & 0xFF);          \
        b = ((Pixel) >> 24);                   \
        a = ((Pixel) & 0xFF);                  \
    }
/*
 * Unpackers for the 2-10-10-10 formats (2-bit alpha in bits 30-31 followed by
 * three 10-bit color channels).
 *
 * The RGBA_FROM_* variants return the top 8 bits of each 10-bit color channel
 * and expand the 2-bit alpha through SDL_expand_byte[2]. The RGBAFLOAT_FROM_*
 * variants return each channel as a float scaled to [0, 1] (colors / 1023,
 * alpha / 3).
 *
 * Alpha is taken with a plain `>> 30`, so Pixel is expected to be an unsigned
 * 32-bit value. Pixel is parenthesized so expression arguments expand
 * correctly.
 */
#define RGBA_FROM_ARGB2101010(Pixel, r, g, b, a)        \
    {                                                   \
        r = (((Pixel) >> 22) & 0xFF);                   \
        g = (((Pixel) >> 12) & 0xFF);                   \
        b = (((Pixel) >> 2) & 0xFF);                    \
        a = SDL_expand_byte[2][((Pixel) >> 30)];        \
    }
#define RGBAFLOAT_FROM_ARGB2101010(Pixel, r, g, b, a)       \
    {                                                       \
        r = (float)(((Pixel) >> 20) & 0x3FF) / 1023.0f;     \
        g = (float)(((Pixel) >> 10) & 0x3FF) / 1023.0f;     \
        b = (float)(((Pixel) >> 0) & 0x3FF) / 1023.0f;      \
        a = (float)((Pixel) >> 30) / 3.0f;                  \
    }
#define RGBA_FROM_ABGR2101010(Pixel, r, g, b, a)        \
    {                                                   \
        r = (((Pixel) >> 2) & 0xFF);                    \
        g = (((Pixel) >> 12) & 0xFF);                   \
        b = (((Pixel) >> 22) & 0xFF);                   \
        a = SDL_expand_byte[2][((Pixel) >> 30)];        \
    }
#define RGBAFLOAT_FROM_ABGR2101010(Pixel, r, g, b, a)       \
    {                                                       \
        r = (float)(((Pixel) >> 0) & 0x3FF) / 1023.0f;      \
        g = (float)(((Pixel) >> 10) & 0x3FF) / 1023.0f;     \
        b = (float)(((Pixel) >> 20) & 0x3FF) / 1023.0f;     \
        a = (float)((Pixel) >> 30) / 3.0f;                  \
    }
/*
 * Read one pixel of `bpp` bytes from `buf` and split it into r, g, b and a.
 *
 * For 1-, 2- and 4-byte pixels the raw value is stored in Pixel and decoded
 * with RGBA_FROM_PIXEL. For 3-byte pixels Pixel is set to 0, each color is
 * read as the byte selected by its shift in fmt (mirrored when the host is
 * not little-endian) and alpha is forced to 0xFF; `buf` must be a byte
 * pointer for that case. Any other bpp sets everything to 0.
 *
 * fmt is parenthesized (and forwarded parenthesized) so pointer expressions
 * such as `&details` expand correctly.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)        \
    do {                                                       \
        switch (bpp) {                                         \
        case 1:                                                \
            Pixel = *((Uint8 *)(buf));                         \
            RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a);         \
            break;                                             \
                                                               \
        case 2:                                                \
            Pixel = *((Uint16 *)(buf));                        \
            RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a);         \
            break;                                             \
                                                               \
        case 3:                                                \
        {                                                      \
            Pixel = 0;                                         \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {             \
                r = *((buf) + (fmt)->Rshift / 8);              \
                g = *((buf) + (fmt)->Gshift / 8);              \
                b = *((buf) + (fmt)->Bshift / 8);              \
            } else {                                           \
                r = *((buf) + 2 - (fmt)->Rshift / 8);          \
                g = *((buf) + 2 - (fmt)->Gshift / 8);          \
                b = *((buf) + 2 - (fmt)->Bshift / 8);          \
            }                                                  \
            a = 0xFF;                                          \
        } break;                                               \
                                                               \
        case 4:                                                \
            Pixel = *((Uint32 *)(buf));                        \
            RGBA_FROM_PIXEL(Pixel, (fmt), r, g, b, a);         \
            break;                                             \
                                                               \
        default:                                               \
            /* stop gcc complaints */                          \
            Pixel = 0;                                         \
            r = g = b = a = 0;                                 \
            break;                                             \
        }                                                      \
    } while (0)
454
// FIXME: this isn't correct, especially for Alpha (maximum != 255)
/*
 * Assemble R-G-B-A values into a pixel of the specified format.
 * Each 8-bit channel is reduced to the format's bit count by dropping low
 * bits and shifted into position.
 *
 * All arguments are parenthesized, and channels are converted to Uint32
 * before shifting so a shift into bit 31 is well defined even when the
 * caller passes Uint8/int values.
 */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                              \
    {                                                                        \
        Pixel = (((Uint32)(r) >> (8 - (fmt)->Rbits)) << (fmt)->Rshift) |     \
                (((Uint32)(g) >> (8 - (fmt)->Gbits)) << (fmt)->Gshift) |     \
                (((Uint32)(b) >> (8 - (fmt)->Bbits)) << (fmt)->Bshift) |     \
                (((Uint32)(a) >> (8 - (fmt)->Abits)) << (fmt)->Ashift);      \
    }
/*
 * Build a pixel from r, g, b, a and store it at `buf` as a `bpp`-byte value.
 *
 * 1-, 2- and 4-byte pixels are packed with PIXEL_FROM_RGBA and stored with a
 * single typed write. 3-byte pixels have no room for alpha: only r, g and b
 * are written, each at the byte selected by its shift in fmt (mirrored when
 * the host is not little-endian); `buf` must be a byte pointer for that
 * case. Any other bpp writes nothing.
 *
 * fmt and the channel values are parenthesized (and forwarded
 * parenthesized) so expression arguments expand correctly.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)                   \
    {                                                              \
        switch (bpp) {                                             \
        case 1:                                                    \
        {                                                          \
            Uint8 _pixel;                                          \
                                                                   \
            PIXEL_FROM_RGBA(_pixel, (fmt), (r), (g), (b), (a));    \
            *((Uint8 *)(buf)) = _pixel;                            \
        } break;                                                   \
                                                                   \
        case 2:                                                    \
        {                                                          \
            Uint16 _pixel;                                         \
                                                                   \
            PIXEL_FROM_RGBA(_pixel, (fmt), (r), (g), (b), (a));    \
            *((Uint16 *)(buf)) = _pixel;                           \
        } break;                                                   \
                                                                   \
        case 3:                                                    \
        {                                                          \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) {                 \
                *((buf) + (fmt)->Rshift / 8) = (r);                \
                *((buf) + (fmt)->Gshift / 8) = (g);                \
                *((buf) + (fmt)->Bshift / 8) = (b);                \
            } else {                                               \
                *((buf) + 2 - (fmt)->Rshift / 8) = (r);            \
                *((buf) + 2 - (fmt)->Gshift / 8) = (g);            \
                *((buf) + 2 - (fmt)->Bshift / 8) = (b);            \
            }                                                      \
        } break;                                                   \
                                                                   \
        case 4:                                                    \
        {                                                          \
            Uint32 _pixel;                                         \
                                                                   \
            PIXEL_FROM_RGBA(_pixel, (fmt), (r), (g), (b), (a));    \
            *((Uint32 *)(buf)) = _pixel;                           \
        } break;                                                   \
                                                                   \
        default:                                                   \
            /* unsupported pixel size: nothing is written */       \
            break;                                                 \
        }                                                          \
    }
504
/*
 * Convert a 32-bit (4 bytes per pixel) value of format srcfmt to ARGB8888.
 * The pixel is decoded with RGBA_FROM_PIXEL and re-packed as
 * (A << 24) | (R << 16) | (G << 8) | B into dst.
 *
 * Channels are widened to Uint32 before shifting, so alpha >= 0x80 does not
 * overflow a promoted signed int. The temporaries use underscore-prefixed
 * names so that a `src` expression mentioning caller variables called
 * a/r/g/b is not shadowed.
 */
#define PIXEL_TO_ARGB_PIXEL(src, srcfmt, dst)                      \
    do {                                                           \
        Uint8 _a, _r, _g, _b;                                      \
        RGBA_FROM_PIXEL((src), (srcfmt), _r, _g, _b, _a);          \
        dst = ((Uint32)_a << 24) | ((Uint32)_r << 16) |            \
              ((Uint32)_g << 8) | (Uint32)_b;                      \
    } while (0)
// Blend a single color channel or alpha value
/* dC = ((sC * sA) + (dC * (255 - sA))) / 255 */
/*
 * sC: source channel value, dC: destination channel (lvalue, updated in
 * place), sA: blend factor in 0..255.
 * The division by 255 is done with the add-one / add-high-byte / shift
 * sequence on a 16-bit intermediate.
 * All arguments are parenthesized so expressions such as `a + b` expand
 * correctly.
 */
#define ALPHA_BLEND_CHANNEL(sC, dC, sA)                              \
    do {                                                             \
        Uint16 x;                                                    \
        x = (((sC) - (dC)) * (sA)) + (((dC) << 8) - (dC));           \
        x += 0x1U;                                                   \
        x += x >> 8;                                                 \
        dC = x >> 8;                                                 \
    } while (0)
// Perform a division by 255 after a multiplication of two 8-bit color channels
/* out = (sC * dC) / 255 */
/*
 * The product is held in a 16-bit intermediate and divided by 255 with the
 * add-one / add-high-byte / shift sequence.
 * sC and dC are parenthesized so expressions such as `a + b` expand
 * correctly.
 */
#define MULT_DIV_255(sC, dC, out)       \
    do {                                \
        Uint16 x = (sC) * (dC);         \
        x += 0x1U;                      \
        x += x >> 8;                    \
        out = x >> 8;                   \
    } while (0)
/*
 * Blend the RGB values of two pixels with an alpha value.
 * Applies ALPHA_BLEND_CHANNEL with factor A to each of the three color
 * channels; dR, dG and dB are updated in place.
 */
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)          \
    do {                                                    \
        ALPHA_BLEND_CHANNEL(sR, dR, A); /* red */           \
        ALPHA_BLEND_CHANNEL(sG, dG, A); /* green */         \
        ALPHA_BLEND_CHANNEL(sB, dB, A); /* blue */          \
    } while (0)
538
// Blend two 8888 pixels with the same format
/* Calculates dst = ((src * factor) + (dst * (255 - factor))) / 255 */
/*
 * src:    source pixel (four 8-bit channels).
 * dst:    destination pixel, an lvalue that receives the blended result.
 * factor: blend weight in 0..255, applied to every channel.
 *
 * Both variants spread the bytes into wider lanes so several channels are
 * blended per multiply, then divide each lane by 255 with the add-one /
 * add-high-byte / shift sequence. The 64-bit variant handles all four
 * channels in one Uint64; the fallback handles bytes 0/2 and bytes 1/3 in
 * two Uint32 values.
 *
 * src, dst and factor are parenthesized wherever they are read so expression
 * arguments such as `a | b` or `x + y` expand correctly.
 */
// FIXME: SDL_SIZE_MAX might not be an integer literal
#if defined(SIZE_MAX) && (SIZE_MAX == 0xffffffffffffffff)
#define FACTOR_BLEND_8888(src, dst, factor)                            \
    do {                                                               \
        Uint64 src64 = (src);                                          \
        src64 = (src64 | (src64 << 24)) & 0x00FF00FF00FF00FF;          \
                                                                       \
        Uint64 dst64 = (dst);                                          \
        dst64 = (dst64 | (dst64 << 24)) & 0x00FF00FF00FF00FF;          \
                                                                       \
        dst64 = ((src64 - dst64) * (factor)) + (dst64 << 8) - dst64;   \
        dst64 += 0x0001000100010001;                                   \
        dst64 += (dst64 >> 8) & 0x00FF00FF00FF00FF;                    \
        dst64 &= 0xFF00FF00FF00FF00;                                   \
                                                                       \
        dst = (Uint32)((dst64 >> 8) | (dst64 >> 32));                  \
    } while (0)
#else
#define FACTOR_BLEND_8888(src, dst, factor)                                   \
    do {                                                                      \
        Uint32 src02 = (src) & 0x00FF00FF;                                    \
        Uint32 dst02 = (dst) & 0x00FF00FF;                                    \
                                                                              \
        Uint32 src13 = ((src) >> 8) & 0x00FF00FF;                             \
        Uint32 dst13 = ((dst) >> 8) & 0x00FF00FF;                             \
                                                                              \
        Uint32 res02 = ((src02 - dst02) * (factor)) + (dst02 << 8) - dst02;   \
        res02 += 0x00010001;                                                  \
        res02 += (res02 >> 8) & 0x00FF00FF;                                   \
        res02 = (res02 >> 8) & 0x00FF00FF;                                    \
                                                                              \
        Uint32 res13 = ((src13 - dst13) * (factor)) + (dst13 << 8) - dst13;   \
        res13 += 0x00010001;                                                  \
        res13 += (res13 >> 8) & 0x00FF00FF;                                   \
        res13 &= 0xFF00FF00;                                                  \
        dst = res02 | res13;                                                  \
    } while (0)
#endif
579
/*
 * Alpha blend two 8888 pixels with the same formats.
 * The source alpha byte (located via fmt->Ashift) is used as the blend
 * factor, and the source is blended with its alpha bits forced on
 * (src | fmt->Amask) into dst via FACTOR_BLEND_8888.
 * src and fmt are parenthesized so expression arguments expand correctly.
 */
#define ALPHA_BLEND_8888(src, dst, fmt)                     \
    do {                                                    \
        Uint32 srcA = ((src) >> (fmt)->Ashift) & 0xFF;      \
        Uint32 tmp = (src) | (fmt)->Amask;                  \
        FACTOR_BLEND_8888(tmp, dst, srcA);                  \
    } while (0)
587
/*
 * Alpha blend two 8888 pixels with differing formats.
 * The source alpha byte (located via srcfmt->Ashift) is the blend factor.
 * The source colors are moved from their srcfmt positions to their dstfmt
 * positions, the destination alpha bits are forced on, and the result is
 * blended into dst via FACTOR_BLEND_8888.
 * src, srcfmt and dstfmt are parenthesized so expression arguments expand
 * correctly.
 */
#define ALPHA_BLEND_SWIZZLE_8888(src, dst, srcfmt, dstfmt)                            \
    do {                                                                              \
        Uint32 srcA = ((src) >> (srcfmt)->Ashift) & 0xFF;                             \
        Uint32 tmp = ((((src) >> (srcfmt)->Rshift) & 0xFF) << (dstfmt)->Rshift) |     \
                     ((((src) >> (srcfmt)->Gshift) & 0xFF) << (dstfmt)->Gshift) |     \
                     ((((src) >> (srcfmt)->Bshift) & 0xFF) << (dstfmt)->Bshift) |     \
                     (dstfmt)->Amask;                                                 \
        FACTOR_BLEND_8888(tmp, dst, srcA);                                            \
    } while (0)
/*
 * Blend the RGBA values of two pixels.
 * The three color channels are blended with factor sA via
 * ALPHA_BLEND_CHANNEL; destination alpha is blended toward 255 with the
 * same factor. dR, dG, dB and dA are updated in place.
 */
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA)        \
    do {                                                        \
        ALPHA_BLEND_CHANNEL(sR, dR, sA);  /* red */             \
        ALPHA_BLEND_CHANNEL(sG, dG, sA);  /* green */           \
        ALPHA_BLEND_CHANNEL(sB, dB, sA);  /* blue */            \
        ALPHA_BLEND_CHANNEL(255, dA, sA); /* alpha */           \
    } while (0)
606
// Duff's-device unrolling is a very useful loop for optimizing blitters.
// It is enabled everywhere except Visual C++ 7 (_MSC_VER == 1300), whose
// optimizer has a bug when compiling this code.
#if !(defined(_MSC_VER) && (_MSC_VER == 1300))
#define USE_DUFFS_LOOP
#endif
613
/*
 * Plain, non-unrolled loop: executes pixel_copy_increment `width` times and
 * not at all when width <= 0.
 */
#define DUFFS_LOOP1(pixel_copy_increment, width)    \
    {                                               \
        int n = width;                              \
        for (; n > 0; --n) {                        \
            pixel_copy_increment;                   \
        }                                           \
    }
621
622#ifdef USE_DUFFS_LOOP
623
/*
 * 8-times unrolled loop (Duff's device): executes pixel_copy_increment
 * `width` times. The switch jumps into the middle of the first pass to
 * handle width % 8; every later pass runs all eight copies.
 *
 * A width <= 0 executes nothing, matching DUFFS_LOOP1 (without the guard
 * the `case 0` entry would run a full pass of eight copies for width 0).
 * `width` is parenthesized so expression arguments expand correctly.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width)    \
    {                                               \
        int n = ((width) + 7) / 8;                  \
        if (n > 0) {                                \
            switch ((width) & 7) {                  \
            case 0:                                 \
                do {                                \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 7:                                 \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 6:                                 \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 5:                                 \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 4:                                 \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 3:                                 \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 2:                                 \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 1:                                 \
                    pixel_copy_increment;           \
                } while (--n > 0);                  \
            }                                       \
        }                                           \
    }
656
/*
 * 4-times unrolled loop (Duff's device): executes pixel_copy_increment
 * `width` times. The switch jumps into the middle of the first pass to
 * handle width % 4; every later pass runs all four copies.
 *
 * A width <= 0 executes nothing, matching DUFFS_LOOP1 (without the guard
 * the `case 0` entry would run a full pass of four copies for width 0).
 * `width` is parenthesized so expression arguments expand correctly.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width)    \
    {                                               \
        int n = ((width) + 3) / 4;                  \
        if (n > 0) {                                \
            switch ((width) & 3) {                  \
            case 0:                                 \
                do {                                \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 3:                                 \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 2:                                 \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 1:                                 \
                    pixel_copy_increment;           \
                } while (--n > 0);                  \
            }                                       \
        }                                           \
    }
677
/*
 * 2-times unrolled loop (Duff's device): executes pixel_copy_increment
 * `width` times. The switch enters at the second copy when width is odd;
 * every later pass runs both copies.
 *
 * A width <= 0 executes nothing, matching DUFFS_LOOP1 (without the guard
 * the `case 0` entry would run two copies for width 0).
 * `width` is parenthesized so expression arguments expand correctly.
 */
#define DUFFS_LOOP2(pixel_copy_increment, width)    \
    {                                               \
        int n = ((width) + 1) / 2;                  \
        if (n > 0) {                                \
            switch ((width) & 1) {                  \
            case 0:                                 \
                do {                                \
                    pixel_copy_increment;           \
                    SDL_FALLTHROUGH;                \
            case 1:                                 \
                    pixel_copy_increment;           \
                } while (--n > 0);                  \
            }                                       \
        }                                           \
    }
692
// Default unrolling choices when Duff's device is enabled:
// DUFFS_LOOP uses the 4-times version, DUFFS_LOOP_TRIVIAL (for simple
// routines) uses the 8-times version.
#define DUFFS_LOOP(pixel_copy_increment, width) DUFFS_LOOP4(pixel_copy_increment, width)
#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) DUFFS_LOOP8(pixel_copy_increment, width)
699
/*
 * Special version of Duff's device for even more optimization.
 * The caller supplies three statements that process 1, 2 and 4 pixels
 * respectively. The low three bits of `width` are consumed with at most one
 * use of each; the remainder (a multiple of 8) is handled by running the
 * 4-pixel statement twice per pass.
 */
#define DUFFS_LOOP_124(pixel_copy_increment1,       \
                       pixel_copy_increment2,       \
                       pixel_copy_increment4, width) \
    {                                               \
        int n = width;                              \
        if ((n & 1) != 0) {                         \
            pixel_copy_increment1;                  \
            n -= 1;                                 \
        }                                           \
        if ((n & 2) != 0) {                         \
            pixel_copy_increment2;                  \
            n -= 2;                                 \
        }                                           \
        if ((n & 4) != 0) {                         \
            pixel_copy_increment4;                  \
            n -= 4;                                 \
        }                                           \
        if (n != 0) {                               \
            n /= 8;                                 \
            do {                                    \
                pixel_copy_increment4;              \
                pixel_copy_increment4;              \
            } while (--n > 0);                      \
        }                                           \
    }
726
727#else
728
// Don't use Duff's device to unroll loops: every variant falls back to the
// plain DUFFS_LOOP1 loop. DUFFS_LOOP_124 uses only its 1-pixel statement.
#define DUFFS_LOOP(pixel_copy_increment, width) DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP8(pixel_copy_increment, width) DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width) DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP2(pixel_copy_increment, width) DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP_124(pixel_copy_increment1, pixel_copy_increment2, pixel_copy_increment4, width) \
    DUFFS_LOOP1(pixel_copy_increment1, width)
744
745#endif // USE_DUFFS_LOOP
746
// On MSVC (_MSC_VER >= 600), silence warning C4244:
// '=': conversion from 'X' to 'Y', possible loss of data
#if defined(_MSC_VER) && (_MSC_VER >= 600)
#pragma warning(disable : 4244)
#endif
750
751#endif // SDL_blit_h_
752