1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #include "SDL_internal.h" |
22 | |
23 | #ifndef SDL_blit_h_ |
24 | #define SDL_blit_h_ |
25 | |
26 | // Table to do pixel byte expansion |
27 | extern const Uint8 *SDL_expand_byte[9]; |
28 | extern const Uint16 SDL_expand_byte_10[]; |
29 | |
/*
 * SDL blit copy flags.
 *
 * Every flag is a distinct bit, so flags can be OR-ed together. Each *_MASK
 * macro is the OR of the flags in its group.
 */

// Modulation group
#define SDL_COPY_MODULATE_COLOR 0x00000001
#define SDL_COPY_MODULATE_ALPHA 0x00000002
#define SDL_COPY_MODULATE_MASK \
    (SDL_COPY_MODULATE_COLOR | SDL_COPY_MODULATE_ALPHA)

// Blend group
#define SDL_COPY_BLEND               0x00000010
#define SDL_COPY_BLEND_PREMULTIPLIED 0x00000020
#define SDL_COPY_ADD                 0x00000040
#define SDL_COPY_ADD_PREMULTIPLIED   0x00000080
#define SDL_COPY_MOD                 0x00000100
#define SDL_COPY_MUL                 0x00000200
#define SDL_COPY_BLEND_MASK                                        \
    (SDL_COPY_BLEND | SDL_COPY_BLEND_PREMULTIPLIED | SDL_COPY_ADD | \
     SDL_COPY_ADD_PREMULTIPLIED | SDL_COPY_MOD | SDL_COPY_MUL)

// Flags that are not part of any mask
#define SDL_COPY_COLORKEY 0x00000400
#define SDL_COPY_NEAREST  0x00000800

// RLE group
#define SDL_COPY_RLE_DESIRED  0x00001000
#define SDL_COPY_RLE_COLORKEY 0x00002000
#define SDL_COPY_RLE_ALPHAKEY 0x00004000
#define SDL_COPY_RLE_MASK \
    (SDL_COPY_RLE_DESIRED | SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY)
47 | |
/*
 * SDL blit CPU flags.
 *
 * SDL_CPU_ANY is zero (no bit set); every other flag is a distinct bit.
 */
#define SDL_CPU_ANY                0x00000000
#define SDL_CPU_MMX                0x00000001
#define SDL_CPU_SSE                0x00000002
#define SDL_CPU_SSE2               0x00000004
#define SDL_CPU_ALTIVEC_PREFETCH   0x00000008
#define SDL_CPU_ALTIVEC_NOPREFETCH 0x00000010
55 | |
56 | typedef struct |
57 | { |
58 | SDL_Surface *src_surface; |
59 | Uint8 *src; |
60 | int src_w, src_h; |
61 | int src_pitch; |
62 | int src_skip; |
63 | SDL_Surface *dst_surface; |
64 | Uint8 *dst; |
65 | int dst_w, dst_h; |
66 | int dst_pitch; |
67 | int dst_skip; |
68 | const SDL_PixelFormatDetails *src_fmt; |
69 | const SDL_Palette *src_pal; |
70 | const SDL_PixelFormatDetails *dst_fmt; |
71 | const SDL_Palette *dst_pal; |
72 | Uint8 *table; |
73 | SDL_HashTable *palette_map; |
74 | int flags; |
75 | Uint32 colorkey; |
76 | Uint8 r, g, b, a; |
77 | } SDL_BlitInfo; |
78 | |
79 | typedef void (*SDL_BlitFunc)(SDL_BlitInfo *info); |
80 | |
81 | typedef struct |
82 | { |
83 | SDL_PixelFormat src_format; |
84 | SDL_PixelFormat dst_format; |
85 | int flags; |
86 | unsigned int cpu; |
87 | SDL_BlitFunc func; |
88 | } SDL_BlitFuncEntry; |
89 | |
90 | typedef bool (SDLCALL *SDL_Blit) (struct SDL_Surface *src, const SDL_Rect *srcrect, struct SDL_Surface *dst, const SDL_Rect *dstrect); |
91 | |
92 | // Blit mapping definition |
93 | typedef struct SDL_BlitMap |
94 | { |
95 | int identity; |
96 | SDL_Blit blit; |
97 | void *data; |
98 | SDL_BlitInfo info; |
99 | |
100 | /* the version count matches the destination; mismatch indicates |
101 | an invalid mapping */ |
102 | Uint32 dst_palette_version; |
103 | Uint32 src_palette_version; |
104 | } SDL_BlitMap; |
105 | |
106 | // Functions found in SDL_blit.c |
107 | extern bool SDL_CalculateBlit(SDL_Surface *surface, SDL_Surface *dst); |
108 | |
109 | /* Functions found in SDL_blit_*.c */ |
110 | extern SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface *surface); |
111 | extern SDL_BlitFunc SDL_CalculateBlit1(SDL_Surface *surface); |
112 | extern SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface *surface); |
113 | extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface); |
114 | |
115 | /* |
116 | * Useful macros for blitting routines |
117 | */ |
118 | |
/*
 * DECLARE_ALIGNED(t, v, a): declare variable `v` of type `t` with an
 * alignment request of `a` bytes, using the compiler's own extension.
 * On compilers that are neither GNU-compatible nor MSVC the alignment
 * request is dropped and a plain declaration is emitted.
 */
#if defined(__GNUC__)
#define DECLARE_ALIGNED(t, v, a) t __attribute__((aligned(a))) v
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(t, v, a) __declspec(align(a)) t v
#else
#define DECLARE_ALIGNED(t, v, a) t v
#endif
126 | |
/*
 * Extract the R, G and B channels of `Pixel` according to `fmt` and expand
 * each one through SDL_expand_byte[bits][value].
 *
 * Pixel: packed pixel value (any integer expression).
 * fmt:   pointer to a structure providing R/G/B bits, mask and shift.
 * r,g,b: lvalues receiving the expanded channel values.
 *
 * Every argument is parenthesized so compound expressions such as `hi | lo`
 * are masked and shifted as a whole.
 */
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b) \
    { \
        (r) = SDL_expand_byte[(fmt)->Rbits][(((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift)]; \
        (g) = SDL_expand_byte[(fmt)->Gbits][(((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift)]; \
        (b) = SDL_expand_byte[(fmt)->Bbits][(((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift)]; \
    }
/*
 * Fixed-format channel extractors. `Pixel` is parenthesized wherever it is
 * used so a compound argument expression is masked and shifted as a whole.
 */

/* RGB565: 5-bit red, 6-bit green, 5-bit blue, expanded via SDL_expand_byte. */
#define RGB_FROM_RGB565(Pixel, r, g, b) \
    { \
        (r) = SDL_expand_byte[5][(((Pixel) & 0xF800) >> 11)]; \
        (g) = SDL_expand_byte[6][(((Pixel) & 0x07E0) >> 5)]; \
        (b) = SDL_expand_byte[5][((Pixel) & 0x001F)]; \
    }
/* RGB555: three 5-bit channels, expanded via SDL_expand_byte. */
#define RGB_FROM_RGB555(Pixel, r, g, b) \
    { \
        (r) = SDL_expand_byte[5][(((Pixel) & 0x7C00) >> 10)]; \
        (g) = SDL_expand_byte[5][(((Pixel) & 0x03E0) >> 5)]; \
        (b) = SDL_expand_byte[5][((Pixel) & 0x001F)]; \
    }
/* XRGB8888: three 8-bit channels taken directly from the pixel. */
#define RGB_FROM_XRGB8888(Pixel, r, g, b) \
    { \
        (r) = (((Pixel) & 0xFF0000) >> 16); \
        (g) = (((Pixel) & 0xFF00) >> 8); \
        (b) = ((Pixel) & 0xFF); \
    }
/*
 * Read one raw pixel of `bpp` bytes from `buf` into `Pixel`.
 *
 * 1, 2 and 4 byte pixels are loaded with a single typed read. A 3-byte pixel
 * is assembled from individual bytes, ordered according to SDL_BYTEORDER.
 * Any other `bpp` stores 0.
 */
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel) \
    do { \
        switch (bpp) { \
        case 1: \
            Pixel = *((Uint8 *)(buf)); \
            break; \
        case 2: \
            Pixel = *((Uint16 *)(buf)); \
            break; \
        case 3: \
        { \
            Uint8 *_px3 = (Uint8 *)(buf); \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                Pixel = _px3[0] | (_px3[1] << 8) | (_px3[2] << 16); \
            } else { \
                Pixel = (_px3[0] << 16) | (_px3[1] << 8) | _px3[2]; \
            } \
        } break; \
        case 4: \
            Pixel = *((Uint32 *)(buf)); \
            break; \
        default: \
            /* unsupported size: assign something so compilers do not complain */ \
            Pixel = 0; \
            break; \
        } \
    } while (0)
182 | |
/*
 * Read a pixel of `bpp` bytes from `buf` and split it into r, g, b.
 *
 * For 1, 2 and 4 byte pixels the raw value is stored in `Pixel` and decoded
 * with RGB_FROM_PIXEL. For 3-byte pixels each channel is read directly from
 * the byte selected by its shift (mirrored on big-endian builds) and `Pixel`
 * is set to 0. Any other `bpp` zeroes all outputs.
 */
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b) \
    do { \
        switch (bpp) { \
        case 1: \
            Pixel = *((Uint8 *)(buf)); \
            RGB_FROM_PIXEL(Pixel, fmt, r, g, b); \
            break; \
        case 2: \
            Pixel = *((Uint16 *)(buf)); \
            RGB_FROM_PIXEL(Pixel, fmt, r, g, b); \
            break; \
        case 3: \
        { \
            Pixel = 0; \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                r = (buf)[fmt->Rshift / 8]; \
                g = (buf)[fmt->Gshift / 8]; \
                b = (buf)[fmt->Bshift / 8]; \
            } else { \
                r = (buf)[2 - fmt->Rshift / 8]; \
                g = (buf)[2 - fmt->Gshift / 8]; \
                b = (buf)[2 - fmt->Bshift / 8]; \
            } \
        } break; \
        case 4: \
            Pixel = *((Uint32 *)(buf)); \
            RGB_FROM_PIXEL(Pixel, fmt, r, g, b); \
            break; \
        default: \
            /* unsupported size: assign something so compilers do not complain */ \
            Pixel = 0; \
            r = g = b = 0; \
            break; \
        } \
    } while (0)
222 | |
/*
 * Assemble 8-bit R, G, B values into a pixel of format `fmt`.
 *
 * Each channel is reduced to the format's bit count, moved to the format's
 * shift and OR-ed with the format's alpha mask.
 *
 * The channel values are converted to Uint32 before shifting: an 8-bit value
 * promoted to int and shifted into bit 31 would be signed overflow
 * (undefined behaviour). All arguments are parenthesized.
 */
#define PIXEL_FROM_RGB(Pixel, fmt, r, g, b) \
    { \
        (Pixel) = (((Uint32)(r) >> (8 - (fmt)->Rbits)) << (fmt)->Rshift) | \
                  (((Uint32)(g) >> (8 - (fmt)->Gbits)) << (fmt)->Gshift) | \
                  (((Uint32)(b) >> (8 - (fmt)->Bbits)) << (fmt)->Bshift) | \
                  (fmt)->Amask; \
    }
/*
 * Pack 8-bit R, G, B values into fixed low-depth formats. The channel
 * arguments are parenthesized so compound expressions are reduced as a whole
 * before being shifted into place.
 */

/* RGB332: 3 bits red, 3 bits green, 2 bits blue. */
#define RGB332_FROM_RGB(Pixel, r, g, b) \
    { \
        (Pixel) = (Uint8)((((r) >> 5) << 5) | (((g) >> 5) << 2) | ((b) >> 6)); \
    }
/* RGB565: 5 bits red, 6 bits green, 5 bits blue. */
#define RGB565_FROM_RGB(Pixel, r, g, b) \
    { \
        (Pixel) = (Uint16)((((r) >> 3) << 11) | (((g) >> 2) << 5) | ((b) >> 3)); \
    }
/* RGB555: 5 bits per channel. */
#define RGB555_FROM_RGB(Pixel, r, g, b) \
    { \
        (Pixel) = (Uint16)((((r) >> 3) << 10) | (((g) >> 3) << 5) | ((b) >> 3)); \
    }
/*
 * Pack 8-bit channels into 32-bit pixels; the macro name gives the channel
 * order from most to least significant byte.
 *
 * Channels are converted to Uint32 before shifting. With Uint8 or int
 * arguments the original `x << 24` was evaluated as int and overflowed for
 * values >= 0x80, which is undefined behaviour. Arguments are parenthesized.
 */
#define XRGB8888_FROM_RGB(Pixel, r, g, b) \
    { \
        (Pixel) = ((Uint32)(r) << 16) | ((Uint32)(g) << 8) | (Uint32)(b); \
    }
#define ARGB8888_FROM_RGBA(Pixel, r, g, b, a) \
    { \
        (Pixel) = ((Uint32)(a) << 24) | ((Uint32)(r) << 16) | ((Uint32)(g) << 8) | (Uint32)(b); \
    }
#define RGBA8888_FROM_RGBA(Pixel, r, g, b, a) \
    { \
        (Pixel) = ((Uint32)(r) << 24) | ((Uint32)(g) << 16) | ((Uint32)(b) << 8) | (Uint32)(a); \
    }
#define ABGR8888_FROM_RGBA(Pixel, r, g, b, a) \
    { \
        (Pixel) = ((Uint32)(a) << 24) | ((Uint32)(b) << 16) | ((Uint32)(g) << 8) | (Uint32)(r); \
    }
#define BGRA8888_FROM_RGBA(Pixel, r, g, b, a) \
    { \
        (Pixel) = ((Uint32)(b) << 24) | ((Uint32)(g) << 16) | ((Uint32)(r) << 8) | (Uint32)(a); \
    }
/*
 * Pack channels into 2-10-10-10 pixels (2-bit alpha in the top bits, three
 * 10-bit colour channels below).
 *
 * NOTE: all four macros overwrite their r, g, b and a arguments with the
 * rescaled values before packing, so the arguments must be modifiable
 * lvalues wide enough to hold 10-bit values.
 *
 * The integer variants convert to Uint32 before shifting: `a << 30`
 * evaluated as int overflows for a >= 2 (undefined behaviour).
 */

/* 8-bit integer channels -> ARGB2101010. Non-zero colour c becomes (c << 2) | 3. */
#define ARGB2101010_FROM_RGBA(Pixel, r, g, b, a) \
    { \
        (r) = (r) ? (((r) << 2) | 0x3) : 0; \
        (g) = (g) ? (((g) << 2) | 0x3) : 0; \
        (b) = (b) ? (((b) << 2) | 0x3) : 0; \
        (a) = ((a) * 3) / 255; \
        (Pixel) = ((Uint32)(a) << 30) | ((Uint32)(r) << 20) | ((Uint32)(g) << 10) | (Uint32)(b); \
    }
/* Float channels, clamped to [0, 1] -> ARGB2101010. */
#define ARGB2101010_FROM_RGBAFLOAT(Pixel, r, g, b, a) \
    { \
        (r) = SDL_clamp((r), 0.0f, 1.0f) * 1023.0f; \
        (g) = SDL_clamp((g), 0.0f, 1.0f) * 1023.0f; \
        (b) = SDL_clamp((b), 0.0f, 1.0f) * 1023.0f; \
        (a) = SDL_clamp((a), 0.0f, 1.0f) * 3.0f; \
        (Pixel) = (((Uint32)SDL_roundf(a)) << 30) | \
                  (((Uint32)SDL_roundf(r)) << 20) | \
                  (((Uint32)SDL_roundf(g)) << 10) | \
                  (Uint32)SDL_roundf(b); \
    }
/* 8-bit integer channels -> ABGR2101010 (red in the low 10 bits). */
#define ABGR2101010_FROM_RGBA(Pixel, r, g, b, a) \
    { \
        (r) = (r) ? (((r) << 2) | 0x3) : 0; \
        (g) = (g) ? (((g) << 2) | 0x3) : 0; \
        (b) = (b) ? (((b) << 2) | 0x3) : 0; \
        (a) = ((a) * 3) / 255; \
        (Pixel) = ((Uint32)(a) << 30) | ((Uint32)(b) << 20) | ((Uint32)(g) << 10) | (Uint32)(r); \
    }
/* Float channels, clamped to [0, 1] -> ABGR2101010. */
#define ABGR2101010_FROM_RGBAFLOAT(Pixel, r, g, b, a) \
    { \
        (r) = SDL_clamp((r), 0.0f, 1.0f) * 1023.0f; \
        (g) = SDL_clamp((g), 0.0f, 1.0f) * 1023.0f; \
        (b) = SDL_clamp((b), 0.0f, 1.0f) * 1023.0f; \
        (a) = SDL_clamp((a), 0.0f, 1.0f) * 3.0f; \
        (Pixel) = (((Uint32)SDL_roundf(a)) << 30) | \
                  (((Uint32)SDL_roundf(b)) << 20) | \
                  (((Uint32)SDL_roundf(g)) << 10) | \
                  (Uint32)SDL_roundf(r); \
    }
/*
 * Encode r, g, b for format `fmt` and store the result at `buf`.
 *
 * For 1, 2 and 4 byte pixels the value is built with PIXEL_FROM_RGB in a
 * temporary of matching width and written with one typed store. For 3-byte
 * pixels each channel is written to the byte selected by its shift (mirrored
 * on big-endian builds). Other `bpp` values store nothing.
 */
#define ASSEMBLE_RGB(buf, bpp, fmt, r, g, b) \
    { \
        switch (bpp) { \
        case 1: \
        { \
            Uint8 _px8; \
            PIXEL_FROM_RGB(_px8, fmt, r, g, b); \
            *((Uint8 *)(buf)) = _px8; \
        } break; \
        case 2: \
        { \
            Uint16 _px16; \
            PIXEL_FROM_RGB(_px16, fmt, r, g, b); \
            *((Uint16 *)(buf)) = _px16; \
        } break; \
        case 3: \
        { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (buf)[fmt->Rshift / 8] = r; \
                (buf)[fmt->Gshift / 8] = g; \
                (buf)[fmt->Bshift / 8] = b; \
            } else { \
                (buf)[2 - fmt->Rshift / 8] = r; \
                (buf)[2 - fmt->Gshift / 8] = g; \
                (buf)[2 - fmt->Bshift / 8] = b; \
            } \
        } break; \
        case 4: \
        { \
            Uint32 _px32; \
            PIXEL_FROM_RGB(_px32, fmt, r, g, b); \
            *((Uint32 *)(buf)) = _px32; \
        } break; \
        } \
    }
342 | |
// FIXME: Should we rescale alpha into 0..255 here?
/*
 * Extract R, G, B and A from `Pixel` according to `fmt`, expanding each
 * channel through SDL_expand_byte[bits][value].
 * All arguments are parenthesized so compound expressions are handled as a
 * whole.
 */
#define RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a) \
    { \
        (r) = SDL_expand_byte[(fmt)->Rbits][(((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift)]; \
        (g) = SDL_expand_byte[(fmt)->Gbits][(((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift)]; \
        (b) = SDL_expand_byte[(fmt)->Bbits][(((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift)]; \
        (a) = SDL_expand_byte[(fmt)->Abits][(((Pixel) & (fmt)->Amask) >> (fmt)->Ashift)]; \
    }
/*
 * Same extraction without the expansion tables: each channel is just the
 * masked and shifted field of `Pixel`.
 */
#define RGBA_FROM_8888(Pixel, fmt, r, g, b, a) \
    { \
        (r) = ((Pixel) & (fmt)->Rmask) >> (fmt)->Rshift; \
        (g) = ((Pixel) & (fmt)->Gmask) >> (fmt)->Gshift; \
        (b) = ((Pixel) & (fmt)->Bmask) >> (fmt)->Bshift; \
        (a) = ((Pixel) & (fmt)->Amask) >> (fmt)->Ashift; \
    }
/*
 * Split a 32-bit pixel into four 8-bit channels; the macro name gives the
 * channel order from most to least significant byte. The top byte is taken
 * with a plain `>> 24`, so `Pixel` is expected to be an unsigned 32-bit
 * value. `Pixel` is parenthesized so compound expressions are shifted as a
 * whole.
 */
#define RGBA_FROM_RGBA8888(Pixel, r, g, b, a) \
    { \
        (r) = ((Pixel) >> 24); \
        (g) = (((Pixel) >> 16) & 0xFF); \
        (b) = (((Pixel) >> 8) & 0xFF); \
        (a) = ((Pixel) & 0xFF); \
    }
#define RGBA_FROM_ARGB8888(Pixel, r, g, b, a) \
    { \
        (r) = (((Pixel) >> 16) & 0xFF); \
        (g) = (((Pixel) >> 8) & 0xFF); \
        (b) = ((Pixel) & 0xFF); \
        (a) = ((Pixel) >> 24); \
    }
#define RGBA_FROM_ABGR8888(Pixel, r, g, b, a) \
    { \
        (r) = ((Pixel) & 0xFF); \
        (g) = (((Pixel) >> 8) & 0xFF); \
        (b) = (((Pixel) >> 16) & 0xFF); \
        (a) = ((Pixel) >> 24); \
    }
#define RGBA_FROM_BGRA8888(Pixel, r, g, b, a) \
    { \
        (r) = (((Pixel) >> 8) & 0xFF); \
        (g) = (((Pixel) >> 16) & 0xFF); \
        (b) = ((Pixel) >> 24); \
        (a) = ((Pixel) & 0xFF); \
    }
/*
 * Unpack 2-10-10-10 pixels.
 *
 * The integer variants keep the top 8 bits of each 10-bit colour channel and
 * expand the 2-bit alpha through SDL_expand_byte[2]. The float variants
 * divide each 10-bit channel by 1023 and the 2-bit alpha by 3.
 * Alpha is taken with a plain `>> 30`, so `Pixel` is expected to be an
 * unsigned 32-bit value. `Pixel` is parenthesized so compound expressions
 * are shifted as a whole.
 */
#define RGBA_FROM_ARGB2101010(Pixel, r, g, b, a) \
    { \
        (r) = (((Pixel) >> 22) & 0xFF); \
        (g) = (((Pixel) >> 12) & 0xFF); \
        (b) = (((Pixel) >> 2) & 0xFF); \
        (a) = SDL_expand_byte[2][((Pixel) >> 30)]; \
    }
#define RGBAFLOAT_FROM_ARGB2101010(Pixel, r, g, b, a) \
    { \
        (r) = (float)(((Pixel) >> 20) & 0x3FF) / 1023.0f; \
        (g) = (float)(((Pixel) >> 10) & 0x3FF) / 1023.0f; \
        (b) = (float)(((Pixel) >> 0) & 0x3FF) / 1023.0f; \
        (a) = (float)((Pixel) >> 30) / 3.0f; \
    }
#define RGBA_FROM_ABGR2101010(Pixel, r, g, b, a) \
    { \
        (r) = (((Pixel) >> 2) & 0xFF); \
        (g) = (((Pixel) >> 12) & 0xFF); \
        (b) = (((Pixel) >> 22) & 0xFF); \
        (a) = SDL_expand_byte[2][((Pixel) >> 30)]; \
    }
#define RGBAFLOAT_FROM_ABGR2101010(Pixel, r, g, b, a) \
    { \
        (r) = (float)(((Pixel) >> 0) & 0x3FF) / 1023.0f; \
        (g) = (float)(((Pixel) >> 10) & 0x3FF) / 1023.0f; \
        (b) = (float)(((Pixel) >> 20) & 0x3FF) / 1023.0f; \
        (a) = (float)((Pixel) >> 30) / 3.0f; \
    }
/*
 * Read a pixel of `bpp` bytes from `buf` and split it into r, g, b, a.
 *
 * For 1, 2 and 4 byte pixels the raw value is stored in `Pixel` and decoded
 * with RGBA_FROM_PIXEL. For 3-byte pixels the colour channels are read from
 * the bytes selected by their shifts (mirrored on big-endian builds), alpha
 * is set to 0xFF and `Pixel` to 0. Any other `bpp` zeroes all outputs.
 */
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a) \
    do { \
        switch (bpp) { \
        case 1: \
            Pixel = *((Uint8 *)(buf)); \
            RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a); \
            break; \
        case 2: \
            Pixel = *((Uint16 *)(buf)); \
            RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a); \
            break; \
        case 3: \
        { \
            Pixel = 0; \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                r = (buf)[fmt->Rshift / 8]; \
                g = (buf)[fmt->Gshift / 8]; \
                b = (buf)[fmt->Bshift / 8]; \
            } else { \
                r = (buf)[2 - fmt->Rshift / 8]; \
                g = (buf)[2 - fmt->Gshift / 8]; \
                b = (buf)[2 - fmt->Bshift / 8]; \
            } \
            a = 0xFF; \
        } break; \
        case 4: \
            Pixel = *((Uint32 *)(buf)); \
            RGBA_FROM_PIXEL(Pixel, fmt, r, g, b, a); \
            break; \
        default: \
            /* unsupported size: assign something so compilers do not complain */ \
            Pixel = 0; \
            r = g = b = a = 0; \
            break; \
        } \
    } while (0)
454 | |
// FIXME: this isn't correct, especially for Alpha (maximum != 255)
/*
 * Assemble 8-bit R, G, B, A values into a pixel of format `fmt`: each channel
 * is reduced to the format's bit count and moved to the format's shift.
 *
 * The channel values are converted to Uint32 before shifting: an 8-bit value
 * promoted to int and shifted into bit 31 would be signed overflow
 * (undefined behaviour). All arguments are parenthesized.
 */
#define PIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
    { \
        (Pixel) = (((Uint32)(r) >> (8 - (fmt)->Rbits)) << (fmt)->Rshift) | \
                  (((Uint32)(g) >> (8 - (fmt)->Gbits)) << (fmt)->Gshift) | \
                  (((Uint32)(b) >> (8 - (fmt)->Bbits)) << (fmt)->Bshift) | \
                  (((Uint32)(a) >> (8 - (fmt)->Abits)) << (fmt)->Ashift); \
    }
/*
 * Encode r, g, b, a for format `fmt` and store the result at `buf`.
 *
 * For 1, 2 and 4 byte pixels the value is built with PIXEL_FROM_RGBA in a
 * temporary of matching width and written with one typed store. For 3-byte
 * pixels only r, g and b are written, each to the byte selected by its shift
 * (mirrored on big-endian builds); `a` is not stored. Other `bpp` values
 * store nothing.
 */
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a) \
    { \
        switch (bpp) { \
        case 1: \
        { \
            Uint8 _px8; \
            PIXEL_FROM_RGBA(_px8, fmt, r, g, b, a); \
            *((Uint8 *)(buf)) = _px8; \
        } break; \
        case 2: \
        { \
            Uint16 _px16; \
            PIXEL_FROM_RGBA(_px16, fmt, r, g, b, a); \
            *((Uint16 *)(buf)) = _px16; \
        } break; \
        case 3: \
        { \
            if (SDL_BYTEORDER == SDL_LIL_ENDIAN) { \
                (buf)[fmt->Rshift / 8] = r; \
                (buf)[fmt->Gshift / 8] = g; \
                (buf)[fmt->Bshift / 8] = b; \
            } else { \
                (buf)[2 - fmt->Rshift / 8] = r; \
                (buf)[2 - fmt->Gshift / 8] = g; \
                (buf)[2 - fmt->Bshift / 8] = b; \
            } \
        } break; \
        case 4: \
        { \
            Uint32 _px32; \
            PIXEL_FROM_RGBA(_px32, fmt, r, g, b, a); \
            *((Uint32 *)(buf)) = _px32; \
        } break; \
        } \
    }
504 | |
// Convert any 32-bit 4-bpp pixel to ARGB format
/*
 * src:    source pixel value.
 * srcfmt: format description passed on to RGBA_FROM_PIXEL.
 * dst:    lvalue receiving the pixel repacked as A:R:G:B, 8 bits each.
 *
 * The channels are decoded into Uint8 temporaries, so each one is widened to
 * Uint32 before shifting: `a << 24` on an int-promoted Uint8 overflows for
 * a >= 0x80 (undefined behaviour). The temporaries carry a prefix so they do
 * not shadow caller variables named a, r, g or b that appear in `src` or
 * `dst`.
 */
#define PIXEL_TO_ARGB_PIXEL(src, srcfmt, dst) \
    do { \
        Uint8 _argb_a, _argb_r, _argb_g, _argb_b; \
        RGBA_FROM_PIXEL((src), (srcfmt), _argb_r, _argb_g, _argb_b, _argb_a); \
        (dst) = ((Uint32)_argb_a << 24) | ((Uint32)_argb_r << 16) | \
                ((Uint32)_argb_g << 8) | (Uint32)_argb_b; \
    } while (0)
// Blend a single color channel or alpha value
/* dC = ((sC * sA) + (dC * (255 - sA))) / 255 */
/*
 * sC: source channel value, dC: destination channel lvalue (read, then
 * overwritten), sA: blend factor. The division by 255 is done with the
 * add-one / add-high-byte / shift sequence below, in 16-bit arithmetic.
 *
 * Arguments are parenthesized so expressions such as `255 - x` blend
 * correctly, and the temporary carries a prefix so that a caller variable
 * named `x` is not shadowed. `dC` is evaluated more than once.
 */
#define ALPHA_BLEND_CHANNEL(sC, dC, sA) \
    do { \
        Uint16 _blend_x; \
        _blend_x = (((sC) - (dC)) * (sA)) + (((dC) << 8) - (dC)); \
        _blend_x += 0x1U; \
        _blend_x += _blend_x >> 8; \
        (dC) = _blend_x >> 8; \
    } while (0)
// Perform a division by 255 after a multiplication of two 8-bit color channels
/* out = (sC * dC) / 255 */
/*
 * Arguments are parenthesized so compound expressions are multiplied as a
 * whole; the temporary carries a prefix to avoid shadowing a caller's `x`.
 */
#define MULT_DIV_255(sC, dC, out) \
    do { \
        Uint16 _mul_x = (sC) * (dC); \
        _mul_x += 0x1U; \
        _mul_x += _mul_x >> 8; \
        (out) = _mul_x >> 8; \
    } while (0)
// Blend the RGB values of two pixels with an alpha value
/* Applies ALPHA_BLEND_CHANNEL to R, G and B in turn, using the same factor A. */
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB) \
    do { \
        ALPHA_BLEND_CHANNEL(sR, dR, A); \
        ALPHA_BLEND_CHANNEL(sG, dG, A); \
        ALPHA_BLEND_CHANNEL(sB, dB, A); \
    } while (0)
538 | |
// Blend two 8888 pixels with the same format
/* Calculates dst = ((src * factor) + (dst * (255 - factor))) / 255 */
/*
 * src:    source pixel (read once).
 * dst:    destination pixel lvalue (read once, then overwritten).
 * factor: blend factor.
 *
 * Both variants spread the four 8-bit channels into 16-bit lanes so several
 * channels are blended per multiplication. When SIZE_MAX shows a 64-bit
 * size_t, all four lanes live in one Uint64; otherwise two Uint32 values
 * carry two lanes each.
 *
 * Arguments are parenthesized and temporaries are prefixed so that compound
 * argument expressions and caller variables with generic names are handled
 * correctly.
 */
// FIXME: SDL_SIZE_MAX might not be an integer literal
#if defined(SIZE_MAX) && (SIZE_MAX == 0xffffffffffffffff)
#define FACTOR_BLEND_8888(src, dst, factor) \
    do { \
        Uint64 _fb_src = (src); \
        Uint64 _fb_dst = (dst); \
        _fb_src = (_fb_src | (_fb_src << 24)) & 0x00FF00FF00FF00FF; \
        _fb_dst = (_fb_dst | (_fb_dst << 24)) & 0x00FF00FF00FF00FF; \
        \
        _fb_dst = ((_fb_src - _fb_dst) * (factor)) + (_fb_dst << 8) - _fb_dst; \
        _fb_dst += 0x0001000100010001; \
        _fb_dst += (_fb_dst >> 8) & 0x00FF00FF00FF00FF; \
        _fb_dst &= 0xFF00FF00FF00FF00; \
        \
        (dst) = (Uint32)((_fb_dst >> 8) | (_fb_dst >> 32)); \
    } while (0)
#else
#define FACTOR_BLEND_8888(src, dst, factor) \
    do { \
        Uint32 _fb_src = (src); \
        Uint32 _fb_dst = (dst); \
        Uint32 _fb_src02 = _fb_src & 0x00FF00FF; \
        Uint32 _fb_dst02 = _fb_dst & 0x00FF00FF; \
        Uint32 _fb_src13 = (_fb_src >> 8) & 0x00FF00FF; \
        Uint32 _fb_dst13 = (_fb_dst >> 8) & 0x00FF00FF; \
        \
        Uint32 _fb_res02 = ((_fb_src02 - _fb_dst02) * (factor)) + (_fb_dst02 << 8) - _fb_dst02; \
        _fb_res02 += 0x00010001; \
        _fb_res02 += (_fb_res02 >> 8) & 0x00FF00FF; \
        _fb_res02 = (_fb_res02 >> 8) & 0x00FF00FF; \
        \
        Uint32 _fb_res13 = ((_fb_src13 - _fb_dst13) * (factor)) + (_fb_dst13 << 8) - _fb_dst13; \
        _fb_res13 += 0x00010001; \
        _fb_res13 += (_fb_res13 >> 8) & 0x00FF00FF; \
        _fb_res13 &= 0xFF00FF00; \
        (dst) = _fb_res02 | _fb_res13; \
    } while (0)
#endif

// Alpha blend two 8888 pixels with the same formats.
/*
 * The blend factor is the source's alpha byte. The source is blended with its
 * alpha field forced to all ones (src | Amask).
 */
#define ALPHA_BLEND_8888(src, dst, fmt) \
    do { \
        Uint32 _ab_alpha = ((src) >> (fmt)->Ashift) & 0xFF; \
        Uint32 _ab_src = (src) | (fmt)->Amask; \
        FACTOR_BLEND_8888(_ab_src, dst, _ab_alpha); \
    } while (0)

// Alpha blend two 8888 pixels with differing formats.
/*
 * The source colour channels are first moved to the destination format's
 * shifts, with the destination alpha mask set, then blended using the
 * source's alpha byte as the factor.
 */
#define ALPHA_BLEND_SWIZZLE_8888(src, dst, srcfmt, dstfmt) \
    do { \
        Uint32 _ab_alpha = ((src) >> (srcfmt)->Ashift) & 0xFF; \
        Uint32 _ab_src = ((((src) >> (srcfmt)->Rshift) & 0xFF) << (dstfmt)->Rshift) | \
                         ((((src) >> (srcfmt)->Gshift) & 0xFF) << (dstfmt)->Gshift) | \
                         ((((src) >> (srcfmt)->Bshift) & 0xFF) << (dstfmt)->Bshift) | \
                         (dstfmt)->Amask; \
        FACTOR_BLEND_8888(_ab_src, dst, _ab_alpha); \
    } while (0)
// Blend the RGBA values of two pixels
/*
 * Applies ALPHA_BLEND_CHANNEL to R, G and B with factor sA, then blends the
 * destination alpha towards the constant 255 with the same factor.
 */
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
    do {                                                 \
        ALPHA_BLEND_CHANNEL(sR, dR, sA);                 \
        ALPHA_BLEND_CHANNEL(sG, dG, sA);                 \
        ALPHA_BLEND_CHANNEL(sB, dB, sA);                 \
        ALPHA_BLEND_CHANNEL(255, dA, sA);                \
    } while (0)
606 | |
// This is a very useful loop for optimizing blitters
/*
 * USE_DUFFS_LOOP is defined everywhere except when _MSC_VER == 1300
 * (Visual C++ 7), whose optimizer has a bug when compiling this code.
 */
#if !(defined(_MSC_VER) && (_MSC_VER == 1300))
#define USE_DUFFS_LOOP
#endif
613 | |
/*
 * Plain loop: run `pixel_copy_increment` once per pixel, `width` times.
 * Nothing is executed when `width` is zero or negative.
 */
#define DUFFS_LOOP1(pixel_copy_increment, width) \
    {                                            \
        for (int n = width; n > 0; --n) {        \
            pixel_copy_increment;                \
        }                                        \
    }
621 | |
622 | #ifdef USE_DUFFS_LOOP |
623 | |
// 8-times unrolled loop
/*
 * Duff's device: runs `pixel_copy_increment` exactly `width` times, eight per
 * pass, entering the unrolled body part-way through on the first pass to
 * handle the remainder.
 *
 * A width of zero or less executes nothing, matching DUFFS_LOOP1; without the
 * guard the do/while body would still run once. `width` is parenthesized and
 * is evaluated twice, so it must be free of side effects.
 */
#define DUFFS_LOOP8(pixel_copy_increment, width) \
    { \
        int n = ((width) + 7) / 8; \
        if (n > 0) { \
            switch ((width) & 7) { \
            case 0: \
                do { \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 7: \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 6: \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 5: \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 4: \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 3: \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 2: \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 1: \
                    pixel_copy_increment; \
                } while (--n > 0); \
            } \
        } \
    }
656 | |
// 4-times unrolled loop
/*
 * Duff's device: runs `pixel_copy_increment` exactly `width` times, four per
 * pass, entering the unrolled body part-way through on the first pass to
 * handle the remainder.
 *
 * A width of zero or less executes nothing, matching DUFFS_LOOP1; without the
 * guard the do/while body would still run once. `width` is parenthesized and
 * is evaluated twice, so it must be free of side effects.
 */
#define DUFFS_LOOP4(pixel_copy_increment, width) \
    { \
        int n = ((width) + 3) / 4; \
        if (n > 0) { \
            switch ((width) & 3) { \
            case 0: \
                do { \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 3: \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 2: \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 1: \
                    pixel_copy_increment; \
                } while (--n > 0); \
            } \
        } \
    }
677 | |
// 2-times unrolled loop
/*
 * Duff's device: runs `pixel_copy_increment` exactly `width` times, two per
 * pass, entering the unrolled body at its second statement on the first pass
 * when `width` is odd.
 *
 * A width of zero or less executes nothing, matching DUFFS_LOOP1; without the
 * guard the do/while body would still run once. `width` is parenthesized and
 * is evaluated twice, so it must be free of side effects.
 */
#define DUFFS_LOOP2(pixel_copy_increment, width) \
    { \
        int n = ((width) + 1) / 2; \
        if (n > 0) { \
            switch ((width) & 1) { \
            case 0: \
                do { \
                    pixel_copy_increment; \
                    SDL_FALLTHROUGH; \
                case 1: \
                    pixel_copy_increment; \
                } while (--n > 0); \
            } \
        } \
    }
692 | |
/* Default loop: forwards to the 4-times unrolled version. */
#define DUFFS_LOOP(pixel_copy_increment, width) DUFFS_LOOP4(pixel_copy_increment, width)

/* Loop for simple routines: forwards to the 8-times unrolled version. */
#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) DUFFS_LOOP8(pixel_copy_increment, width)
699 | |
/*
 * Special version of Duff's device for even more optimization.
 *
 * The caller supplies three bodies that handle 1, 2 and 4 pixels at a time.
 * The low bits of `width` select one run of the 1-pixel body, one of the
 * 2-pixel body and one of the 4-pixel body; whatever remains is a multiple
 * of 8 and is consumed by running the 4-pixel body twice per iteration.
 */
#define DUFFS_LOOP_124(pixel_copy_increment1,    \
                       pixel_copy_increment2,    \
                       pixel_copy_increment4, width) \
    {                                            \
        int n = width;                           \
        if (n & 1) {                             \
            pixel_copy_increment1;               \
            --n;                                 \
        }                                        \
        if (n & 2) {                             \
            pixel_copy_increment2;               \
            n -= 2;                              \
        }                                        \
        if (n & 4) {                             \
            pixel_copy_increment4;               \
            n -= 4;                              \
        }                                        \
        if (n != 0) {                            \
            n /= 8;                              \
            do {                                 \
                pixel_copy_increment4;           \
                pixel_copy_increment4;           \
            } while (--n > 0);                   \
        }                                        \
    }
726 | |
727 | #else |
728 | |
/*
 * Don't use Duff's device to unroll loops: every variant forwards to the
 * plain DUFFS_LOOP1. DUFFS_LOOP_124 uses only its 1-pixel body here.
 */
#define DUFFS_LOOP(pixel_copy_increment, width)         DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP8(pixel_copy_increment, width)        DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP4(pixel_copy_increment, width)        DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP2(pixel_copy_increment, width)        DUFFS_LOOP1(pixel_copy_increment, width)
#define DUFFS_LOOP_124(pixel_copy_increment1, pixel_copy_increment2, pixel_copy_increment4, width) \
    DUFFS_LOOP1(pixel_copy_increment1, width)
744 | |
745 | #endif // USE_DUFFS_LOOP |
746 | |
/*
 * On MSVC (version 600 and later), turn off warning C4244
 * ('=': conversion from 'X' to 'Y', possible loss of data).
 * The warning is disabled, not pushed/popped, so it stays off for whatever
 * is compiled after this header.
 */
#if defined(_MSC_VER) && (_MSC_VER >= 600)
#pragma warning(disable : 4244)
#endif
750 | |
751 | #endif // SDL_blit_h_ |
752 | |