1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #include "SDL_internal.h" |
22 | |
23 | #include "SDL_surface_c.h" |
24 | #include "SDL_blit_slow.h" |
25 | #include "SDL_pixels_c.h" |
26 | |
/* Strategy used by the slow blitters to read or write one pixel.
 * The strategy for a given pixel format is chosen by GetPixelAccessMethod().
 */
typedef enum
{
    SlowBlitPixelAccess_Index8, // format is SDL_PIXELFORMAT_INDEX8: the pixel byte is used as a palette index
    SlowBlitPixelAccess_RGB,    // fallback: any remaining format for which SDL_ISPIXELFORMAT_ALPHA() is false
    SlowBlitPixelAccess_RGBA,   // any remaining format for which SDL_ISPIXELFORMAT_ALPHA() is true
    SlowBlitPixelAccess_10Bit,  // formats for which SDL_ISPIXELFORMAT_10BIT() is true (the *2101010 formats)
    SlowBlitPixelAccess_Large,  // more than 4 bytes per pixel; only SDL_Blit_Slow_Float() handles these
} SlowBlitPixelAccess;
35 | |
36 | static SlowBlitPixelAccess GetPixelAccessMethod(SDL_PixelFormat format) |
37 | { |
38 | if (SDL_BYTESPERPIXEL(format) > 4) { |
39 | return SlowBlitPixelAccess_Large; |
40 | } else if (SDL_ISPIXELFORMAT_10BIT(format)) { |
41 | return SlowBlitPixelAccess_10Bit; |
42 | } else if (format == SDL_PIXELFORMAT_INDEX8) { |
43 | return SlowBlitPixelAccess_Index8; |
44 | } else if (SDL_ISPIXELFORMAT_ALPHA(format)) { |
45 | return SlowBlitPixelAccess_RGBA; |
46 | } else { |
47 | return SlowBlitPixelAccess_RGB; |
48 | } |
49 | } |
50 | |
/* The ONE TRUE BLITTER
 * This puppy has to handle all the unoptimized cases - yes, it's slow.
 *
 * Copies (and optionally scales) info->src_w x info->src_h source pixels onto
 * info->dst_w x info->dst_h destination pixels, one destination pixel at a
 * time. For every destination pixel a single source pixel is sampled
 * (no filtering), then colorkey, color/alpha modulation and the blend mode
 * selected by info->flags are applied on integer channels using 255 as full
 * scale, and the result is stored in the destination format.
 *
 * Notes for callers:
 *  - info->dst_w and info->dst_h are used as divisors and must be non-zero.
 *  - info is modified: info->dst_h is counted down by the row loop and
 *    info->dst is advanced by info->dst_pitch for every row.
 *  - Formats classified as SlowBlitPixelAccess_Large are not read or written
 *    here; they are handled by SDL_Blit_Slow_Float().
 */
void SDL_Blit_Slow(SDL_BlitInfo *info)
{
    const int flags = info->flags;
    const Uint32 modulateR = info->r;
    const Uint32 modulateG = info->g;
    const Uint32 modulateB = info->b;
    const Uint32 modulateA = info->a;
    Uint32 srcpixel = 0;
    Uint32 srcR = 0, srcG = 0, srcB = 0, srcA = 0;
    Uint32 dstpixel = 0;
    Uint32 dstR = 0, dstG = 0, dstB = 0, dstA = 0;
    Uint64 srcy, srcx;
    Uint64 posy, posx; // current source position, 16.16 fixed point
    Uint64 incy, incx; // source step per destination pixel, 16.16 fixed point
    const SDL_PixelFormatDetails *src_fmt = info->src_fmt;
    const SDL_Palette *src_pal = info->src_pal;
    const SDL_PixelFormatDetails *dst_fmt = info->dst_fmt;
    const SDL_Palette *dst_pal = info->dst_pal;
    SDL_HashTable *palette_map = info->palette_map;
    int srcbpp = src_fmt->bytes_per_pixel;
    int dstbpp = dst_fmt->bytes_per_pixel;
    SlowBlitPixelAccess src_access;
    SlowBlitPixelAccess dst_access;
    // The colorkey comparison ignores the source alpha bits
    Uint32 rgbmask = ~src_fmt->Amask;
    Uint32 ckey = info->colorkey & rgbmask;
    // One-entry cache of the most recent palette lookup (packed RGBA -> index)
    Uint32 last_pixel = 0;
    Uint8 last_index = 0;

    src_access = GetPixelAccessMethod(src_fmt->format);
    dst_access = GetPixelAccessMethod(dst_fmt->format);
    if (dst_access == SlowBlitPixelAccess_Index8) {
        // Prime the cache so that last_index really matches last_pixel (0)
        last_index = SDL_LookupRGBAColor(palette_map, last_pixel, dst_pal);
    }

    incy = ((Uint64)info->src_h << 16) / info->dst_h;
    incx = ((Uint64)info->src_w << 16) / info->dst_w;
    posy = incy / 2; // start at the middle of pixel

    while (info->dst_h--) {
        Uint8 *src = 0;
        Uint8 *dst = info->dst;
        int n = info->dst_w;
        posx = incx / 2; // start at the middle of pixel
        srcy = posy >> 16;
        while (n--) {
            srcx = posx >> 16;
            src = (info->src + (srcy * info->src_pitch) + (srcx * srcbpp));

            // Fetch the source pixel and split it into channels
            switch (src_access) {
            case SlowBlitPixelAccess_Index8:
                srcpixel = *src;
                srcR = src_pal->colors[srcpixel].r;
                srcG = src_pal->colors[srcpixel].g;
                srcB = src_pal->colors[srcpixel].b;
                srcA = src_pal->colors[srcpixel].a;
                break;
            case SlowBlitPixelAccess_RGB:
                DISEMBLE_RGB(src, srcbpp, src_fmt, srcpixel, srcR, srcG, srcB);
                srcA = 0xFF;
                break;
            case SlowBlitPixelAccess_RGBA:
                DISEMBLE_RGBA(src, srcbpp, src_fmt, srcpixel, srcR, srcG, srcB, srcA);
                break;
            case SlowBlitPixelAccess_10Bit:
                srcpixel = *((Uint32 *)(src));
                switch (src_fmt->format) {
                case SDL_PIXELFORMAT_XRGB2101010:
                    RGBA_FROM_ARGB2101010(srcpixel, srcR, srcG, srcB, srcA);
                    srcA = 0xFF; // the X bits carry no alpha: force opaque
                    break;
                case SDL_PIXELFORMAT_XBGR2101010:
                    RGBA_FROM_ABGR2101010(srcpixel, srcR, srcG, srcB, srcA);
                    srcA = 0xFF; // the X bits carry no alpha: force opaque
                    break;
                case SDL_PIXELFORMAT_ARGB2101010:
                    RGBA_FROM_ARGB2101010(srcpixel, srcR, srcG, srcB, srcA);
                    break;
                case SDL_PIXELFORMAT_ABGR2101010:
                    RGBA_FROM_ABGR2101010(srcpixel, srcR, srcG, srcB, srcA);
                    break;
                default:
                    break;
                }
                break;
            case SlowBlitPixelAccess_Large:
                // Handled in SDL_Blit_Slow_Float()
                break;
            }

            if (flags & SDL_COPY_COLORKEY) {
                // srcpixel isn't set for 24 bpp
                if (srcbpp == 3) {
                    srcpixel = (srcR << src_fmt->Rshift) |
                               (srcG << src_fmt->Gshift) | (srcB << src_fmt->Bshift);
                }
                if ((srcpixel & rgbmask) == ckey) {
                    // Transparent pixel: leave the destination untouched and move on
                    posx += incx;
                    dst += dstbpp;
                    continue;
                }
            }
            if (flags & SDL_COPY_BLEND_MASK) {
                // Blending needs the current destination color
                switch (dst_access) {
                case SlowBlitPixelAccess_Index8:
                    dstpixel = *dst;
                    dstR = dst_pal->colors[dstpixel].r;
                    dstG = dst_pal->colors[dstpixel].g;
                    dstB = dst_pal->colors[dstpixel].b;
                    dstA = dst_pal->colors[dstpixel].a;
                    break;
                case SlowBlitPixelAccess_RGB:
                    DISEMBLE_RGB(dst, dstbpp, dst_fmt, dstpixel, dstR, dstG, dstB);
                    dstA = 0xFF;
                    break;
                case SlowBlitPixelAccess_RGBA:
                    DISEMBLE_RGBA(dst, dstbpp, dst_fmt, dstpixel, dstR, dstG, dstB, dstA);
                    break;
                case SlowBlitPixelAccess_10Bit:
                    dstpixel = *((Uint32 *)(dst));
                    switch (dst_fmt->format) {
                    case SDL_PIXELFORMAT_XRGB2101010:
                        RGBA_FROM_ARGB2101010(dstpixel, dstR, dstG, dstB, dstA);
                        dstA = 0xFF;
                        break;
                    case SDL_PIXELFORMAT_XBGR2101010:
                        RGBA_FROM_ABGR2101010(dstpixel, dstR, dstG, dstB, dstA);
                        dstA = 0xFF;
                        break;
                    case SDL_PIXELFORMAT_ARGB2101010:
                        RGBA_FROM_ARGB2101010(dstpixel, dstR, dstG, dstB, dstA);
                        break;
                    case SDL_PIXELFORMAT_ABGR2101010:
                        RGBA_FROM_ABGR2101010(dstpixel, dstR, dstG, dstB, dstA);
                        break;
                    default:
                        break;
                    }
                    break;
                case SlowBlitPixelAccess_Large:
                    // Handled in SDL_Blit_Slow_Float()
                    break;
                }
            } else {
                // don't care
            }

            if (flags & SDL_COPY_MODULATE_COLOR) {
                srcR = (srcR * modulateR) / 255;
                srcG = (srcG * modulateG) / 255;
                srcB = (srcB * modulateB) / 255;
            }
            if (flags & SDL_COPY_MODULATE_ALPHA) {
                srcA = (srcA * modulateA) / 255;
            }
            if (flags & (SDL_COPY_BLEND | SDL_COPY_ADD)) {
                // The non-premultiplied modes scale the color by the source alpha here
                if (srcA < 255) {
                    srcR = (srcR * srcA) / 255;
                    srcG = (srcG * srcA) / 255;
                    srcB = (srcB * srcA) / 255;
                }
            }
            // Combine source and destination.
            // The ADD, MOD and MUL cases do not assign dstA, so it keeps the value read above.
            switch (flags & SDL_COPY_BLEND_MASK) {
            case 0:
                dstR = srcR;
                dstG = srcG;
                dstB = srcB;
                dstA = srcA;
                break;
            case SDL_COPY_BLEND:
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                dstA = srcA + ((255 - srcA) * dstA) / 255;
                break;
            case SDL_COPY_BLEND_PREMULTIPLIED:
                // Same formula as SDL_COPY_BLEND, but the source color was not scaled
                // by srcA above, so each channel is clamped to 255.
                dstR = srcR + ((255 - srcA) * dstR) / 255;
                if (dstR > 255) {
                    dstR = 255;
                }
                dstG = srcG + ((255 - srcA) * dstG) / 255;
                if (dstG > 255) {
                    dstG = 255;
                }
                dstB = srcB + ((255 - srcA) * dstB) / 255;
                if (dstB > 255) {
                    dstB = 255;
                }
                dstA = srcA + ((255 - srcA) * dstA) / 255;
                if (dstA > 255) {
                    dstA = 255;
                }
                break;
            case SDL_COPY_ADD:
            case SDL_COPY_ADD_PREMULTIPLIED:
                dstR = srcR + dstR;
                if (dstR > 255) {
                    dstR = 255;
                }
                dstG = srcG + dstG;
                if (dstG > 255) {
                    dstG = 255;
                }
                dstB = srcB + dstB;
                if (dstB > 255) {
                    dstB = 255;
                }
                break;
            case SDL_COPY_MOD:
                dstR = (srcR * dstR) / 255;
                dstG = (srcG * dstG) / 255;
                dstB = (srcB * dstB) / 255;
                break;
            case SDL_COPY_MUL:
                dstR = ((srcR * dstR) + (dstR * (255 - srcA))) / 255;
                if (dstR > 255) {
                    dstR = 255;
                }
                dstG = ((srcG * dstG) + (dstG * (255 - srcA))) / 255;
                if (dstG > 255) {
                    dstG = 255;
                }
                dstB = ((srcB * dstB) + (dstB * (255 - srcA))) / 255;
                if (dstB > 255) {
                    dstB = 255;
                }
                break;
            }

            // Store the result in the destination format
            switch (dst_access) {
            case SlowBlitPixelAccess_Index8:
                // Pack as RGBA8888 and map it to a palette index, reusing the
                // previous lookup when the color did not change
                dstpixel = ((dstR << 24) | (dstG << 16) | (dstB << 8) | dstA);
                if (dstpixel != last_pixel) {
                    last_pixel = dstpixel;
                    last_index = SDL_LookupRGBAColor(palette_map, dstpixel, dst_pal);
                }
                *dst = last_index;
                break;
            case SlowBlitPixelAccess_RGB:
                ASSEMBLE_RGB(dst, dstbpp, dst_fmt, dstR, dstG, dstB);
                break;
            case SlowBlitPixelAccess_RGBA:
                ASSEMBLE_RGBA(dst, dstbpp, dst_fmt, dstR, dstG, dstB, dstA);
                break;
            case SlowBlitPixelAccess_10Bit:
            {
                Uint32 pixel;
                switch (dst_fmt->format) {
                case SDL_PIXELFORMAT_XRGB2101010:
                    dstA = 0xFF;
                    SDL_FALLTHROUGH;
                case SDL_PIXELFORMAT_ARGB2101010:
                    ARGB2101010_FROM_RGBA(pixel, dstR, dstG, dstB, dstA);
                    break;
                case SDL_PIXELFORMAT_XBGR2101010:
                    dstA = 0xFF;
                    SDL_FALLTHROUGH;
                case SDL_PIXELFORMAT_ABGR2101010:
                    ABGR2101010_FROM_RGBA(pixel, dstR, dstG, dstB, dstA);
                    break;
                default:
                    pixel = 0;
                    break;
                }
                *(Uint32 *)dst = pixel;
                break;
            }
            case SlowBlitPixelAccess_Large:
                // Handled in SDL_Blit_Slow_Float()
                break;
            }

            posx += incx;
            dst += dstbpp;
        }
        posy += incy;
        info->dst += info->dst_pitch;
    }
}
332 | |
/* Convert from F16 to float
 * Public domain implementation from https://gist.github.com/rygorous/2144712
 *
 * FP32 overlays a 32-bit float with its raw bit pattern so that
 * half_to_float() can build a float by integer bit manipulation.
 */
typedef union
{
    Uint32 u; // raw bit pattern; first member, so it is the one set by a brace initializer
    float f;  // the same storage viewed as a float
    struct
    {
        Uint32 Mantissa : 23;
        Uint32 Exponent : 8;
        Uint32 Sign : 1;
    } x; // bit-field view (layout is implementation-defined); not used by half_to_float()
} FP32;
347 | |
// MSVC warning C4214 (bit-field types other than int) is triggered by the
// Uint16 bit-fields below, so it is silenced around this definition only.
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4214)
#endif

/* 16-bit (half precision) float overlaid with its raw bit pattern.
 * half_to_float() only uses the raw `u` member.
 */
typedef union
{
    Uint16 u; // raw bit pattern
    struct
    {
        Uint16 Mantissa : 10;
        Uint16 Exponent : 5;
        Uint16 Sign : 1;
    } x; // bit-field view (layout is implementation-defined)
} FP16;

#ifdef _MSC_VER
#pragma warning(pop)
#endif
367 | |
368 | static float half_to_float(Uint16 unValue) |
369 | { |
370 | static const FP32 magic = { (254 - 15) << 23 }; |
371 | static const FP32 was_infnan = { (127 + 16) << 23 }; |
372 | FP16 h; |
373 | FP32 o; |
374 | |
375 | h.u = unValue; |
376 | o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits |
377 | o.f *= magic.f; // exponent adjust |
378 | if (o.f >= was_infnan.f) // make sure Inf/NaN survive |
379 | o.u |= 255 << 23; |
380 | o.u |= (h.u & 0x8000) << 16; // sign bit |
381 | return o.f; |
382 | } |
383 | |
384 | /* Convert from float to F16 |
385 | * Public domain implementation from https://stackoverflow.com/questions/76799117/how-to-convert-a-float-to-a-half-type-and-the-other-way-around-in-c |
386 | */ |
387 | static Uint16 float_to_half(float a) |
388 | { |
389 | Uint32 ia; |
390 | Uint16 ir; |
391 | |
392 | SDL_memcpy(&ia, &a, sizeof(ia)); |
393 | |
394 | ir = (ia >> 16) & 0x8000; |
395 | if ((ia & 0x7f800000) == 0x7f800000) { |
396 | if ((ia & 0x7fffffff) == 0x7f800000) { |
397 | ir |= 0x7c00; // infinity |
398 | } else { |
399 | ir |= 0x7e00 | ((ia >> (24 - 11)) & 0x1ff); // NaN, quietened |
400 | } |
401 | } else if ((ia & 0x7f800000) >= 0x33000000) { |
402 | int shift = (int)((ia >> 23) & 0xff) - 127; |
403 | if (shift > 15) { |
404 | ir |= 0x7c00; // infinity |
405 | } else { |
406 | ia = (ia & 0x007fffff) | 0x00800000; // extract mantissa |
407 | if (shift < -14) { // denormal |
408 | ir |= ia >> (-1 - shift); |
409 | ia = ia << (32 - (-1 - shift)); |
410 | } else { // normal |
411 | ir |= ia >> (24 - 11); |
412 | ia = ia << (32 - (24 - 11)); |
413 | ir = ir + ((14 + shift) << 10); |
414 | } |
415 | // IEEE-754 round to nearest of even |
416 | if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) { |
417 | ir++; |
418 | } |
419 | } |
420 | } |
421 | return ir; |
422 | } |
423 | |
/* Read one pixel and return it as linear float RGBA.
 *
 * The pixel is first decoded to float channels according to `access`, then the
 * color channels (not alpha) are converted according to the transfer
 * characteristics of `colorspace`.
 *
 * \param pixels address of the pixel to read.
 * \param access access strategy for the pixel format, see GetPixelAccessMethod().
 * \param fmt details of the pixel format.
 * \param pal palette; only dereferenced for SlowBlitPixelAccess_Index8.
 * \param colorspace colorspace the stored values are encoded in.
 * \param SDR_white_point divisor applied for PQ and linear transfer
 *                        characteristics; must be non-zero in those cases.
 * \param outR, outG, outB, outA receive the decoded channels.
 */
static void ReadFloatPixel(Uint8 *pixels, SlowBlitPixelAccess access, const SDL_PixelFormatDetails *fmt, const SDL_Palette *pal, SDL_Colorspace colorspace, float SDR_white_point,
                           float *outR, float *outG, float *outB, float *outA)
{
    Uint32 pixel;
    Uint32 R, G, B, A;
    float fR = 0.0f, fG = 0.0f, fB = 0.0f, fA = 0.0f;
    float v[4]; // channels of a "large" pixel in memory order

    switch (access) {
    case SlowBlitPixelAccess_Index8:
        pixel = *pixels;
        fR = (float)pal->colors[pixel].r / 255.0f;
        fG = (float)pal->colors[pixel].g / 255.0f;
        fB = (float)pal->colors[pixel].b / 255.0f;
        fA = (float)pal->colors[pixel].a / 255.0f;
        break;
    case SlowBlitPixelAccess_RGB:
        DISEMBLE_RGB(pixels, fmt->bytes_per_pixel, fmt, pixel, R, G, B);
        fR = (float)R / 255.0f;
        fG = (float)G / 255.0f;
        fB = (float)B / 255.0f;
        fA = 1.0f;
        break;
    case SlowBlitPixelAccess_RGBA:
        DISEMBLE_RGBA(pixels, fmt->bytes_per_pixel, fmt, pixel, R, G, B, A);
        fR = (float)R / 255.0f;
        fG = (float)G / 255.0f;
        fB = (float)B / 255.0f;
        fA = (float)A / 255.0f;
        break;
    case SlowBlitPixelAccess_10Bit:
        pixel = *((Uint32 *)pixels);
        switch (fmt->format) {
        case SDL_PIXELFORMAT_XRGB2101010:
            RGBAFLOAT_FROM_ARGB2101010(pixel, fR, fG, fB, fA);
            fA = 1.0f; // the X bits carry no alpha: force opaque
            break;
        case SDL_PIXELFORMAT_XBGR2101010:
            RGBAFLOAT_FROM_ABGR2101010(pixel, fR, fG, fB, fA);
            fA = 1.0f; // the X bits carry no alpha: force opaque
            break;
        case SDL_PIXELFORMAT_ARGB2101010:
            RGBAFLOAT_FROM_ARGB2101010(pixel, fR, fG, fB, fA);
            break;
        case SDL_PIXELFORMAT_ABGR2101010:
            RGBAFLOAT_FROM_ABGR2101010(pixel, fR, fG, fB, fA);
            break;
        default:
            fR = fG = fB = fA = 0.0f;
            break;
        }
        break;
    case SlowBlitPixelAccess_Large:
        // Step 1: load up to four channels in memory order. A missing fourth
        // channel (detected through the pixel size) reads as 1.0.
        switch (SDL_PIXELTYPE(fmt->format)) {
        case SDL_PIXELTYPE_ARRAYU16:
            v[0] = (float)(((Uint16 *)pixels)[0]) / SDL_MAX_UINT16;
            v[1] = (float)(((Uint16 *)pixels)[1]) / SDL_MAX_UINT16;
            v[2] = (float)(((Uint16 *)pixels)[2]) / SDL_MAX_UINT16;
            if (fmt->bytes_per_pixel == 8) {
                v[3] = (float)(((Uint16 *)pixels)[3]) / SDL_MAX_UINT16;
            } else {
                v[3] = 1.0f;
            }
            break;
        case SDL_PIXELTYPE_ARRAYF16:
            v[0] = half_to_float(((Uint16 *)pixels)[0]);
            v[1] = half_to_float(((Uint16 *)pixels)[1]);
            v[2] = half_to_float(((Uint16 *)pixels)[2]);
            if (fmt->bytes_per_pixel == 8) {
                v[3] = half_to_float(((Uint16 *)pixels)[3]);
            } else {
                v[3] = 1.0f;
            }
            break;
        case SDL_PIXELTYPE_ARRAYF32:
            v[0] = ((float *)pixels)[0];
            v[1] = ((float *)pixels)[1];
            v[2] = ((float *)pixels)[2];
            if (fmt->bytes_per_pixel == 16) {
                v[3] = ((float *)pixels)[3];
            } else {
                v[3] = 1.0f;
            }
            break;
        default:
            // Unknown array type
            v[0] = v[1] = v[2] = v[3] = 0.0f;
            break;
        }
        // Step 2: map memory order to R, G, B, A according to the array order
        switch (SDL_PIXELORDER(fmt->format)) {
        case SDL_ARRAYORDER_RGB:
            fR = v[0];
            fG = v[1];
            fB = v[2];
            fA = 1.0f;
            break;
        case SDL_ARRAYORDER_RGBA:
            fR = v[0];
            fG = v[1];
            fB = v[2];
            fA = v[3];
            break;
        case SDL_ARRAYORDER_ARGB:
            fA = v[0];
            fR = v[1];
            fG = v[2];
            fB = v[3];
            break;
        case SDL_ARRAYORDER_BGR:
            fB = v[0];
            fG = v[1];
            fR = v[2];
            fA = 1.0f;
            break;
        case SDL_ARRAYORDER_BGRA:
            fB = v[0];
            fG = v[1];
            fR = v[2];
            fA = v[3];
            break;
        case SDL_ARRAYORDER_ABGR:
            fA = v[0];
            fB = v[1];
            fG = v[2];
            fR = v[3];
            break;
        default:
            // Unknown array order
            fA = fR = fG = fB = 0.0f;
            break;
        }
        break;
    }

    // Convert to nits so src and dst are guaranteed to be linear and in the same units
    // (alpha is left untouched)
    switch (SDL_COLORSPACETRANSFER(colorspace)) {
    case SDL_TRANSFER_CHARACTERISTICS_SRGB:
        fR = SDL_sRGBtoLinear(fR);
        fG = SDL_sRGBtoLinear(fG);
        fB = SDL_sRGBtoLinear(fB);
        break;
    case SDL_TRANSFER_CHARACTERISTICS_PQ:
        fR = SDL_PQtoNits(fR) / SDR_white_point;
        fG = SDL_PQtoNits(fG) / SDR_white_point;
        fB = SDL_PQtoNits(fB) / SDR_white_point;
        break;
    case SDL_TRANSFER_CHARACTERISTICS_LINEAR:
        fR /= SDR_white_point;
        fG /= SDR_white_point;
        fB /= SDR_white_point;
        break;
    default:
        // Unknown, leave it alone
        break;
    }

    *outR = fR;
    *outG = fG;
    *outB = fB;
    *outA = fA;
}
585 | |
/* Encode a linear float RGBA color and store it as one pixel.
 *
 * This is the counterpart of ReadFloatPixel(): the color channels (not alpha)
 * are first converted according to the transfer characteristics of
 * `colorspace`, then the pixel is written according to `access`.
 *
 * \param pixels address of the pixel to write.
 * \param access access strategy for the pixel format, see GetPixelAccessMethod().
 *               SlowBlitPixelAccess_Index8 is not supported here (asserts and
 *               writes nothing); the caller handles palettized destinations.
 * \param fmt details of the pixel format.
 * \param colorspace colorspace the stored values must be encoded in.
 * \param SDR_white_point multiplier applied for PQ and linear transfer characteristics.
 * \param fR, fG, fB, fA the color to store.
 */
static void WriteFloatPixel(Uint8 *pixels, SlowBlitPixelAccess access, const SDL_PixelFormatDetails *fmt, SDL_Colorspace colorspace, float SDR_white_point,
                            float fR, float fG, float fB, float fA)
{
    Uint32 R, G, B, A;
    Uint32 pixel;
    float v[4]; // channels of a "large" pixel in memory order

    // We converted to nits so src and dst are guaranteed to be linear and in the same units
    switch (SDL_COLORSPACETRANSFER(colorspace)) {
    case SDL_TRANSFER_CHARACTERISTICS_SRGB:
        fR = SDL_sRGBfromLinear(fR);
        fG = SDL_sRGBfromLinear(fG);
        fB = SDL_sRGBfromLinear(fB);
        break;
    case SDL_TRANSFER_CHARACTERISTICS_PQ:
        fR = SDL_PQfromNits(fR * SDR_white_point);
        fG = SDL_PQfromNits(fG * SDR_white_point);
        fB = SDL_PQfromNits(fB * SDR_white_point);
        break;
    case SDL_TRANSFER_CHARACTERISTICS_LINEAR:
        fR *= SDR_white_point;
        fG *= SDR_white_point;
        fB *= SDR_white_point;
        break;
    default:
        // Unknown, leave it alone
        break;
    }

    switch (access) {
    case SlowBlitPixelAccess_Index8:
        // This should never happen, checked before this call
        SDL_assert(0);
        break;
    case SlowBlitPixelAccess_RGB:
        // Clamp to [0, 1] before scaling so the rounded value fits in 8 bits
        R = (Uint8)SDL_roundf(SDL_clamp(fR, 0.0f, 1.0f) * 255.0f);
        G = (Uint8)SDL_roundf(SDL_clamp(fG, 0.0f, 1.0f) * 255.0f);
        B = (Uint8)SDL_roundf(SDL_clamp(fB, 0.0f, 1.0f) * 255.0f);
        ASSEMBLE_RGB(pixels, fmt->bytes_per_pixel, fmt, R, G, B);
        break;
    case SlowBlitPixelAccess_RGBA:
        R = (Uint8)SDL_roundf(SDL_clamp(fR, 0.0f, 1.0f) * 255.0f);
        G = (Uint8)SDL_roundf(SDL_clamp(fG, 0.0f, 1.0f) * 255.0f);
        B = (Uint8)SDL_roundf(SDL_clamp(fB, 0.0f, 1.0f) * 255.0f);
        A = (Uint8)SDL_roundf(SDL_clamp(fA, 0.0f, 1.0f) * 255.0f);
        ASSEMBLE_RGBA(pixels, fmt->bytes_per_pixel, fmt, R, G, B, A);
        break;
    case SlowBlitPixelAccess_10Bit:
    {
        switch (fmt->format) {
        case SDL_PIXELFORMAT_XRGB2101010:
            fA = 1.0f; // no alpha channel: store the X bits as fully opaque
            SDL_FALLTHROUGH;
        case SDL_PIXELFORMAT_ARGB2101010:
            ARGB2101010_FROM_RGBAFLOAT(pixel, fR, fG, fB, fA);
            break;
        case SDL_PIXELFORMAT_XBGR2101010:
            fA = 1.0f; // no alpha channel: store the X bits as fully opaque
            SDL_FALLTHROUGH;
        case SDL_PIXELFORMAT_ABGR2101010:
            ABGR2101010_FROM_RGBAFLOAT(pixel, fR, fG, fB, fA);
            break;
        default:
            pixel = 0;
            break;
        }
        *(Uint32 *)pixels = pixel;
        break;
    }
    case SlowBlitPixelAccess_Large:
        // Step 1: arrange R, G, B, A in memory order according to the array order
        switch (SDL_PIXELORDER(fmt->format)) {
        case SDL_ARRAYORDER_RGB:
            v[0] = fR;
            v[1] = fG;
            v[2] = fB;
            v[3] = 1.0f;
            break;
        case SDL_ARRAYORDER_RGBA:
            v[0] = fR;
            v[1] = fG;
            v[2] = fB;
            v[3] = fA;
            break;
        case SDL_ARRAYORDER_ARGB:
            v[0] = fA;
            v[1] = fR;
            v[2] = fG;
            v[3] = fB;
            break;
        case SDL_ARRAYORDER_BGR:
            v[0] = fB;
            v[1] = fG;
            v[2] = fR;
            v[3] = 1.0f;
            break;
        case SDL_ARRAYORDER_BGRA:
            v[0] = fB;
            v[1] = fG;
            v[2] = fR;
            v[3] = fA;
            break;
        case SDL_ARRAYORDER_ABGR:
            v[0] = fA;
            v[1] = fB;
            v[2] = fG;
            v[3] = fR;
            break;
        default:
            // Unknown array order
            v[0] = v[1] = v[2] = v[3] = 0.0f;
            break;
        }
        // Step 2: store the channels; the fourth one is only written when the
        // pixel size says it exists. Only the U16 type is clamped to [0, 1].
        switch (SDL_PIXELTYPE(fmt->format)) {
        case SDL_PIXELTYPE_ARRAYU16:
            ((Uint16 *)pixels)[0] = (Uint16)SDL_roundf(SDL_clamp(v[0], 0.0f, 1.0f) * SDL_MAX_UINT16);
            ((Uint16 *)pixels)[1] = (Uint16)SDL_roundf(SDL_clamp(v[1], 0.0f, 1.0f) * SDL_MAX_UINT16);
            ((Uint16 *)pixels)[2] = (Uint16)SDL_roundf(SDL_clamp(v[2], 0.0f, 1.0f) * SDL_MAX_UINT16);
            if (fmt->bytes_per_pixel == 8) {
                ((Uint16 *)pixels)[3] = (Uint16)SDL_roundf(SDL_clamp(v[3], 0.0f, 1.0f) * SDL_MAX_UINT16);
            }
            break;
        case SDL_PIXELTYPE_ARRAYF16:
            ((Uint16 *)pixels)[0] = float_to_half(v[0]);
            ((Uint16 *)pixels)[1] = float_to_half(v[1]);
            ((Uint16 *)pixels)[2] = float_to_half(v[2]);
            if (fmt->bytes_per_pixel == 8) {
                ((Uint16 *)pixels)[3] = float_to_half(v[3]);
            }
            break;
        case SDL_PIXELTYPE_ARRAYF32:
            ((float *)pixels)[0] = v[0];
            ((float *)pixels)[1] = v[1];
            ((float *)pixels)[2] = v[2];
            if (fmt->bytes_per_pixel == 16) {
                ((float *)pixels)[3] = v[3];
            }
            break;
        default:
            // Unknown array type
            break;
        }
        break;
    }
}
730 | |
/* Tonemap operators understood by ApplyTonemap().
 * SDL_TONEMAP_NONE is the first enumerator (value 0), so a zeroed
 * SDL_TonemapContext means "no tonemapping" and `op` can be tested as a boolean.
 */
typedef enum
{
    SDL_TONEMAP_NONE,   // leave the color unchanged
    SDL_TONEMAP_LINEAR, // multiply the color by a constant scale
    SDL_TONEMAP_CHROME  // headroom based operator, see TonemapChrome()
} SDL_TonemapOperator;
737 | |
/* Parameters of the tonemap operation applied to every source pixel.
 * `op` selects the operator and tells which member of `data` is meaningful.
 */
typedef struct
{
    SDL_TonemapOperator op;

    union {
        // Used when op == SDL_TONEMAP_LINEAR
        struct {
            float scale; // factor applied to r, g and b
        } linear;

        // Used when op == SDL_TONEMAP_CHROME
        struct {
            float a; // coefficient of the numerator of the scale: (1 + a * max) / (1 + b * max)
            float b; // coefficient of the denominator of the scale
            const float *color_primaries_matrix; // if not NULL, passed to SDL_ConvertColorPrimaries() before tonemapping
        } chrome;

    } data;

} SDL_TonemapContext;
756 | |
/* Linear tonemap: multiply each of the three color channels, in place, by `scale`. */
static void TonemapLinear(float *r, float *g, float *b, float scale)
{
    float *channels[3];
    int i;

    channels[0] = r;
    channels[1] = g;
    channels[2] = b;

    for (i = 0; i < 3; ++i) {
        *channels[i] *= scale;
    }
}
763 | |
/* This uses the same tonemapping algorithm developed by Google for Chrome:
 * https://colab.research.google.com/drive/1hI10nq6L6ru_UFvz7-f7xQaQp0qarz_K
 *
 * The caller derives two factors from the source and destination headroom:
 *      tonemap_a = (dst_headroom / (src_headroom * src_headroom));
 *      tonemap_b = (1.0f / dst_headroom);
 *
 * This function takes the largest of the three channels and, when it is
 * positive, scales all three channels in place by
 *      (1 + tonemap_a * max) / (1 + tonemap_b * max)
 * Colors whose largest channel is not positive are left unchanged.
 */
static void TonemapChrome(float *r, float *g, float *b, float tonemap_a, float tonemap_b)
{
    const float red = *r;
    const float green = *g;
    const float blue = *b;
    const float peak = SDL_max(red, SDL_max(green, blue));
    float gain;

    // Negated "greater than" on purpose: a NaN peak is skipped as well
    if (!(peak > 0.0f)) {
        return;
    }

    gain = (1.0f + tonemap_a * peak) / (1.0f + tonemap_b * peak);
    TonemapLinear(r, g, b, gain);
}
787 | |
788 | static void ApplyTonemap(SDL_TonemapContext *ctx, float *r, float *g, float *b) |
789 | { |
790 | switch (ctx->op) { |
791 | case SDL_TONEMAP_LINEAR: |
792 | TonemapLinear(r, g, b, ctx->data.linear.scale); |
793 | break; |
794 | case SDL_TONEMAP_CHROME: |
795 | if (ctx->data.chrome.color_primaries_matrix) { |
796 | SDL_ConvertColorPrimaries(r, g, b, ctx->data.chrome.color_primaries_matrix); |
797 | } |
798 | TonemapChrome(r, g, b, ctx->data.chrome.a, ctx->data.chrome.b); |
799 | break; |
800 | default: |
801 | break; |
802 | } |
803 | } |
804 | |
805 | /* The SECOND TRUE BLITTER |
806 | * This one is even slower than the first, but also handles large pixel formats and colorspace conversion |
807 | */ |
808 | void SDL_Blit_Slow_Float(SDL_BlitInfo *info) |
809 | { |
810 | const int flags = info->flags; |
811 | const Uint32 modulateR = info->r; |
812 | const Uint32 modulateG = info->g; |
813 | const Uint32 modulateB = info->b; |
814 | const Uint32 modulateA = info->a; |
815 | float srcR, srcG, srcB, srcA; |
816 | float dstR, dstG, dstB, dstA; |
817 | Uint64 srcy, srcx; |
818 | Uint64 posy, posx; |
819 | Uint64 incy, incx; |
820 | const SDL_PixelFormatDetails *src_fmt = info->src_fmt; |
821 | const SDL_Palette *src_pal = info->src_pal; |
822 | const SDL_PixelFormatDetails *dst_fmt = info->dst_fmt; |
823 | const SDL_Palette *dst_pal = info->dst_pal; |
824 | SDL_HashTable *palette_map = info->palette_map; |
825 | int srcbpp = src_fmt->bytes_per_pixel; |
826 | int dstbpp = dst_fmt->bytes_per_pixel; |
827 | SlowBlitPixelAccess src_access; |
828 | SlowBlitPixelAccess dst_access; |
829 | SDL_Colorspace src_colorspace; |
830 | SDL_Colorspace dst_colorspace; |
831 | SDL_ColorPrimaries src_primaries; |
832 | SDL_ColorPrimaries dst_primaries; |
833 | const float *color_primaries_matrix = NULL; |
834 | float src_white_point; |
835 | float dst_white_point; |
836 | float dst_headroom; |
837 | float src_headroom; |
838 | SDL_TonemapContext tonemap; |
839 | Uint32 last_pixel = 0; |
840 | Uint8 last_index = 0; |
841 | |
842 | src_colorspace = info->src_surface->colorspace; |
843 | dst_colorspace = info->dst_surface->colorspace; |
844 | src_primaries = SDL_COLORSPACEPRIMARIES(src_colorspace); |
845 | dst_primaries = SDL_COLORSPACEPRIMARIES(dst_colorspace); |
846 | |
847 | src_white_point = SDL_GetSurfaceSDRWhitePoint(info->src_surface, src_colorspace); |
848 | dst_white_point = SDL_GetSurfaceSDRWhitePoint(info->dst_surface, dst_colorspace); |
849 | src_headroom = SDL_GetSurfaceHDRHeadroom(info->src_surface, src_colorspace); |
850 | dst_headroom = SDL_GetSurfaceHDRHeadroom(info->dst_surface, dst_colorspace); |
851 | if (dst_headroom == 0.0f) { |
852 | // The destination will have the same headroom as the source |
853 | dst_headroom = src_headroom; |
854 | SDL_SetFloatProperty(SDL_GetSurfaceProperties(info->dst_surface), SDL_PROP_SURFACE_HDR_HEADROOM_FLOAT, dst_headroom); |
855 | } |
856 | |
857 | SDL_zero(tonemap); |
858 | |
859 | if (src_headroom > dst_headroom) { |
860 | const char *tonemap_operator = SDL_GetStringProperty(SDL_GetSurfaceProperties(info->src_surface), SDL_PROP_SURFACE_TONEMAP_OPERATOR_STRING, NULL); |
861 | if (tonemap_operator) { |
862 | if (SDL_strncmp(tonemap_operator, "*=" , 2) == 0) { |
863 | tonemap.op = SDL_TONEMAP_LINEAR; |
864 | tonemap.data.linear.scale = (float)SDL_atof(tonemap_operator + 2); |
865 | } else if (SDL_strcasecmp(tonemap_operator, "chrome" ) == 0) { |
866 | tonemap.op = SDL_TONEMAP_CHROME; |
867 | } else if (SDL_strcasecmp(tonemap_operator, "none" ) == 0) { |
868 | tonemap.op = SDL_TONEMAP_NONE; |
869 | } |
870 | } else { |
871 | tonemap.op = SDL_TONEMAP_CHROME; |
872 | } |
873 | if (tonemap.op == SDL_TONEMAP_CHROME) { |
874 | tonemap.data.chrome.a = (dst_headroom / (src_headroom * src_headroom)); |
875 | tonemap.data.chrome.b = (1.0f / dst_headroom); |
876 | |
877 | // We'll convert to BT.2020 primaries for the tonemap operation |
878 | tonemap.data.chrome.color_primaries_matrix = SDL_GetColorPrimariesConversionMatrix(src_primaries, SDL_COLOR_PRIMARIES_BT2020); |
879 | if (tonemap.data.chrome.color_primaries_matrix) { |
880 | src_primaries = SDL_COLOR_PRIMARIES_BT2020; |
881 | } |
882 | } |
883 | } |
884 | |
885 | if (src_primaries != dst_primaries) { |
886 | color_primaries_matrix = SDL_GetColorPrimariesConversionMatrix(src_primaries, dst_primaries); |
887 | } |
888 | |
889 | src_access = GetPixelAccessMethod(src_fmt->format); |
890 | dst_access = GetPixelAccessMethod(dst_fmt->format); |
891 | if (dst_access == SlowBlitPixelAccess_Index8) { |
892 | last_index = SDL_LookupRGBAColor(palette_map, last_pixel, dst_pal); |
893 | } |
894 | |
895 | incy = ((Uint64)info->src_h << 16) / info->dst_h; |
896 | incx = ((Uint64)info->src_w << 16) / info->dst_w; |
897 | posy = incy / 2; // start at the middle of pixel |
898 | |
899 | while (info->dst_h--) { |
900 | Uint8 *src = 0; |
901 | Uint8 *dst = info->dst; |
902 | int n = info->dst_w; |
903 | posx = incx / 2; // start at the middle of pixel |
904 | srcy = posy >> 16; |
905 | while (n--) { |
906 | srcx = posx >> 16; |
907 | src = (info->src + (srcy * info->src_pitch) + (srcx * srcbpp)); |
908 | |
909 | ReadFloatPixel(src, src_access, src_fmt, src_pal, src_colorspace, src_white_point, &srcR, &srcG, &srcB, &srcA); |
910 | |
911 | if (tonemap.op) { |
912 | ApplyTonemap(&tonemap, &srcR, &srcG, &srcB); |
913 | } |
914 | |
915 | if (color_primaries_matrix) { |
916 | SDL_ConvertColorPrimaries(&srcR, &srcG, &srcB, color_primaries_matrix); |
917 | } |
918 | |
919 | if (flags & SDL_COPY_COLORKEY) { |
920 | // colorkey isn't supported |
921 | } |
922 | if ((flags & (SDL_COPY_BLEND | SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL))) { |
923 | ReadFloatPixel(dst, dst_access, dst_fmt, dst_pal, dst_colorspace, dst_white_point, &dstR, &dstG, &dstB, &dstA); |
924 | } else { |
925 | // don't care |
926 | dstR = dstG = dstB = dstA = 0.0f; |
927 | } |
928 | |
929 | if (flags & SDL_COPY_MODULATE_COLOR) { |
930 | srcR = (srcR * modulateR) / 255; |
931 | srcG = (srcG * modulateG) / 255; |
932 | srcB = (srcB * modulateB) / 255; |
933 | } |
934 | if (flags & SDL_COPY_MODULATE_ALPHA) { |
935 | srcA = (srcA * modulateA) / 255; |
936 | } |
937 | if (flags & (SDL_COPY_BLEND | SDL_COPY_ADD)) { |
938 | if (srcA < 1.0f) { |
939 | srcR = (srcR * srcA); |
940 | srcG = (srcG * srcA); |
941 | srcB = (srcB * srcA); |
942 | } |
943 | } |
944 | switch (flags & (SDL_COPY_BLEND | SDL_COPY_ADD | SDL_COPY_MOD | SDL_COPY_MUL)) { |
945 | case 0: |
946 | dstR = srcR; |
947 | dstG = srcG; |
948 | dstB = srcB; |
949 | dstA = srcA; |
950 | break; |
951 | case SDL_COPY_BLEND: |
952 | dstR = srcR + ((1.0f - srcA) * dstR); |
953 | dstG = srcG + ((1.0f - srcA) * dstG); |
954 | dstB = srcB + ((1.0f - srcA) * dstB); |
955 | dstA = srcA + ((1.0f - srcA) * dstA); |
956 | break; |
957 | case SDL_COPY_ADD: |
958 | dstR = srcR + dstR; |
959 | dstG = srcG + dstG; |
960 | dstB = srcB + dstB; |
961 | break; |
962 | case SDL_COPY_MOD: |
963 | dstR = (srcR * dstR); |
964 | dstG = (srcG * dstG); |
965 | dstB = (srcB * dstB); |
966 | break; |
967 | case SDL_COPY_MUL: |
968 | dstR = ((srcR * dstR) + (dstR * (1.0f - srcA))); |
969 | dstG = ((srcG * dstG) + (dstG * (1.0f - srcA))); |
970 | dstB = ((srcB * dstB) + (dstB * (1.0f - srcA))); |
971 | break; |
972 | } |
973 | |
974 | if (dst_access == SlowBlitPixelAccess_Index8) { |
975 | Uint32 R = (Uint8)SDL_roundf(SDL_clamp(SDL_sRGBfromLinear(dstR), 0.0f, 1.0f) * 255.0f); |
976 | Uint32 G = (Uint8)SDL_roundf(SDL_clamp(SDL_sRGBfromLinear(dstG), 0.0f, 1.0f) * 255.0f); |
977 | Uint32 B = (Uint8)SDL_roundf(SDL_clamp(SDL_sRGBfromLinear(dstB), 0.0f, 1.0f) * 255.0f); |
978 | Uint32 A = (Uint8)SDL_roundf(SDL_clamp(dstA, 0.0f, 1.0f) * 255.0f); |
979 | Uint32 dstpixel = ((R << 24) | (G << 16) | (B << 8) | A); |
980 | if (dstpixel != last_pixel) { |
981 | last_pixel = dstpixel; |
982 | last_index = SDL_LookupRGBAColor(palette_map, dstpixel, dst_pal); |
983 | } |
984 | *dst = last_index; |
985 | } else { |
986 | WriteFloatPixel(dst, dst_access, dst_fmt, dst_colorspace, dst_white_point, dstR, dstG, dstB, dstA); |
987 | } |
988 | |
989 | posx += incx; |
990 | dst += dstbpp; |
991 | } |
992 | posy += incy; |
993 | info->dst += info->dst_pitch; |
994 | } |
995 | } |
996 | |
997 | |