SDL_RLEaccel.c source code [SDL/src/video/SDL_RLEaccel.c]

1	/*
2	Simple DirectMedia Layer
3	Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5	This software is provided 'as-is', without any express or implied
6	warranty. In no event will the authors be held liable for any damages
7	arising from the use of this software.
8
9	Permission is granted to anyone to use this software for any purpose,
10	including commercial applications, and to alter it and redistribute it
11	freely, subject to the following restrictions:
12
13	1. The origin of this software must not be misrepresented; you must not
14	claim that you wrote the original software. If you use this software
15	in a product, an acknowledgment in the product documentation would be
16	appreciated but is not required.
17	2. Altered source versions must be plainly marked as such, and must not be
18	misrepresented as being the original software.
19	3. This notice may not be removed or altered from any source distribution.
20	*/
21	#include "SDL_internal.h"
22
23	#ifdef SDL_HAVE_RLE
24
25	/*
26	* RLE encoding for software colorkey and alpha-channel acceleration
27	*
28	* Original version by Sam Lantinga
29	*
30	* Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
31	* decoder. Added per-surface alpha blitter. Added per-pixel alpha
32	* format, encoder and blitter.
33	*
34	* Many thanks to Xark and johns for hints, benchmarks and useful comments
35	* leading to this code.
36	*
37	* Welcome to Macro Mayhem.
38	*/
39
40	/*
41	* The encoding translates the image data to a stream of segments of the form
42	*
43	* <skip> <run> <data>
44	*
45	* where <skip> is the number of transparent pixels to skip,
46	* <run> is the number of opaque pixels to blit,
47	* and <data> are the pixels themselves.
48	*
49	* This basic structure is used both for colorkeyed surfaces, used for simple
50	* binary transparency and for per-surface alpha blending, and for surfaces
51	* with per-pixel alpha. The details differ, however:
52	*
53	* Encoding of colorkeyed surfaces:
54	*
55	* Encoded pixels always have the same format as the target surface.
56	* <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
57	* where they are 16 bit. This makes the pixel data aligned at all times.
58	* Segments never wrap around from one scan line to the next.
59	*
 * The end of the sequence is marked by a zero <skip>,<run> pair at the
 * beginning of a line.
62	*
63	* Encoding of surfaces with per-pixel alpha:
64	*
65	* The sequence begins with an SDL_PixelFormat value describing the target
66	* pixel format, to provide reliable un-encoding.
67	*
68	* Each scan line is encoded twice: First all completely opaque pixels,
69	* encoded in the target format as described above, and then all
70	* partially transparent (translucent) pixels (where 1 <= alpha <= 254),
71	* in the following 32-bit format:
72	*
73	* For 32-bit targets, each pixel has the target RGB format but with
74	* the alpha value occupying the highest 8 bits. The <skip> and <run>
75	* counts are 16 bit.
76	*
77	* For 16-bit targets, each pixel has the target RGB format, but with
78	* the middle component (usually green) shifted 16 steps to the left,
79	* and the hole filled with the 5 most significant bits of the alpha value.
80	* i.e. if the target has the format rrrrrggggggbbbbb,
81	* the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
82	* The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
83	* for the translucent lines. Two padding bytes may be inserted
84	* before each translucent line to keep them 32-bit aligned.
85	*
86	* The end of the sequence is marked by a zero <skip>,<run> pair at the
87	* beginning of an opaque line.
88	*/
89
90	#include "SDL_sysvideo.h"
91	#include "SDL_surface_c.h"
92	#include "SDL_RLEaccel_c.h"
93
/* Copy `len` pixels of `bpp` bytes each from `from` to `to`. */
#define PIXEL_COPY(to, from, len, bpp) \
    SDL_memcpy(to, from, (size_t)(len) * (bpp))
96
97	/*
98	* Various colorkey blit methods, for opaque and per-surface alpha
99	*/
100
/*
 * Opaque blit: a plain pixel copy. `alpha` is accepted only so that this
 * macro has the same parameter list as the blending blitters; it is unused.
 */
#define OPAQUE_BLIT(to, from, length, bpp, alpha) \
    PIXEL_COPY(to, from, length, bpp)
103
/*
 * For 32bpp pixels of the form 0x00rrggbb:
 * If we treat the middle component separately, we can process the two
 * remaining in parallel. This is safe to do because of the gap to the left
 * of each component, so the bits from the multiplication don't collide.
 * This can be used for any RGB permutation of course.
 *
 * Blends `length` pixels from `from` onto `to` with a per-surface `alpha`
 * (0..255). `bpp` is unused; it is kept for a uniform blitter signature.
 * The subtraction relies on unsigned wrap-around, so `alpha` should be
 * an unsigned value.
 */
#define ALPHA_BLIT32_888(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint32 *src = (Uint32 *)(from); \
        Uint32 *dst = (Uint32 *)(to); \
        for (i = 0; i < (int)(length); i++) { \
            Uint32 s = *src++; \
            Uint32 d = *dst; \
            /* outer two components, blended in one multiplication */ \
            Uint32 s1 = s & 0xff00ff; \
            Uint32 d1 = d & 0xff00ff; \
            d1 = (d1 + ((s1 - d1) * (alpha) >> 8)) & 0xff00ff; \
            /* middle component */ \
            s &= 0xff00; \
            d &= 0xff00; \
            d = (d + ((s - d) * (alpha) >> 8)) & 0xff00; \
            *dst++ = d1 | d; \
        } \
    } while (0)
128
/*
 * For 16bpp pixels we can go a step further: put the middle component
 * in the high 16 bits of a 32 bit word, and process all three RGB
 * components at the same time. Since the smallest gap is here just
 * 5 bits, we have to scale alpha down to 5 bits as well.
 *
 * Blends `length` 565 pixels from `from` onto `to`; `bpp` is unused.
 */
#define ALPHA_BLIT16_565(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint16 *src = (Uint16 *)(from); \
        Uint16 *dst = (Uint16 *)(to); \
        Uint32 ALPHA = (alpha) >> 3; \
        for (i = 0; i < (int)(length); i++) { \
            Uint32 s = *src++; \
            Uint32 d = *dst; \
            /* spread to 0x07e0f81f: green high, red/blue low */ \
            s = (s | s << 16) & 0x07e0f81f; \
            d = (d | d << 16) & 0x07e0f81f; \
            d += (s - d) * ALPHA >> 5; \
            d &= 0x07e0f81f; \
            *dst++ = (Uint16)(d | d >> 16); \
        } \
    } while (0)
151
/*
 * Same technique as the 565 per-surface blender, for 555 pixels: the middle
 * 5-bit component is moved to the high half-word (mask 0x03e07c1f) and
 * alpha is reduced to 5 bits. `bpp` is unused.
 */
#define ALPHA_BLIT16_555(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint16 *src = (Uint16 *)(from); \
        Uint16 *dst = (Uint16 *)(to); \
        Uint32 ALPHA = (alpha) >> 3; \
        for (i = 0; i < (int)(length); i++) { \
            Uint32 s = *src++; \
            Uint32 d = *dst; \
            s = (s | s << 16) & 0x03e07c1f; \
            d = (d | d << 16) & 0x03e07c1f; \
            d += (s - d) * ALPHA >> 5; \
            d &= 0x03e07c1f; \
            *dst++ = (Uint16)(d | d >> 16); \
        } \
    } while (0)
168
/*
 * The general slow catch-all function, for remaining depths and formats.
 *
 * Blends `length` pixels of `bpp` (2, 3 or 4) bytes each from `from` onto
 * `to`. Unlike the specialised blenders, this macro reads a variable named
 * `fmt` from the scope in which it is expanded and passes it to
 * RGB_FROM_PIXEL / PIXEL_FROM_RGB (both defined elsewhere).
 * 3-byte pixels are assembled and stored byte by byte according to
 * SDL_BYTEORDER.
 */
#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint8 *src = from; \
        Uint8 *dst = to; \
        for (i = 0; i < (int)(length); i++) { \
            Uint32 s = 0, d = 0; \
            unsigned rs, gs, bs, rd, gd, bd; \
            /* load one source and one destination pixel */ \
            switch (bpp) { \
            case 2: \
                s = *(Uint16 *)src; \
                d = *(Uint16 *)dst; \
                break; \
            case 3: \
                if (SDL_BYTEORDER == SDL_BIG_ENDIAN) { \
                    s = (src[0] << 16) | (src[1] << 8) | src[2]; \
                    d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \
                } else { \
                    s = (src[2] << 16) | (src[1] << 8) | src[0]; \
                    d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \
                } \
                break; \
            case 4: \
                s = *(Uint32 *)src; \
                d = *(Uint32 *)dst; \
                break; \
            } \
            RGB_FROM_PIXEL(s, fmt, rs, gs, bs); \
            RGB_FROM_PIXEL(d, fmt, rd, gd, bd); \
            /* unsigned wrap-around makes (src - dst) work for src < dst */ \
            rd += (rs - rd) * (alpha) >> 8; \
            gd += (gs - gd) * (alpha) >> 8; \
            bd += (bs - bd) * (alpha) >> 8; \
            PIXEL_FROM_RGB(d, fmt, rd, gd, bd); \
            /* store the blended pixel */ \
            switch (bpp) { \
            case 2: \
                *(Uint16 *)dst = (Uint16)d; \
                break; \
            case 3: \
                if (SDL_BYTEORDER == SDL_BIG_ENDIAN) { \
                    dst[0] = (Uint8)(d >> 16); \
                    dst[1] = (Uint8)(d >> 8); \
                    dst[2] = (Uint8)(d); \
                } else { \
                    dst[0] = (Uint8)d; \
                    dst[1] = (Uint8)(d >> 8); \
                    dst[2] = (Uint8)(d >> 16); \
                } \
                break; \
            case 4: \
                *(Uint32 *)dst = d; \
                break; \
            } \
            src += bpp; \
            dst += bpp; \
        } \
    } while (0)
228
/*
 * Special case: 50% alpha (alpha=128)
 * This is treated specially because it can be optimized very well, and
 * since it is good for many cases of semi-translucency.
 * The theory is to do all three components at the same time:
 * First zero the lowest bit of each component, which gives us room to
 * add them. Then shift right and add the sum of the lowest bits.
 *
 * `bpp` and `alpha` are unused; they keep the blitter signature uniform.
 */
#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint32 *src = (Uint32 *)(from); \
        Uint32 *dst = (Uint32 *)(to); \
        for (i = 0; i < (int)(length); i++) { \
            Uint32 s = *src++; \
            Uint32 d = *dst; \
            *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) + (s & d & 0x00010101); \
        } \
    } while (0)
248
249	/*
250	* For 16bpp, we can actually blend two pixels in parallel, if we take
251	* care to shift before we add, not after.
252	*/
253
/*
 * helper: blend a single 16 bit pixel at 50%.
 * `dst` and `src` must be modifiable Uint16 pointers; both are advanced by
 * one pixel. `mask` selects the bits kept for the addition (the lowest bit
 * of each component cleared).
 */
#define BLEND16_50(dst, src, mask) \
    do { \
        Uint32 s = *src++; \
        Uint32 d = *dst; \
        *dst++ = (Uint16)((((s & (mask)) + (d & (mask))) >> 1) + \
                          (s & d & (~(mask) & 0xffff))); \
    } while (0)
262
/*
 * basic 16bpp blender. mask is the pixels to keep when adding.
 *
 * When source and destination share the same 32-bit phase, pixels are
 * blended two at a time through 32-bit loads/stores, with single-pixel
 * BLEND16_50 steps for a leading and/or trailing odd pixel. Otherwise every
 * pixel is blended individually. `bpp` and `alpha` are unused.
 */
#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask) \
    do { \
        unsigned n = (length); \
        Uint16 *src = (Uint16 *)(from); \
        Uint16 *dst = (Uint16 *)(to); \
        if (((uintptr_t)src ^ (uintptr_t)dst) & 3) { \
            /* source and destination not in phase, blit one by one */ \
            while (n--) \
                BLEND16_50(dst, src, mask); \
        } else { \
            if ((uintptr_t)src & 3) { \
                /* first odd pixel */ \
                BLEND16_50(dst, src, mask); \
                n--; \
            } \
            for (; n > 1; n -= 2) { \
                Uint32 s = *(Uint32 *)src; \
                Uint32 d = *(Uint32 *)dst; \
                *(Uint32 *)dst = ((s & ((mask) | (mask) << 16)) >> 1) + ((d & ((mask) | (mask) << 16)) >> 1) + (s & d & (~((mask) | (mask) << 16))); \
                src += 2; \
                dst += 2; \
            } \
            if (n) \
                BLEND16_50(dst, src, mask); /* last odd pixel */ \
        } \
    } while (0)
290
/* 50% blend for 565 pixels: mask clears the lowest bit of each component. */
#define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha) \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7deU)

/* 50% blend for 555 pixels: mask clears the lowest bit of each component. */
#define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbdeU)
296
/*
 * Select and expand the right blitter for the given per-surface alpha and
 * destination format.
 *
 * `blitter` is a macro invoked as blitter(bpp, CountType, do_blit), where
 * CountType is the type of the <skip>/<run> counts (Uint8, or Uint16 for
 * 4-byte pixels) and do_blit is one of the *_BLIT macros.
 *  - alpha == 255: OPAQUE_BLIT for every depth.
 *  - otherwise: nothing for 1-byte pixels; specialised 565/555/888 blenders
 *    (with a 50% fast path when alpha == 128) when the masks match, and
 *    ALPHA_BLIT_ANY for everything else.
 *
 * The expansion contains the label `general16`, so the macro can be used at
 * most once per function.
 */
#define CHOOSE_BLIT(blitter, alpha, fmt) \
    do { \
        if ((alpha) == 255) { \
            switch ((fmt)->bytes_per_pixel) { \
            case 1: \
                blitter(1, Uint8, OPAQUE_BLIT); \
                break; \
            case 2: \
                blitter(2, Uint8, OPAQUE_BLIT); \
                break; \
            case 3: \
                blitter(3, Uint8, OPAQUE_BLIT); \
                break; \
            case 4: \
                blitter(4, Uint16, OPAQUE_BLIT); \
                break; \
            } \
        } else { \
            switch ((fmt)->bytes_per_pixel) { \
            case 1: \
                /* No 8bpp alpha blitting */ \
                break; \
 \
            case 2: \
                switch ((fmt)->Rmask | (fmt)->Gmask | (fmt)->Bmask) { \
                case 0xffff: \
                    if ((fmt)->Gmask == 0x07e0 || (fmt)->Rmask == 0x07e0 || (fmt)->Bmask == 0x07e0) { \
                        if ((alpha) == 128) { \
                            blitter(2, Uint8, ALPHA_BLIT16_565_50); \
                        } else { \
                            blitter(2, Uint8, ALPHA_BLIT16_565); \
                        } \
                    } else { \
                        goto general16; \
                    } \
                    break; \
 \
                case 0x7fff: \
                    if ((fmt)->Gmask == 0x03e0 || (fmt)->Rmask == 0x03e0 || (fmt)->Bmask == 0x03e0) { \
                        if ((alpha) == 128) { \
                            blitter(2, Uint8, ALPHA_BLIT16_555_50); \
                        } else { \
                            blitter(2, Uint8, ALPHA_BLIT16_555); \
                        } \
                    } else { \
                        goto general16; \
                    } \
                    break; \
 \
                default: \
                general16: \
                    blitter(2, Uint8, ALPHA_BLIT_ANY); \
                } \
                break; \
 \
            case 3: \
                blitter(3, Uint8, ALPHA_BLIT_ANY); \
                break; \
 \
            case 4: \
                if (((fmt)->Rmask | (fmt)->Gmask | (fmt)->Bmask) == 0x00ffffff && ((fmt)->Gmask == 0xff00 || (fmt)->Rmask == 0xff00 || (fmt)->Bmask == 0xff00)) { \
                    if ((alpha) == 128) { \
                        blitter(4, Uint16, ALPHA_BLIT32_888_50); \
                    } else { \
                        blitter(4, Uint16, ALPHA_BLIT32_888); \
                    } \
                } else { \
                    blitter(4, Uint16, ALPHA_BLIT_ANY); \
                } \
                break; \
            } \
        } \
    } while (0)
371
/*
 * Set a pixel value using the given format, except that the alpha value is
 * placed in the top byte. This is the format used for RLE with alpha.
 *
 * r, g, b and a are 8-bit component values; each colour component is reduced
 * to the format's bit width and shifted into place. The macro is wrapped in
 * do/while(0) so it behaves as a single statement, and alpha is widened to
 * Uint32 before shifting so a signed argument cannot overflow.
 */
#define RLEPIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a) \
    do { \
        Pixel = (((r) >> (8 - (fmt)->Rbits)) << (fmt)->Rshift) | \
                (((g) >> (8 - (fmt)->Gbits)) << (fmt)->Gshift) | \
                (((b) >> (8 - (fmt)->Bbits)) << (fmt)->Bshift) | \
                ((Uint32)(a) << 24); \
    } while (0)
383
/*
 * This takes care of the case when the surface is clipped on the left and/or
 * right. Top clipping has already been taken care of.
 *
 * Expands in a scope that provides: srcrect (x, w, h), srcbuf and dstbuf
 * (byte pointers, both modified), w (encoded line width in pixels),
 * surf_dst->pitch and alpha. `Type` is the type of the <skip>/<run> counts
 * and do_blit(to, from, len, bpp, alpha) blits one clipped run.
 * dstbuf is first moved back by `left` pixels so that destination offsets
 * can be computed directly from source columns.
 */
#define RLECLIPBLIT(bpp, Type, do_blit) \
    do { \
        int linecount = srcrect->h; \
        int ofs = 0; \
        int left = srcrect->x; \
        int right = left + srcrect->w; \
        dstbuf -= left * (bpp); \
        for (;;) { \
            int run; \
            ofs += *(Type *)srcbuf; \
            run = ((Type *)srcbuf)[1]; \
            srcbuf += 2 * sizeof(Type); \
            if (run) { \
                /* clip to left and right borders */ \
                if (ofs < right) { \
                    int start = 0; \
                    int len = run; \
                    int startcol; \
                    if (left - ofs > 0) { \
                        start = left - ofs; \
                        len -= start; \
                        if (len <= 0) \
                            goto nocopy##bpp##do_blit; \
                    } \
                    startcol = ofs + start; \
                    if (len > right - startcol) \
                        len = right - startcol; \
                    do_blit(dstbuf + startcol * (bpp), srcbuf + start * (bpp), \
                            len, bpp, alpha); \
                } \
            nocopy##bpp##do_blit : srcbuf += run * (bpp); \
                ofs += run; \
            } else if (!ofs) { \
                /* zero <skip>,<run> pair at line start: end of data */ \
                break; \
            } \
 \
            if (ofs == w) { \
                ofs = 0; \
                dstbuf += surf_dst->pitch; \
                if (!--linecount) { \
                    break; \
                } \
            } \
        } \
    } while (0)
433
434	static void RLEClipBlit(int w, Uint8 srcbuf, SDL_Surface surf_dst,
435	Uint8 dstbuf, const* SDL_Rect srcrect, unsigned* alpha)
436	{
437	const SDL_PixelFormatDetails *fmt = surf_dst->fmt;
438
439	CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
440	}
441
442	#undef RLECLIPBLIT
443
444	// blit a colorkeyed RLE surface
445	static bool SDLCALL SDL_RLEBlit(SDL_Surface surf_src, const* SDL_Rect *srcrect,
446	SDL_Surface surf_dst, const* SDL_Rect *dstrect)
447	{
448	Uint8 *dstbuf;
449	Uint8 *srcbuf;
450	int x, y;
451	int w = surf_src->w;
452	unsigned alpha;
453
454	// Lock the destination if necessary
455	if (SDL_MUSTLOCK(surf_dst)) {
456	if (!SDL_LockSurface(surf_dst)) {
457	return false;
458	}
459	}
460
461	// Set up the source and destination pointers
462	x = dstrect->x;
463	y = dstrect->y;
464	dstbuf = (Uint8 )surf_dst->pixels + y surf_dst->pitch + x * surf_src->fmt->bytes_per_pixel;
465	srcbuf = (Uint8 )surf_src->map.data + sizeof*(SDL_PixelFormat);
466
467	{
468	// skip lines at the top if necessary
469	int vskip = srcrect->y;
470	int ofs = `0`;
471	if (vskip) {
472
473	#define RLESKIP(bpp, Type) \
474	for (;;) { \
475	int run; \
476	ofs += (Type )srcbuf; \
477	run = ((Type *)srcbuf)[1]; \
478	srcbuf += sizeof(Type) * 2; \
479	if (run) { \
480	srcbuf += run * bpp; \
481	ofs += run; \
482	} else if (!ofs) \
483	goto done; \
484	if (ofs == w) { \
485	ofs = 0; \
486	if (!--vskip) \
487	break; \
488	} \
489	}
490
491	switch (surf_src->fmt->bytes_per_pixel) {
492	case `1`:
493	RLESKIP(`1`, Uint8);
494	break;
495	case `2`:
496	RLESKIP(`2`, Uint8);
497	break;
498	case `3`:
499	RLESKIP(`3`, Uint8);
500	break;
501	case `4`:
502	RLESKIP(`4`, Uint16);
503	break;
504	}
505
506	#undef RLESKIP
507	}
508	}
509
510	alpha = surf_src->map.info.a;
511	// if left or right edge clipping needed, call clip blit
512	if (srcrect->x \|\| srcrect->w != surf_src->w) {
513	RLEClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect, alpha);
514	} else {
515	const SDL_PixelFormatDetails *fmt = surf_src->fmt;
516
517	#define RLEBLIT(bpp, Type, do_blit) \
518	do { \
519	int linecount = srcrect->h; \
520	int ofs = 0; \
521	for (;;) { \
522	unsigned run; \
523	ofs += (Type )srcbuf; \
524	run = ((Type *)srcbuf)[1]; \
525	srcbuf += 2 * sizeof(Type); \
526	if (run) { \
527	do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
528	srcbuf += run * bpp; \
529	ofs += run; \
530	} else if (!ofs) \
531	break; \
532	if (ofs == w) { \
533	ofs = 0; \
534	dstbuf += surf_dst->pitch; \
535	if (!--linecount) \
536	break; \
537	} \
538	} \
539	} while (0)
540
541	CHOOSE_BLIT(RLEBLIT, alpha, fmt);
542
543	#undef RLEBLIT
544	}
545
546	done:
547	// Unlock the destination if necessary
548	if (SDL_MUSTLOCK(surf_dst)) {
549	SDL_UnlockSurface(surf_dst);
550	}
551	return true;
552	}
553
554	#undef OPAQUE_BLIT
555
556	/*
557	* Per-pixel blitting macros for translucent pixels:
558	* These use the same techniques as the per-surface blitting macros
559	*/
560
/*
 * For 32bpp pixels, we have made sure the alpha is stored in the top
 * 8 bits, so proceed as usual.
 *
 * `src` is an encoded translucent pixel (alpha in bits 24-31); `dst` is an
 * lvalue that is read, blended and written back with its top byte set
 * to 0xff.
 */
#define BLIT_TRANSL_888(src, dst) \
    do { \
        Uint32 s = src; \
        Uint32 d = dst; \
        unsigned alpha = s >> 24; \
        Uint32 s1 = s & 0xff00ff; \
        Uint32 d1 = d & 0xff00ff; \
        d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
        s &= 0xff00; \
        d &= 0xff00; \
        d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
        dst = d1 | d | 0xff000000; \
    } while (0)
578
/*
 * For 16bpp pixels, we have stored the 5 most significant alpha bits in
 * bits 5-10. As before, we can process all 3 RGB components at the same time.
 *
 * `src` is an encoded 32-bit translucent pixel; `dst` is a 16-bit 565
 * lvalue that is read, blended and written back.
 */
#define BLIT_TRANSL_565(src, dst) \
    do { \
        Uint32 s = src; \
        Uint32 d = dst; \
        unsigned alpha = (s & 0x3e0) >> 5; \
        s &= 0x07e0f81f; \
        d = (d | d << 16) & 0x07e0f81f; \
        d += (s - d) * alpha >> 5; \
        d &= 0x07e0f81f; \
        dst = (Uint16)(d | d >> 16); \
    } while (0)
594
/*
 * 555 variant of the per-pixel 16bpp blender: the 5-bit alpha is taken from
 * bits 5-9 of the encoded pixel and the colour components are blended in
 * the spread 0x03e07c1f layout. `dst` is a 16-bit lvalue that is read,
 * blended and written back.
 */
#define BLIT_TRANSL_555(src, dst) \
    do { \
        Uint32 s = src; \
        Uint32 d = dst; \
        unsigned alpha = (s & 0x3e0) >> 5; \
        s &= 0x03e07c1f; \
        d = (d | d << 16) & 0x03e07c1f; \
        d += (s - d) * alpha >> 5; \
        d &= 0x03e07c1f; \
        dst = (Uint16)(d | d >> 16); \
    } while (0)
606
607	// blit a pixel-alpha RLE surface clipped at the right and/or left edges
608	static void RLEAlphaClipBlit(int w, Uint8 srcbuf, SDL_Surface surf_dst,
609	Uint8 dstbuf, const* SDL_Rect *srcrect)
610	{
611	const SDL_PixelFormatDetails *df = surf_dst->fmt;
612	/*
613	* clipped blitter: Ptype is the destination pixel type,
614	* Ctype the translucent count type, and do_blend the macro
615	* to blend one pixel.
616	*/
617	#define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend) \
618	do { \
619	int linecount = srcrect->h; \
620	int left = srcrect->x; \
621	int right = left + srcrect->w; \
622	dstbuf -= left * sizeof(Ptype); \
623	do { \
624	int ofs = 0; \
625	/* blit opaque pixels on one line */ \
626	do { \
627	unsigned run; \
628	ofs += ((Ctype *)srcbuf)[0]; \
629	run = ((Ctype *)srcbuf)[1]; \
630	srcbuf += 2 * sizeof(Ctype); \
631	if (run) { \
632	/* clip to left and right borders */ \
633	int cofs = ofs; \
634	int crun = run; \
635	if (left - cofs > 0) { \
636	crun -= left - cofs; \
637	cofs = left; \
638	} \
639	if (crun > right - cofs) \
640	crun = right - cofs; \
641	if (crun > 0) \
642	PIXEL_COPY(dstbuf + cofs * sizeof(Ptype), \
643	srcbuf + (cofs - ofs) * sizeof(Ptype), \
644	(unsigned)crun, sizeof(Ptype)); \
645	srcbuf += run * sizeof(Ptype); \
646	ofs += run; \
647	} else if (!ofs) \
648	return; \
649	} while (ofs < w); \
650	/* skip padding if necessary */ \
651	if (sizeof(Ptype) == 2) \
652	srcbuf += (uintptr_t)srcbuf & 2; \
653	/* blit translucent pixels on the same line */ \
654	ofs = 0; \
655	do { \
656	unsigned run; \
657	ofs += ((Uint16 *)srcbuf)[0]; \
658	run = ((Uint16 *)srcbuf)[1]; \
659	srcbuf += 4; \
660	if (run) { \
661	/* clip to left and right borders */ \
662	int cofs = ofs; \
663	int crun = run; \
664	if (left - cofs > 0) { \
665	crun -= left - cofs; \
666	cofs = left; \
667	} \
668	if (crun > right - cofs) \
669	crun = right - cofs; \
670	if (crun > 0) { \
671	Ptype dst = (Ptype )dstbuf + cofs; \
672	Uint32 src = (Uint32 )srcbuf + (cofs - ofs); \
673	int i; \
674	for (i = 0; i < crun; i++) \
675	do_blend(src[i], dst[i]); \
676	} \
677	srcbuf += run * 4; \
678	ofs += run; \
679	} \
680	} while (ofs < w); \
681	dstbuf += surf_dst->pitch; \
682	} while (--linecount); \
683	} while (0)
684
685	switch (df->bytes_per_pixel) {
686	case `2`:
687	if (df->Gmask == `0x07e0` \|\| df->Rmask == `0x07e0` \|\| df->Bmask == `0x07e0`) {
688	RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
689	} else {
690	RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
691	}
692	break;
693	case `4`:
694	RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
695	break;
696	}
697	}
698
699	// blit a pixel-alpha RLE surface
700	static bool SDLCALL SDL_RLEAlphaBlit(SDL_Surface surf_src, const* SDL_Rect *srcrect,
701	SDL_Surface surf_dst, const* SDL_Rect *dstrect)
702	{
703	int x, y;
704	int w = surf_src->w;
705	Uint8 srcbuf, dstbuf;
706	const SDL_PixelFormatDetails *df = surf_dst->fmt;
707
708	// Lock the destination if necessary
709	if (SDL_MUSTLOCK(surf_dst)) {
710	if (!SDL_LockSurface(surf_dst)) {
711	return false;
712	}
713	}
714
715	x = dstrect->x;
716	y = dstrect->y;
717	dstbuf = (Uint8 )surf_dst->pixels + y surf_dst->pitch + x * df->bytes_per_pixel;
718	srcbuf = (Uint8 )surf_src->map.data + sizeof*(SDL_PixelFormat);
719
720	{
721	// skip lines at the top if necessary
722	int vskip = srcrect->y;
723	if (vskip) {
724	int ofs;
725	if (df->bytes_per_pixel == `2`) {
726	// the 16/32 interleaved format
727	do {
728	// skip opaque line
729	ofs = `0`;
730	do {
731	int run;
732	ofs += srcbuf[`0`];
733	run = srcbuf[`1`];
734	srcbuf += `2`;
735	if (run) {
736	srcbuf += `2` * run;
737	ofs += run;
738	} else if (ofs == `0`) {
739	goto done;
740	}
741	} while (ofs < w);
742
743	// skip padding
744	srcbuf += (uintptr_t)srcbuf & `2`;
745
746	// skip translucent line
747	ofs = `0`;
748	do {
749	int run;
750	ofs += ((Uint16 *)srcbuf)[`0`];
751	run = ((Uint16 *)srcbuf)[`1`];
752	srcbuf += `4` * (run + `1`);
753	ofs += run;
754	} while (ofs < w);
755	} while (--vskip);
756	} else {
757	// the 32/32 interleaved format
758	vskip <<= `1`; // opaque and translucent have same format
759	do {
760	ofs = `0`;
761	do {
762	int run;
763	ofs += ((Uint16 *)srcbuf)[`0`];
764	run = ((Uint16 *)srcbuf)[`1`];
765	srcbuf += `4`;
766	if (run) {
767	srcbuf += `4` * run;
768	ofs += run;
769	} else if (ofs == `0`) {
770	goto done;
771	}
772	} while (ofs < w);
773	} while (--vskip);
774	}
775	}
776	}
777
778	// if left or right edge clipping needed, call clip blit
779	if (srcrect->x \|\| srcrect->w != surf_src->w) {
780	RLEAlphaClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect);
781	} else {
782
783	/*
784	* non-clipped blitter. Ptype is the destination pixel type,
785	* Ctype the translucent count type, and do_blend the
786	* macro to blend one pixel.
787	*/
788	#define RLEALPHABLIT(Ptype, Ctype, do_blend) \
789	do { \
790	int linecount = srcrect->h; \
791	do { \
792	int ofs = 0; \
793	/* blit opaque pixels on one line */ \
794	do { \
795	unsigned run; \
796	ofs += ((Ctype *)srcbuf)[0]; \
797	run = ((Ctype *)srcbuf)[1]; \
798	srcbuf += 2 * sizeof(Ctype); \
799	if (run) { \
800	PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
801	run, sizeof(Ptype)); \
802	srcbuf += run * sizeof(Ptype); \
803	ofs += run; \
804	} else if (!ofs) \
805	goto done; \
806	} while (ofs < w); \
807	/* skip padding if necessary */ \
808	if (sizeof(Ptype) == 2) \
809	srcbuf += (uintptr_t)srcbuf & 2; \
810	/* blit translucent pixels on the same line */ \
811	ofs = 0; \
812	do { \
813	unsigned run; \
814	ofs += ((Uint16 *)srcbuf)[0]; \
815	run = ((Uint16 *)srcbuf)[1]; \
816	srcbuf += 4; \
817	if (run) { \
818	Ptype dst = (Ptype )dstbuf + ofs; \
819	unsigned i; \
820	for (i = 0; i < run; i++) { \
821	Uint32 src = (Uint32 )srcbuf; \
822	do_blend(src, *dst); \
823	srcbuf += 4; \
824	dst++; \
825	} \
826	ofs += run; \
827	} \
828	} while (ofs < w); \
829	dstbuf += surf_dst->pitch; \
830	} while (--linecount); \
831	} while (0)
832
833	switch (df->bytes_per_pixel) {
834	case `2`:
835	if (df->Gmask == `0x07e0` \|\| df->Rmask == `0x07e0` \|\| df->Bmask == `0x07e0`) {
836	RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
837	} else {
838	RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
839	}
840	break;
841	case `4`:
842	RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
843	break;
844	}
845	}
846
847	done:
848	// Unlock the destination if necessary
849	if (SDL_MUSTLOCK(surf_dst)) {
850	SDL_UnlockSurface(surf_dst);
851	}
852	return true;
853	}
854
855	/*
856	* Auxiliary functions:
857	* The encoding functions take 32bpp rgb + a, and
858	* return the number of bytes copied to the destination.
859	* The decoding functions copy to 32bpp rgb + a, and
860	* return the number of bytes copied from the source.
861	* These are only used in the encoder and un-RLE code and are therefore not
862	* highly optimised.
863	*/
864
865	// encode 32bpp rgb + a into 16bpp rgb, losing alpha
866	static int copy_opaque_16(void dst, const* Uint32 src, int* n,
867	const SDL_PixelFormatDetails sfmt, const* SDL_PixelFormatDetails *dfmt)
868	{
869	int i;
870	Uint16 d = (Uint16 )dst;
871	for (i = `0`; i < n; i++) {
872	unsigned r, g, b;
873	RGB_FROM_PIXEL(*src, sfmt, r, g, b);
874	PIXEL_FROM_RGB(*d, dfmt, r, g, b);
875	src++;
876	d++;
877	}
878	return n * `2`;
879	}
880
881	// decode opaque pixels from 16bpp to 32bpp rgb + a
882	static int uncopy_opaque_16(Uint32 dst, const* void src, int* n,
883	const SDL_PixelFormatDetails sfmt, const* SDL_PixelFormatDetails *dfmt)
884	{
885	int i;
886	const Uint16 s = (const* Uint16 *)src;
887	unsigned alpha = dfmt->Amask ? `255` : `0`;
888	for (i = `0`; i < n; i++) {
889	unsigned r, g, b;
890	RGB_FROM_PIXEL(*s, sfmt, r, g, b);
891	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
892	s++;
893	dst++;
894	}
895	return n * `2`;
896	}
897
898	// encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565
899	static int copy_transl_565(void dst, const* Uint32 src, int* n,
900	const SDL_PixelFormatDetails sfmt, const* SDL_PixelFormatDetails *dfmt)
901	{
902	int i;
903	Uint32 d = (Uint32 )dst;
904	for (i = `0`; i < n; i++) {
905	unsigned r, g, b, a;
906	Uint16 pix;
907	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
908	PIXEL_FROM_RGB(pix, dfmt, r, g, b);
909	*d = ((pix & `0x7e0`) << `16`) \| (pix & `0xf81f`) \| ((a << `2`) & `0x7e0`);
910	src++;
911	d++;
912	}
913	return n * `4`;
914	}
915
916	// encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555
917	static int copy_transl_555(void dst, const* Uint32 src, int* n,
918	const SDL_PixelFormatDetails sfmt, const* SDL_PixelFormatDetails *dfmt)
919	{
920	int i;
921	Uint32 d = (Uint32 )dst;
922	for (i = `0`; i < n; i++) {
923	unsigned r, g, b, a;
924	Uint16 pix;
925	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
926	PIXEL_FROM_RGB(pix, dfmt, r, g, b);
927	*d = ((pix & `0x3e0`) << `16`) \| (pix & `0xfc1f`) \| ((a << `2`) & `0x3e0`);
928	src++;
929	d++;
930	}
931	return n * `4`;
932	}
933
934	// decode translucent pixels from 32bpp GORAB to 32bpp rgb + a
935	static int uncopy_transl_16(Uint32 dst, const* void src, int* n,
936	const SDL_PixelFormatDetails sfmt, const* SDL_PixelFormatDetails *dfmt)
937	{
938	int i;
939	const Uint32 s = (const* Uint32 *)src;
940	for (i = `0`; i < n; i++) {
941	unsigned r, g, b, a;
942	Uint32 pix = *s++;
943	a = (pix & `0x3e0`) >> `2`;
944	pix = (pix & ~`0x3e0`) \| pix >> `16`;
945	RGB_FROM_PIXEL(pix, sfmt, r, g, b);
946	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
947	dst++;
948	}
949	return n * `4`;
950	}
951
952	// encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose)
953	static int copy_32(void dst, const* Uint32 src, int* n,
954	const SDL_PixelFormatDetails sfmt, const* SDL_PixelFormatDetails *dfmt)
955	{
956	int i;
957	Uint32 d = (Uint32 )dst;
958	for (i = `0`; i < n; i++) {
959	unsigned r, g, b, a;
960	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
961	RLEPIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
962	d++;
963	src++;
964	}
965	return n * `4`;
966	}
967
968	// decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose)
969	static int uncopy_32(Uint32 dst, const* void src, int* n,
970	const SDL_PixelFormatDetails sfmt, const* SDL_PixelFormatDetails *dfmt)
971	{
972	int i;
973	const Uint32 s = (const* Uint32 *)src;
974	for (i = `0`; i < n; i++) {
975	unsigned r, g, b, a;
976	Uint32 pixel = *s++;
977	RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
978	a = pixel >> `24`;
979	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
980	dst++;
981	}
982	return n * `4`;
983	}
984
/* True when the alpha field of `pixel` (per fmt->Amask / fmt->Ashift) is 255. */
#define ISOPAQUE(pixel, fmt) ((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) == 255)

/*
 * True when the alpha field of `pixel` is in 1..254. Subtracting 1 as
 * unsigned makes alpha 0 wrap to a large value, so one compare covers
 * both ends of the range.
 */
#define ISTRANSL(pixel, fmt) \
    ((unsigned)((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) - 1U) < 254U)
989
990	// convert surface to be quickly alpha-blittable onto dest, if possible
991	static bool RLEAlphaSurface(SDL_Surface *surface)
992	{
993	SDL_Surface *dest;
994	const SDL_PixelFormatDetails *df;
995	int maxsize = `0`;
996	int max_opaque_run;
997	int max_transl_run = `65535`;
998	unsigned masksum;
999	Uint8 rlebuf, dst;
1000	int (copy_opaque)(void* , const* Uint32 , int*,
1001	const SDL_PixelFormatDetails , const* SDL_PixelFormatDetails *);
1002	int (copy_transl)(void* , const* Uint32 , int*,
1003	const SDL_PixelFormatDetails , const* SDL_PixelFormatDetails *);
1004
1005	dest = surface->map.info.dst_surface;
1006	if (!dest) {
1007	return false;
1008	}
1009	df = dest->fmt;
1010	if (surface->fmt->bits_per_pixel != `32`) {
1011	return false; // only 32bpp source supported
1012	}
1013
1014	/ find out whether the destination is one we support,*
1015	and determine the max size of the encoded result /*
1016	masksum = df->Rmask \| df->Gmask \| df->Bmask;
1017	switch (df->bytes_per_pixel) {
1018	case `2`:
1019	// 16bpp: only support 565 and 555 formats
1020	switch (masksum) {
1021	case `0xffff`:
1022	if (df->Gmask == `0x07e0` \|\| df->Rmask == `0x07e0` \|\| df->Bmask == `0x07e0`) {
1023	copy_opaque = copy_opaque_16;
1024	copy_transl = copy_transl_565;
1025	} else {
1026	return false;
1027	}
1028	break;
1029	case `0x7fff`:
1030	if (df->Gmask == `0x03e0` \|\| df->Rmask == `0x03e0` \|\| df->Bmask == `0x03e0`) {
1031	copy_opaque = copy_opaque_16;
1032	copy_transl = copy_transl_555;
1033	} else {
1034	return false;
1035	}
1036	break;
1037	default:
1038	return false;
1039	}
1040	max_opaque_run = `255`; // runs stored as bytes
1041
1042	/ worst case is alternating opaque and translucent pixels,*
1043	with room for alignment padding between lines /*
1044	maxsize = surface->h * (`2` + (`4` + `2`) * (surface->w + `1`)) + `2`;
1045	break;
1046	case `4`:
1047	if (masksum != `0x00ffffff`) {
1048	return false; // requires unused high byte
1049	}
1050	copy_opaque = copy_32;
1051	copy_transl = copy_32;
1052	max_opaque_run = `255`; // runs stored as short ints
1053
1054	// worst case is alternating opaque and translucent pixels
1055	maxsize = surface->h * `2` * `4` * (surface->w + `1`) + `4`;
1056	break;
1057	default:
1058	return false; // anything else unsupported right now
1059	}
1060
1061	maxsize += sizeof(SDL_PixelFormat);
1062	rlebuf = (Uint8 *)SDL_malloc(maxsize);
1063	if (!rlebuf) {
1064	return false;
1065	}
1066	// save the destination format so we can undo the encoding later
1067	(SDL_PixelFormat )rlebuf = dest->format;
1068	dst = rlebuf + sizeof(SDL_PixelFormat);
1069
1070	// Do the actual encoding
1071	{
1072	int x, y;
1073	int h = surface->h, w = surface->w;
1074	const SDL_PixelFormatDetails *sf = surface->fmt;
1075	Uint32 src = (Uint32 )surface->pixels;
1076	Uint8 lastline = dst; // end of last non-blank line*
1077
1078	// opaque counts are 8 or 16 bits, depending on target depth
1079	#define ADD_OPAQUE_COUNTS(n, m) \
1080	if (df->bytes_per_pixel == 4) { \
1081	((Uint16 *)dst)[0] = (Uint16)n; \
1082	((Uint16 *)dst)[1] = (Uint16)m; \
1083	dst += 4; \
1084	} else { \
1085	dst[0] = (Uint8)n; \
1086	dst[1] = (Uint8)m; \
1087	dst += 2; \
1088	}
1089
1090	// translucent counts are always 16 bit
1091	#define ADD_TRANSL_COUNTS(n, m) \
1092	(((Uint16 )dst)[0] = (Uint16)n, ((Uint16 )dst)[1] = (Uint16)m, dst += 4)
1093
1094	for (y = `0`; y < h; y++) {
1095	int runstart, skipstart;
1096	int blankline = `0`;
1097	// First encode all opaque pixels of a scan line
1098	x = `0`;
1099	do {
1100	int run, skip, len;
1101	skipstart = x;
1102	while (x < w && !ISOPAQUE(src[x], sf)) {
1103	x++;
1104	}
1105	runstart = x;
1106	while (x < w && ISOPAQUE(src[x], sf)) {
1107	x++;
1108	}
1109	skip = runstart - skipstart;
1110	if (skip == w) {
1111	blankline = `1`;
1112	}
1113	run = x - runstart;
1114	while (skip > max_opaque_run) {
1115	ADD_OPAQUE_COUNTS(max_opaque_run, `0`);
1116	skip -= max_opaque_run;
1117	}
1118	len = SDL_min(run, max_opaque_run);
1119	ADD_OPAQUE_COUNTS(skip, len);
1120	dst += copy_opaque(dst, src + runstart, len, sf, df);
1121	runstart += len;
1122	run -= len;
1123	while (run) {
1124	len = SDL_min(run, max_opaque_run);
1125	ADD_OPAQUE_COUNTS(`0`, len);
1126	dst += copy_opaque(dst, src + runstart, len, sf, df);
1127	runstart += len;
1128	run -= len;
1129	}
1130	} while (x < w);
1131
1132	// Make sure the next output address is 32-bit aligned
1133	dst += (uintptr_t)dst & `2`;
1134
1135	// Next, encode all translucent pixels of the same scan line
1136	x = `0`;
1137	do {
1138	int run, skip, len;
1139	skipstart = x;
1140	while (x < w && !ISTRANSL(src[x], sf)) {
1141	x++;
1142	}
1143	runstart = x;
1144	while (x < w && ISTRANSL(src[x], sf)) {
1145	x++;
1146	}
1147	skip = runstart - skipstart;
1148	blankline &= (skip == w);
1149	run = x - runstart;
1150	while (skip > max_transl_run) {
1151	ADD_TRANSL_COUNTS(max_transl_run, `0`);
1152	skip -= max_transl_run;
1153	}
1154	len = SDL_min(run, max_transl_run);
1155	ADD_TRANSL_COUNTS(skip, len);
1156	dst += copy_transl(dst, src + runstart, len, sf, df);
1157	runstart += len;
1158	run -= len;
1159	while (run) {
1160	len = SDL_min(run, max_transl_run);
1161	ADD_TRANSL_COUNTS(`0`, len);
1162	dst += copy_transl(dst, src + runstart, len, sf, df);
1163	runstart += len;
1164	run -= len;
1165	}
1166	if (!blankline) {
1167	lastline = dst;
1168	}
1169	} while (x < w);
1170
1171	src += surface->pitch >> `2`;
1172	}
1173	dst = lastline; // back up past trailing blank lines
1174	ADD_OPAQUE_COUNTS(`0`, `0`);
1175	}
1176
1177	#undef ADD_OPAQUE_COUNTS
1178	#undef ADD_TRANSL_COUNTS
1179
1180	// Now that we have it encoded, release the original pixels
1181	if (!(surface->flags & SDL_SURFACE_PREALLOCATED)) {
1182	if (surface->flags & SDL_SURFACE_SIMD_ALIGNED) {
1183	SDL_aligned_free(surface->pixels);
1184	surface->flags &= ~SDL_SURFACE_SIMD_ALIGNED;
1185	} else {
1186	SDL_free(surface->pixels);
1187	}
1188	surface->pixels = NULL;
1189	}
1190
1191	// reallocate the buffer to release unused memory
1192	{
1193	Uint8 p = (Uint8 )SDL_realloc(rlebuf, dst - rlebuf);
1194	if (!p) {
1195	p = rlebuf;
1196	}
1197	surface->map.data = p;
1198	}
1199
1200	return true;
1201	}
1202
1203	static Uint32 getpix_8(const Uint8 *srcbuf)
1204	{
1205	return *srcbuf;
1206	}
1207
1208	static Uint32 getpix_16(const Uint8 *srcbuf)
1209	{
1210	return (const* Uint16 *)srcbuf;
1211	}
1212
1213	static Uint32 getpix_24(const Uint8 *srcbuf)
1214	{
1215	#if SDL_BYTEORDER == SDL_LIL_ENDIAN
1216	return srcbuf[`0`] + (srcbuf[`1`] << `8`) + (srcbuf[`2`] << `16`);
1217	#else
1218	return (srcbuf[`0`] << `16`) + (srcbuf[`1`] << `8`) + srcbuf[`2`];
1219	#endif
1220	}
1221
1222	static Uint32 getpix_32(const Uint8 *srcbuf)
1223	{
1224	return (const* Uint32 *)srcbuf;
1225	}
1226
1227	typedef Uint32 (getpix_func)(const* Uint8 *);
1228
1229	static const getpix_func getpixes[`4`] = {
1230	getpix_8, getpix_16, getpix_24, getpix_32
1231	};
1232
1233	static bool RLEColorkeySurface(SDL_Surface *surface)
1234	{
1235	SDL_Surface *dest;
1236	Uint8 rlebuf, dst;
1237	int maxn;
1238	int y;
1239	Uint8 srcbuf, lastline;
1240	int maxsize = `0`;
1241	const int bpp = surface->fmt->bytes_per_pixel;
1242	getpix_func getpix;
1243	Uint32 ckey, rgbmask;
1244	int w, h;
1245
1246	dest = surface->map.info.dst_surface;
1247	if (!dest) {
1248	return false;
1249	}
1250
1251	// calculate the worst case size for the compressed surface
1252	switch (bpp) {
1253	case `1`:
1254	/ worst case is alternating opaque and transparent pixels,*
1255	starting with an opaque pixel /*
1256	maxsize = surface->h * `3` * (surface->w / `2` + `1`) + `2`;
1257	break;
1258	case `2`:
1259	case `3`:
1260	// worst case is solid runs, at most 255 pixels wide
1261	maxsize = surface->h * (`2` * (surface->w / `255` + `1`) + surface->w * bpp) + `2`;
1262	break;
1263	case `4`:
1264	// worst case is solid runs, at most 65535 pixels wide
1265	maxsize = surface->h * (`4` * (surface->w / `65535` + `1`) + surface->w * `4`) + `4`;
1266	break;
1267
1268	default:
1269	return false;
1270	}
1271
1272	maxsize += sizeof(SDL_PixelFormat);
1273	rlebuf = (Uint8 *)SDL_malloc(maxsize);
1274	if (!rlebuf) {
1275	return false;
1276	}
1277	// save the destination format so we can undo the encoding later
1278	(SDL_PixelFormat )rlebuf = dest->format;
1279
1280	// Set up the conversion
1281	srcbuf = (Uint8 *)surface->pixels;
1282	maxn = bpp == `4` ? `65535` : `255`;
1283	dst = rlebuf + sizeof(SDL_PixelFormat);
1284	rgbmask = ~surface->fmt->Amask;
1285	ckey = surface->map.info.colorkey & rgbmask;
1286	lastline = dst;
1287	getpix = getpixes[bpp - `1`];
1288	w = surface->w;
1289	h = surface->h;
1290
1291	#define ADD_COUNTS(n, m) \
1292	if (bpp == 4) { \
1293	((Uint16 *)dst)[0] = (Uint16)n; \
1294	((Uint16 *)dst)[1] = (Uint16)m; \
1295	dst += 4; \
1296	} else { \
1297	dst[0] = (Uint8)n; \
1298	dst[1] = (Uint8)m; \
1299	dst += 2; \
1300	}
1301
1302	for (y = `0`; y < h; y++) {
1303	int x = `0`;
1304	int blankline = `0`;
1305	do {
1306	int run, skip;
1307	int len;
1308	int runstart;
1309	int skipstart = x;
1310
1311	// find run of transparent, then opaque pixels
1312	while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey) {
1313	x++;
1314	}
1315	runstart = x;
1316	while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey) {
1317	x++;
1318	}
1319	skip = runstart - skipstart;
1320	if (skip == w) {
1321	blankline = `1`;
1322	}
1323	run = x - runstart;
1324
1325	// encode segment
1326	while (skip > maxn) {
1327	ADD_COUNTS(maxn, `0`);
1328	skip -= maxn;
1329	}
1330	len = SDL_min(run, maxn);
1331	ADD_COUNTS(skip, len);
1332	SDL_memcpy(dst, srcbuf + runstart * bpp, (size_t)len * bpp);
1333	dst += len * bpp;
1334	run -= len;
1335	runstart += len;
1336	while (run) {
1337	len = SDL_min(run, maxn);
1338	ADD_COUNTS(`0`, len);
1339	SDL_memcpy(dst, srcbuf + runstart * bpp, (size_t)len * bpp);
1340	dst += len * bpp;
1341	runstart += len;
1342	run -= len;
1343	}
1344	if (!blankline) {
1345	lastline = dst;
1346	}
1347	} while (x < w);
1348
1349	srcbuf += surface->pitch;
1350	}
1351	dst = lastline; // back up bast trailing blank lines
1352	ADD_COUNTS(`0`, `0`);
1353
1354	#undef ADD_COUNTS
1355
1356	// Now that we have it encoded, release the original pixels
1357	if (!(surface->flags & SDL_SURFACE_PREALLOCATED)) {
1358	if (surface->flags & SDL_SURFACE_SIMD_ALIGNED) {
1359	SDL_aligned_free(surface->pixels);
1360	surface->flags &= ~SDL_SURFACE_SIMD_ALIGNED;
1361	} else {
1362	SDL_free(surface->pixels);
1363	}
1364	surface->pixels = NULL;
1365	}
1366
1367	// reallocate the buffer to release unused memory
1368	{
1369	// If SDL_realloc returns NULL, the original block is left intact
1370	Uint8 p = (Uint8 )SDL_realloc(rlebuf, dst - rlebuf);
1371	if (!p) {
1372	p = rlebuf;
1373	}
1374	surface->map.data = p;
1375	}
1376
1377	return true;
1378	}
1379
1380	bool SDL_RLESurface(SDL_Surface *surface)
1381	{
1382	int flags;
1383
1384	// Clear any previous RLE conversion
1385	if (surface->internal_flags & SDL_INTERNAL_SURFACE_RLEACCEL) {
1386	SDL_UnRLESurface(surface, true);
1387	}
1388
1389	// We don't support RLE encoding of bitmaps
1390	if (SDL_BITSPERPIXEL(surface->format) < `8`) {
1391	return false;
1392	}
1393
1394	// Make sure the pixels are available
1395	if (!surface->pixels) {
1396	return false;
1397	}
1398
1399	flags = surface->map.info.flags;
1400	if (flags & SDL_COPY_COLORKEY) {
1401	// ok
1402	} else if ((flags & SDL_COPY_BLEND) && SDL_ISPIXELFORMAT_ALPHA(surface->format)) {
1403	// ok
1404	} else {
1405	// If we don't have colorkey or blending, nothing to do...
1406	return false;
1407	}
1408
1409	// Pass on combinations not supported
1410	if ((flags & SDL_COPY_MODULATE_COLOR) \|\|
1411	((flags & SDL_COPY_MODULATE_ALPHA) && SDL_ISPIXELFORMAT_ALPHA(surface->format)) \|\|
1412	(flags & (SDL_COPY_BLEND_PREMULTIPLIED \| SDL_COPY_ADD \| SDL_COPY_ADD_PREMULTIPLIED \| SDL_COPY_MOD \| SDL_COPY_MUL)) \|\|
1413	(flags & SDL_COPY_NEAREST)) {
1414	return false;
1415	}
1416
1417	// Encode and set up the blit
1418	if (!SDL_ISPIXELFORMAT_ALPHA(surface->format) \|\| !(flags & SDL_COPY_BLEND)) {
1419	if (!surface->map.identity) {
1420	return false;
1421	}
1422	if (!RLEColorkeySurface(surface)) {
1423	return false;
1424	}
1425	surface->map.blit = SDL_RLEBlit;
1426	surface->map.info.flags \|= SDL_COPY_RLE_COLORKEY;
1427	} else {
1428	if (!RLEAlphaSurface(surface)) {
1429	return false;
1430	}
1431	surface->map.blit = SDL_RLEAlphaBlit;
1432	surface->map.info.flags \|= SDL_COPY_RLE_ALPHAKEY;
1433	}
1434
1435	// The surface is now accelerated
1436	surface->internal_flags \|= SDL_INTERNAL_SURFACE_RLEACCEL;
1437
1438	return true;
1439	}
1440
1441	/*
1442	* Un-RLE a surface with pixel alpha
1443	* This may not give back exactly the image before RLE-encoding; all
1444	* completely transparent pixels will be lost, and color and alpha depth
1445	* may have been reduced (when encoding for 16bpp targets).
1446	*/
1447	static bool UnRLEAlpha(SDL_Surface *surface)
1448	{
1449	Uint8 *srcbuf;
1450	Uint32 *dst;
1451	const SDL_PixelFormatDetails *sf = surface->fmt;
1452	const SDL_PixelFormatDetails df = SDL_GetPixelFormatDetails((SDL_PixelFormat *)surface->map.data);
1453	int (uncopy_opaque)(Uint32 , const void , int*,
1454	const SDL_PixelFormatDetails , const* SDL_PixelFormatDetails *);
1455	int (uncopy_transl)(Uint32 , const void , int*,
1456	const SDL_PixelFormatDetails , const* SDL_PixelFormatDetails *);
1457	int w = surface->w;
1458	int bpp = df->bytes_per_pixel;
1459	size_t size;
1460
1461	if (bpp == `2`) {
1462	uncopy_opaque = uncopy_opaque_16;
1463	uncopy_transl = uncopy_transl_16;
1464	} else {
1465	uncopy_opaque = uncopy_transl = uncopy_32;
1466	}
1467
1468	if (!SDL_size_mul_check_overflow(surface->h, surface->pitch, &size)) {
1469	return false;
1470	}
1471
1472	surface->pixels = SDL_aligned_alloc(SDL_GetSIMDAlignment(), size);
1473	if (!surface->pixels) {
1474	return false;
1475	}
1476	surface->flags \|= SDL_SURFACE_SIMD_ALIGNED;
1477	// fill background with transparent pixels
1478	SDL_memset(surface->pixels, `0`, (size_t)surface->h * surface->pitch);
1479
1480	dst = (Uint32 *)surface->pixels;
1481	srcbuf = (Uint8 )surface->map.data + sizeof*(SDL_PixelFormat);
1482	for (;;) {
1483	// copy opaque pixels
1484	int ofs = `0`;
1485	do {
1486	unsigned run;
1487	if (bpp == `2`) {
1488	ofs += srcbuf[`0`];
1489	run = srcbuf[`1`];
1490	srcbuf += `2`;
1491	} else {
1492	ofs += ((Uint16 *)srcbuf)[`0`];
1493	run = ((Uint16 *)srcbuf)[`1`];
1494	srcbuf += `4`;
1495	}
1496	if (run) {
1497	srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
1498	ofs += run;
1499	} else if (!ofs) {
1500	goto end_function;
1501	}
1502	} while (ofs < w);
1503
1504	// skip padding if needed
1505	if (bpp == `2`) {
1506	srcbuf += (uintptr_t)srcbuf & `2`;
1507	}
1508
1509	// copy translucent pixels
1510	ofs = `0`;
1511	do {
1512	unsigned run;
1513	ofs += ((Uint16 *)srcbuf)[`0`];
1514	run = ((Uint16 *)srcbuf)[`1`];
1515	srcbuf += `4`;
1516	if (run) {
1517	srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
1518	ofs += run;
1519	}
1520	} while (ofs < w);
1521	dst += surface->pitch >> `2`;
1522	}
1523
1524	end_function:
1525	return true;
1526	}
1527
1528	void SDL_UnRLESurface(SDL_Surface *surface, bool recode)
1529	{
1530	if (surface->internal_flags & SDL_INTERNAL_SURFACE_RLEACCEL) {
1531	surface->internal_flags &= ~SDL_INTERNAL_SURFACE_RLEACCEL;
1532
1533	if (recode && !(surface->flags & SDL_SURFACE_PREALLOCATED)) {
1534	if (surface->map.info.flags & SDL_COPY_RLE_COLORKEY) {
1535	SDL_Rect full;
1536	size_t size;
1537
1538	// re-create the original surface
1539	if (!SDL_size_mul_check_overflow(surface->h, surface->pitch, &size)) {
1540	// Memory corruption?
1541	surface->internal_flags \|= SDL_INTERNAL_SURFACE_RLEACCEL;
1542	return;
1543	}
1544	surface->pixels = SDL_aligned_alloc(SDL_GetSIMDAlignment(), size);
1545	if (!surface->pixels) {
1546	// Oh crap...
1547	surface->internal_flags \|= SDL_INTERNAL_SURFACE_RLEACCEL;
1548	return;
1549	}
1550	surface->flags \|= SDL_SURFACE_SIMD_ALIGNED;
1551
1552	// fill it with the background color
1553	SDL_FillSurfaceRect(surface, NULL, surface->map.info.colorkey);
1554
1555	// now render the encoded surface
1556	full.x = full.y = `0`;
1557	full.w = surface->w;
1558	full.h = surface->h;
1559	SDL_RLEBlit(surface, &full, surface, &full);
1560	} else {
1561	if (!UnRLEAlpha(surface)) {
1562	// Oh crap...
1563	surface->internal_flags \|= SDL_INTERNAL_SURFACE_RLEACCEL;
1564	return;
1565	}
1566	}
1567	}
1568	surface->map.info.flags &=
1569	~(SDL_COPY_RLE_COLORKEY \| SDL_COPY_RLE_ALPHAKEY);
1570
1571	SDL_free(surface->map.data);
1572	surface->map.data = NULL;
1573	}
1574	}
1575
1576	#endif // SDL_HAVE_RLE
1577

Browse the source code of SDL/src/video/SDL_RLEaccel.c