1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22
23#include "SDL_surface_c.h"
24
25#ifdef SDL_SSE_INTRINSICS
26/* *INDENT-OFF* */ // clang-format off
27
// SSE_BEGIN declares the vector `c128` and copies the 32-bit `color` pattern
// into each of its four lanes. It expects `color` to be in scope.
#if defined(_MSC_VER) && !defined(__clang__)
// This branch writes the lanes directly through the m128_u32 member of __m128.
#define SSE_BEGIN \
    __m128 c128; \
    c128.m128_u32[0] = c128.m128_u32[1] = \
    c128.m128_u32[2] = c128.m128_u32[3] = color;
#else
// Other compilers: stage the pattern in a 16-byte aligned array, then load it.
#define SSE_BEGIN \
    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
    __m128 c128; \
    cccc[0] = cccc[1] = cccc[2] = cccc[3] = color; \
    c128 = *(__m128 *)cccc;
#endif
45
// SSE_WORK stores `c128` over every whole 64-byte chunk of the `n` bytes at
// `p`, advancing `p` past what it wrote. It uses the caller's `i` as the chunk
// counter and leaves `n` untouched. Callers align `p` to 16 bytes beforehand.
#define SSE_WORK \
    for (i = n / 64; i-- != 0;) { \
        _mm_stream_ps((float *)p + 0, c128); \
        _mm_stream_ps((float *)p + 4, c128); \
        _mm_stream_ps((float *)p + 8, c128); \
        _mm_stream_ps((float *)p + 12, c128); \
        p += 64; \
    }

// SSE_END closes the SSE section of a fill routine; it currently expands to nothing.
#define SSE_END
56
/*
 * DEFINE_SSE_FILLRECT(bpp, type) generates SDL_FillSurfaceRect<bpp>SSE(), which
 * fills a w x h pixel rectangle starting at `pixels` with `color`.
 *
 *  bpp     bytes per pixel (instantiated below with 2 and 4)
 *  type    unsigned integer type that is exactly `bpp` bytes wide
 *
 * Generated function parameters:
 *  pixels  address of the first pixel to fill
 *  pitch   distance in bytes between the starts of consecutive rows
 *  color   pixel value; the caller replicates it across all 32 bits so it can
 *          be broadcast into the SSE register unchanged
 *  w, h    rectangle size in pixels
 *
 * Each row is filled in three steps: scalar stores up to the next 16-byte
 * boundary, streaming stores for every whole 64-byte chunk, scalar stores for
 * the rest. The streaming stores need a 16-byte aligned address, and stepping
 * in whole pixels can only reach one from a pixel-aligned start, so rows that
 * do not start on a multiple of `bpp` are filled entirely by the scalar loop.
 */
#define DEFINE_SSE_FILLRECT(bpp, type) \
static void SDL_TARGETING("sse") SDL_FillSurfaceRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
    int i, n; \
    Uint8 *p = NULL; \
 \
    /* If the number of bytes per row is equal to the pitch, treat */ \
    /* all rows as one long continuous row (for better performance) */ \
    if ((w) * (bpp) == pitch) { \
        w = w * h; \
        h = 1; \
    } \
 \
    SSE_BEGIN; \
 \
    while (h--) { \
        n = (w) * (bpp); \
        p = pixels; \
 \
        /* Only use the streaming path when whole-pixel steps can reach */ \
        /* a 16-byte boundary, i.e. when the row start is pixel-aligned */ \
        if (n > 63 && ((uintptr_t)p & ((bpp) - 1)) == 0) { \
            /* Bytes up to the next 16-byte boundary (16 == already aligned) */ \
            int adjust = 16 - ((uintptr_t)p & 15); \
            if (adjust < 16) { \
                n -= adjust; \
                adjust /= (bpp); \
                while (adjust--) { \
                    *((type *)p) = (type)color; \
                    p += (bpp); \
                } \
            } \
            SSE_WORK; \
            /* Whatever is left is less than one 64-byte chunk */ \
            n &= 63; \
        } \
        /* Scalar tail, or the whole row when the streaming path was skipped */ \
        for (i = n / (bpp); i > 0; --i) { \
            *((type *)p) = (type)color; \
            p += (bpp); \
        } \
        pixels += pitch; \
    } \
 \
    SSE_END; \
}
101
102static void SDL_TARGETING("sse") SDL_FillSurfaceRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
103{
104 int i, n;
105
106 SSE_BEGIN;
107 while (h--) {
108 Uint8 *p = pixels;
109 n = w;
110
111 if (n > 63) {
112 int adjust = 16 - ((uintptr_t)p & 15);
113 if (adjust) {
114 n -= adjust;
115 SDL_memset(p, color, adjust);
116 p += adjust;
117 }
118 SSE_WORK;
119 }
120 if (n & 63) {
121 int remainder = (n & 63);
122 SDL_memset(p, color, remainder);
123 }
124 pixels += pitch;
125 }
126
127 SSE_END;
128}
// The 1 byte-per-pixel variant is written by hand above, so the macro is only
// instantiated for 2 bytes per pixel (SDL_FillSurfaceRect2SSE) and 4 bytes per
// pixel (SDL_FillSurfaceRect4SSE).
DEFINE_SSE_FILLRECT(2, Uint16)
DEFINE_SSE_FILLRECT(4, Uint32)
132
133/* *INDENT-ON* */ // clang-format on
#endif // SDL_SSE_INTRINSICS
135
136static void SDL_FillSurfaceRect1(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
137{
138 int n;
139 Uint8 *p = NULL;
140
141 while (h--) {
142 n = w;
143 p = pixels;
144
145 if (n > 3) {
146 switch ((uintptr_t)p & 3) {
147 case 1:
148 *p++ = (Uint8)color;
149 --n;
150 SDL_FALLTHROUGH;
151 case 2:
152 *p++ = (Uint8)color;
153 --n;
154 SDL_FALLTHROUGH;
155 case 3:
156 *p++ = (Uint8)color;
157 --n;
158 }
159 SDL_memset4(p, color, (n >> 2));
160 }
161 if (n & 3) {
162 p += (n & ~3);
163 switch (n & 3) {
164 case 3:
165 *p++ = (Uint8)color;
166 SDL_FALLTHROUGH;
167 case 2:
168 *p++ = (Uint8)color;
169 SDL_FALLTHROUGH;
170 case 1:
171 *p++ = (Uint8)color;
172 }
173 }
174 pixels += pitch;
175 }
176}
177
178static void SDL_FillSurfaceRect2(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
179{
180 int n;
181 Uint16 *p = NULL;
182
183 while (h--) {
184 n = w;
185 p = (Uint16 *)pixels;
186
187 if (n > 1) {
188 if ((uintptr_t)p & 2) {
189 *p++ = (Uint16)color;
190 --n;
191 }
192 SDL_memset4(p, color, (n >> 1));
193 }
194 if (n & 1) {
195 p[n - 1] = (Uint16)color;
196 }
197 pixels += pitch;
198 }
199}
200
201static void SDL_FillSurfaceRect3(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
202{
203#if SDL_BYTEORDER == SDL_LIL_ENDIAN
204 Uint8 b1 = (Uint8)(color & 0xFF);
205 Uint8 b2 = (Uint8)((color >> 8) & 0xFF);
206 Uint8 b3 = (Uint8)((color >> 16) & 0xFF);
207#elif SDL_BYTEORDER == SDL_BIG_ENDIAN
208 Uint8 b1 = (Uint8)((color >> 16) & 0xFF);
209 Uint8 b2 = (Uint8)((color >> 8) & 0xFF);
210 Uint8 b3 = (Uint8)(color & 0xFF);
211#endif
212 int n;
213 Uint8 *p = NULL;
214
215 while (h--) {
216 n = w;
217 p = pixels;
218
219 while (n--) {
220 *p++ = b1;
221 *p++ = b2;
222 *p++ = b3;
223 }
224 pixels += pitch;
225 }
226}
227
228static void SDL_FillSurfaceRect4(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
229{
230 while (h--) {
231 SDL_memset4(pixels, color, w);
232 pixels += pitch;
233 }
234}
235
236/*
237 * This function performs a fast fill of the given rectangle with 'color'
238 */
239bool SDL_FillSurfaceRect(SDL_Surface *dst, const SDL_Rect *rect, Uint32 color)
240{
241 if (!SDL_SurfaceValid(dst)) {
242 return SDL_InvalidParamError("SDL_FillSurfaceRect(): dst");
243 }
244
245 // If 'rect' == NULL, then fill the whole surface
246 if (!rect) {
247 rect = &dst->clip_rect;
248 // Don't attempt to fill if the surface's clip_rect is empty
249 if (SDL_RectEmpty(rect)) {
250 return true;
251 }
252 }
253
254 return SDL_FillSurfaceRects(dst, rect, 1, color);
255}
256
257bool SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count, Uint32 color)
258{
259 SDL_Rect clipped;
260 Uint8 *pixels;
261 const SDL_Rect *rect;
262 void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL;
263 int i;
264
265 if (!SDL_SurfaceValid(dst)) {
266 return SDL_InvalidParamError("SDL_FillSurfaceRects(): dst");
267 }
268
269 // Nothing to do
270 if (dst->w == 0 || dst->h == 0) {
271 return true;
272 }
273
274 // Perform software fill
275 if (!dst->pixels) {
276 return SDL_SetError("SDL_FillSurfaceRects(): You must lock the surface");
277 }
278
279 if (!rects) {
280 return SDL_InvalidParamError("SDL_FillSurfaceRects(): rects");
281 }
282
283 /* This function doesn't usually work on surfaces < 8 bpp
284 * Except: support for 4bits, when filling full size.
285 */
286 if (SDL_BITSPERPIXEL(dst->format) < 8) {
287 if (count == 1) {
288 const SDL_Rect *r = &rects[0];
289 if (r->x == 0 && r->y == 0 && r->w == dst->w && r->h == dst->h) {
290 if (SDL_BITSPERPIXEL(dst->format) == 4) {
291 Uint8 b = (((Uint8)color << 4) | (Uint8)color);
292 SDL_memset(dst->pixels, b, (size_t)dst->h * dst->pitch);
293 return true;
294 }
295 }
296 }
297 return SDL_SetError("SDL_FillSurfaceRects(): Unsupported surface format");
298 }
299
300 if (fill_function == NULL) {
301 switch (SDL_BYTESPERPIXEL(dst->format)) {
302 case 1:
303 {
304 color |= (color << 8);
305 color |= (color << 16);
306#ifdef SDL_SSE_INTRINSICS
307 if (SDL_HasSSE()) {
308 fill_function = SDL_FillSurfaceRect1SSE;
309 break;
310 }
311#endif
312 fill_function = SDL_FillSurfaceRect1;
313 break;
314 }
315
316 case 2:
317 {
318 color |= (color << 16);
319#ifdef SDL_SSE_INTRINSICS
320 if (SDL_HasSSE()) {
321 fill_function = SDL_FillSurfaceRect2SSE;
322 break;
323 }
324#endif
325 fill_function = SDL_FillSurfaceRect2;
326 break;
327 }
328
329 case 3:
330 // 24-bit RGB is a slow path, at least for now.
331 {
332 fill_function = SDL_FillSurfaceRect3;
333 break;
334 }
335
336 case 4:
337 {
338#ifdef SDL_SSE_INTRINSICS
339 if (SDL_HasSSE()) {
340 fill_function = SDL_FillSurfaceRect4SSE;
341 break;
342 }
343#endif
344 fill_function = SDL_FillSurfaceRect4;
345 break;
346 }
347
348 default:
349 return SDL_SetError("Unsupported pixel format");
350 }
351 }
352
353 for (i = 0; i < count; ++i) {
354 rect = &rects[i];
355 // Perform clipping
356 if (!SDL_GetRectIntersection(rect, &dst->clip_rect, &clipped)) {
357 continue;
358 }
359 rect = &clipped;
360
361 pixels = (Uint8 *)dst->pixels + rect->y * dst->pitch +
362 rect->x * SDL_BYTESPERPIXEL(dst->format);
363
364 fill_function(pixels, dst->pitch, color, rect->w, rect->h);
365 }
366
367 // We're done!
368 return true;
369}
370