1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #include "SDL_internal.h" |
22 | |
23 | #include "SDL_surface_c.h" |
24 | |
25 | #ifdef SDL_SSE_INTRINSICS |
26 | /* *INDENT-OFF* */ // clang-format off |
27 | |
/*
 * SSE_BEGIN: declares `c128` and fills all four 32-bit lanes with the raw
 * bits of `color` (a 32-bit value that must be in scope where the macro is
 * expanded). The bits are copied as-is; no integer-to-float conversion is
 * performed.
 */
#if defined(_MSC_VER) && !defined(__clang__)
/* MSVC (but not clang in MSVC mode): write the lanes through the m128_u32 member of __m128. */
#define SSE_BEGIN \
    __m128 c128; \
    c128.m128_u32[0] = color; \
    c128.m128_u32[1] = color; \
    c128.m128_u32[2] = color; \
    c128.m128_u32[3] = color;
#else
/* Other compilers: stage four copies in the array declared by DECLARE_ALIGNED */
/* (presumably 16-byte aligned, per its last argument) and read it back as one __m128. */
#define SSE_BEGIN \
    __m128 c128; \
    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
    cccc[0] = color; \
    cccc[1] = color; \
    cccc[2] = color; \
    cccc[3] = color; \
    c128 = *(__m128 *)cccc;
#endif
45 | |
/*
 * SSE_WORK: writes c128 over every complete 64-byte chunk of the `n` bytes
 * starting at `p`, using four 16-byte _mm_stream_ps stores per iteration and
 * advancing `p` past what was written. `i` is used as the loop counter.
 * `n` is not modified, so the expansion site still has to write the trailing
 * (n & 63) bytes. The expansion sites in this file step `p` toward a 16-byte
 * boundary before using this macro.
 */
#define SSE_WORK \
    for (i = n / 64; i--;) { \
        _mm_stream_ps((float *)(p+0), c128); \
        _mm_stream_ps((float *)(p+16), c128); \
        _mm_stream_ps((float *)(p+32), c128); \
        _mm_stream_ps((float *)(p+48), c128); \
        p += 64; \
    }
54 | |
/* SSE_END: epilogue counterpart of SSE_BEGIN; it expands to nothing. */
#define SSE_END
56 | |
/*
 * DEFINE_SSE_FILLRECT(bpp, type) generates SDL_FillSurfaceRect<bpp>SSE(),
 * which fills a rectangle of w x h pixels with `color`.
 *
 *   bpp  - bytes per pixel; used for byte counts and as the pointer step
 *   type - integer type used for the single-pixel stores (color is cast to it)
 *
 * Parameters of the generated function:
 *   pixels - address of the first pixel of the first row
 *   pitch  - byte distance from one row to the next
 *   color  - fill value; SSE_BEGIN copies it into four 32-bit lanes, so for
 *            bpp == 2 it must already hold the pixel in both 16-bit halves
 *            (SDL_FillSurfaceRects replicates it before calling)
 *   w, h   - rectangle width in pixels and height in rows
 *
 * Each row is written in three parts: single-pixel stores up to a 16-byte
 * boundary, SSE_WORK for the complete 64-byte chunks, then single-pixel
 * stores for the remaining (n & 63) bytes.
 *
 * NOTE(review): `w * h` and `w * bpp` are evaluated in int; confirm callers
 * never pass rectangles whose byte count exceeds INT_MAX.
 * NOTE(review): `adjust /= (bpp)` truncates, so the head loop only reaches a
 * 16-byte boundary when `pixels` is a multiple of bpp - confirm this holds
 * for caller-supplied pixel buffers.
 */
#define DEFINE_SSE_FILLRECT(bpp, type) \
static void SDL_TARGETING("sse") SDL_FillSurfaceRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
    int i, n; \
    Uint8 *p = NULL; \
 \
    /* If the number of bytes per row is equal to the pitch, treat */ \
    /* all rows as one long continuous row (for better performance) */ \
    if ((w) * (bpp) == pitch) { \
        w = w * h; \
        h = 1; \
    } \
 \
    SSE_BEGIN; \
 \
    while (h--) { \
        /* n counts the bytes of this row that are still accounted for below */ \
        n = (w) * (bpp); \
        p = pixels; \
 \
        /* Rows shorter than 64 bytes skip the SSE path entirely */ \
        if (n > 63) { \
            /* Bytes up to the next 16-byte boundary; 16 means already aligned */ \
            int adjust = 16 - ((uintptr_t)p & 15); \
            if (adjust < 16) { \
                n -= adjust; \
                /* Convert the byte count into a pixel count for the store loop */ \
                adjust /= (bpp); \
                while (adjust--) { \
                    *((type *)p) = (type)color; \
                    p += (bpp); \
                } \
            } \
            SSE_WORK; \
        } \
        /* Bytes left after the 64-byte chunks (or the whole row when n <= 63) */ \
        if (n & 63) { \
            int remainder = (n & 63); \
            remainder /= (bpp); \
            while (remainder--) { \
                *((type *)p) = (type)color; \
                p += (bpp); \
            } \
        } \
        pixels += pitch; \
    } \
 \
    SSE_END; \
}
101 | |
102 | static void SDL_TARGETING("sse" ) SDL_FillSurfaceRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) |
103 | { |
104 | int i, n; |
105 | |
106 | SSE_BEGIN; |
107 | while (h--) { |
108 | Uint8 *p = pixels; |
109 | n = w; |
110 | |
111 | if (n > 63) { |
112 | int adjust = 16 - ((uintptr_t)p & 15); |
113 | if (adjust) { |
114 | n -= adjust; |
115 | SDL_memset(p, color, adjust); |
116 | p += adjust; |
117 | } |
118 | SSE_WORK; |
119 | } |
120 | if (n & 63) { |
121 | int remainder = (n & 63); |
122 | SDL_memset(p, color, remainder); |
123 | } |
124 | pixels += pitch; |
125 | } |
126 | |
127 | SSE_END; |
128 | } |
// The 1-byte-per-pixel variant is the hand-written SDL_FillSurfaceRect1SSE()
// above, so the macro is only instantiated for 2- and 4-byte pixels.
DEFINE_SSE_FILLRECT(2, Uint16)
DEFINE_SSE_FILLRECT(4, Uint32)
132 | |
133 | /* *INDENT-ON* */ // clang-format on |
#endif // SDL_SSE_INTRINSICS
135 | |
136 | static void SDL_FillSurfaceRect1(Uint8 *pixels, int pitch, Uint32 color, int w, int h) |
137 | { |
138 | int n; |
139 | Uint8 *p = NULL; |
140 | |
141 | while (h--) { |
142 | n = w; |
143 | p = pixels; |
144 | |
145 | if (n > 3) { |
146 | switch ((uintptr_t)p & 3) { |
147 | case 1: |
148 | *p++ = (Uint8)color; |
149 | --n; |
150 | SDL_FALLTHROUGH; |
151 | case 2: |
152 | *p++ = (Uint8)color; |
153 | --n; |
154 | SDL_FALLTHROUGH; |
155 | case 3: |
156 | *p++ = (Uint8)color; |
157 | --n; |
158 | } |
159 | SDL_memset4(p, color, (n >> 2)); |
160 | } |
161 | if (n & 3) { |
162 | p += (n & ~3); |
163 | switch (n & 3) { |
164 | case 3: |
165 | *p++ = (Uint8)color; |
166 | SDL_FALLTHROUGH; |
167 | case 2: |
168 | *p++ = (Uint8)color; |
169 | SDL_FALLTHROUGH; |
170 | case 1: |
171 | *p++ = (Uint8)color; |
172 | } |
173 | } |
174 | pixels += pitch; |
175 | } |
176 | } |
177 | |
178 | static void SDL_FillSurfaceRect2(Uint8 *pixels, int pitch, Uint32 color, int w, int h) |
179 | { |
180 | int n; |
181 | Uint16 *p = NULL; |
182 | |
183 | while (h--) { |
184 | n = w; |
185 | p = (Uint16 *)pixels; |
186 | |
187 | if (n > 1) { |
188 | if ((uintptr_t)p & 2) { |
189 | *p++ = (Uint16)color; |
190 | --n; |
191 | } |
192 | SDL_memset4(p, color, (n >> 1)); |
193 | } |
194 | if (n & 1) { |
195 | p[n - 1] = (Uint16)color; |
196 | } |
197 | pixels += pitch; |
198 | } |
199 | } |
200 | |
201 | static void SDL_FillSurfaceRect3(Uint8 *pixels, int pitch, Uint32 color, int w, int h) |
202 | { |
203 | #if SDL_BYTEORDER == SDL_LIL_ENDIAN |
204 | Uint8 b1 = (Uint8)(color & 0xFF); |
205 | Uint8 b2 = (Uint8)((color >> 8) & 0xFF); |
206 | Uint8 b3 = (Uint8)((color >> 16) & 0xFF); |
207 | #elif SDL_BYTEORDER == SDL_BIG_ENDIAN |
208 | Uint8 b1 = (Uint8)((color >> 16) & 0xFF); |
209 | Uint8 b2 = (Uint8)((color >> 8) & 0xFF); |
210 | Uint8 b3 = (Uint8)(color & 0xFF); |
211 | #endif |
212 | int n; |
213 | Uint8 *p = NULL; |
214 | |
215 | while (h--) { |
216 | n = w; |
217 | p = pixels; |
218 | |
219 | while (n--) { |
220 | *p++ = b1; |
221 | *p++ = b2; |
222 | *p++ = b3; |
223 | } |
224 | pixels += pitch; |
225 | } |
226 | } |
227 | |
228 | static void SDL_FillSurfaceRect4(Uint8 *pixels, int pitch, Uint32 color, int w, int h) |
229 | { |
230 | while (h--) { |
231 | SDL_memset4(pixels, color, w); |
232 | pixels += pitch; |
233 | } |
234 | } |
235 | |
236 | /* |
237 | * This function performs a fast fill of the given rectangle with 'color' |
238 | */ |
239 | bool SDL_FillSurfaceRect(SDL_Surface *dst, const SDL_Rect *rect, Uint32 color) |
240 | { |
241 | if (!SDL_SurfaceValid(dst)) { |
242 | return SDL_InvalidParamError("SDL_FillSurfaceRect(): dst" ); |
243 | } |
244 | |
245 | // If 'rect' == NULL, then fill the whole surface |
246 | if (!rect) { |
247 | rect = &dst->clip_rect; |
248 | // Don't attempt to fill if the surface's clip_rect is empty |
249 | if (SDL_RectEmpty(rect)) { |
250 | return true; |
251 | } |
252 | } |
253 | |
254 | return SDL_FillSurfaceRects(dst, rect, 1, color); |
255 | } |
256 | |
257 | bool SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count, Uint32 color) |
258 | { |
259 | SDL_Rect clipped; |
260 | Uint8 *pixels; |
261 | const SDL_Rect *rect; |
262 | void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL; |
263 | int i; |
264 | |
265 | if (!SDL_SurfaceValid(dst)) { |
266 | return SDL_InvalidParamError("SDL_FillSurfaceRects(): dst" ); |
267 | } |
268 | |
269 | // Nothing to do |
270 | if (dst->w == 0 || dst->h == 0) { |
271 | return true; |
272 | } |
273 | |
274 | // Perform software fill |
275 | if (!dst->pixels) { |
276 | return SDL_SetError("SDL_FillSurfaceRects(): You must lock the surface" ); |
277 | } |
278 | |
279 | if (!rects) { |
280 | return SDL_InvalidParamError("SDL_FillSurfaceRects(): rects" ); |
281 | } |
282 | |
283 | /* This function doesn't usually work on surfaces < 8 bpp |
284 | * Except: support for 4bits, when filling full size. |
285 | */ |
286 | if (SDL_BITSPERPIXEL(dst->format) < 8) { |
287 | if (count == 1) { |
288 | const SDL_Rect *r = &rects[0]; |
289 | if (r->x == 0 && r->y == 0 && r->w == dst->w && r->h == dst->h) { |
290 | if (SDL_BITSPERPIXEL(dst->format) == 4) { |
291 | Uint8 b = (((Uint8)color << 4) | (Uint8)color); |
292 | SDL_memset(dst->pixels, b, (size_t)dst->h * dst->pitch); |
293 | return true; |
294 | } |
295 | } |
296 | } |
297 | return SDL_SetError("SDL_FillSurfaceRects(): Unsupported surface format" ); |
298 | } |
299 | |
300 | if (fill_function == NULL) { |
301 | switch (SDL_BYTESPERPIXEL(dst->format)) { |
302 | case 1: |
303 | { |
304 | color |= (color << 8); |
305 | color |= (color << 16); |
306 | #ifdef SDL_SSE_INTRINSICS |
307 | if (SDL_HasSSE()) { |
308 | fill_function = SDL_FillSurfaceRect1SSE; |
309 | break; |
310 | } |
311 | #endif |
312 | fill_function = SDL_FillSurfaceRect1; |
313 | break; |
314 | } |
315 | |
316 | case 2: |
317 | { |
318 | color |= (color << 16); |
319 | #ifdef SDL_SSE_INTRINSICS |
320 | if (SDL_HasSSE()) { |
321 | fill_function = SDL_FillSurfaceRect2SSE; |
322 | break; |
323 | } |
324 | #endif |
325 | fill_function = SDL_FillSurfaceRect2; |
326 | break; |
327 | } |
328 | |
329 | case 3: |
330 | // 24-bit RGB is a slow path, at least for now. |
331 | { |
332 | fill_function = SDL_FillSurfaceRect3; |
333 | break; |
334 | } |
335 | |
336 | case 4: |
337 | { |
338 | #ifdef SDL_SSE_INTRINSICS |
339 | if (SDL_HasSSE()) { |
340 | fill_function = SDL_FillSurfaceRect4SSE; |
341 | break; |
342 | } |
343 | #endif |
344 | fill_function = SDL_FillSurfaceRect4; |
345 | break; |
346 | } |
347 | |
348 | default: |
349 | return SDL_SetError("Unsupported pixel format" ); |
350 | } |
351 | } |
352 | |
353 | for (i = 0; i < count; ++i) { |
354 | rect = &rects[i]; |
355 | // Perform clipping |
356 | if (!SDL_GetRectIntersection(rect, &dst->clip_rect, &clipped)) { |
357 | continue; |
358 | } |
359 | rect = &clipped; |
360 | |
361 | pixels = (Uint8 *)dst->pixels + rect->y * dst->pitch + |
362 | rect->x * SDL_BYTESPERPIXEL(dst->format); |
363 | |
364 | fill_function(pixels, dst->pitch, color, rect->w, rect->h); |
365 | } |
366 | |
367 | // We're done! |
368 | return true; |
369 | } |
370 | |