1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #include "../SDL_internal.h" |
22 | |
23 | #include "SDL_video.h" |
24 | #include "SDL_blit.h" |
25 | #include "SDL_cpuinfo.h" |
26 | |
27 | |
28 | #ifdef __SSE__ |
29 | /* *INDENT-OFF* */ |
30 | |
/*
 * SSE_BEGIN: declares the __m128 variable `c128` and sets each of its four
 * 32-bit lanes to `color`.  A variable named `color` must be in scope where
 * the macro is expanded, and `c128` is what SSE_WORK later stores.
 */
#if defined(_MSC_VER) && !defined(__clang__)
/* MSVC (but not clang): write the lanes through the m128_u32 member. */
#define SSE_BEGIN \
    __m128 c128; \
    c128.m128_u32[0] = color; \
    c128.m128_u32[1] = color; \
    c128.m128_u32[2] = color; \
    c128.m128_u32[3] = color;
#else
/*
 * Other compilers: stage the four copies in the array `cccc`, declared
 * through DECLARE_ALIGNED(..., 16), and read it back as an __m128.
 */
#define SSE_BEGIN \
    __m128 c128; \
    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
    cccc[0] = color; \
    cccc[1] = color; \
    cccc[2] = color; \
    cccc[3] = color; \
    c128 = *(__m128 *)cccc;
#endif
48 | |
/*
 * SSE_WORK: writes n / 64 chunks of 64 bytes starting at `p`, each chunk as
 * four _mm_stream_ps stores of `c128`, and advances `p` by 64 per chunk.
 * Uses `i`, `n`, `p` and `c128` from the expansion scope; `n` itself is not
 * modified, so callers derive the leftover byte count from (n & 63).
 * The functions in this file advance `p` to a 16-byte boundary before
 * expanding this macro.
 */
#define SSE_WORK \
    for (i = n / 64; i--;) { \
        _mm_stream_ps((float *)(p+0), c128); \
        _mm_stream_ps((float *)(p+16), c128); \
        _mm_stream_ps((float *)(p+32), c128); \
        _mm_stream_ps((float *)(p+48), c128); \
        p += 64; \
    }
57 | |
/* SSE_END: counterpart of SSE_BEGIN; currently expands to nothing. */
#define SSE_END
59 | |
/*
 * DEFINE_SSE_FILLRECT(bpp, type) defines
 *
 *     static void SDL_FillRect<bpp>SSE(Uint8 *pixels, int pitch,
 *                                      Uint32 color, int w, int h)
 *
 * which fills a rectangle of w x h pixels with `color`.
 *
 *   bpp    - bytes per pixel (instantiated with 2 and 4 in this file)
 *   type   - integer type that is bpp bytes wide, used for single-pixel stores
 *
 *   pixels - address of the first pixel of the first row
 *   pitch  - byte distance from the start of one row to the start of the next
 *   color  - value stored; the low bpp bytes are used for single-pixel
 *            stores and the full 32 bits are used for the streamed chunks
 *   w, h   - rectangle size in pixels / rows
 *
 * Rows of 64 bytes or more are filled with SSE_WORK once the write pointer
 * has been advanced to a 16-byte boundary.  That boundary can only be
 * reached in whole pixels when the row starts on a multiple of bpp; rows
 * that do not (for example caller-supplied pixel memory with an odd address
 * or pitch) are filled with single-pixel stores instead, because the
 * streaming store requires a 16-byte aligned address.
 */
#define DEFINE_SSE_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
    int i, n; \
    Uint8 *p = NULL; \
 \
    SSE_BEGIN; \
 \
    while (h--) { \
        n = w * (bpp); \
        p = pixels; \
 \
        if (n > 63) { \
            if (((uintptr_t)p & ((bpp) - 1)) == 0) { \
                /* Single pixels up to the next 16-byte boundary ... */ \
                int adjust = 16 - ((uintptr_t)p & 15); \
                if (adjust < 16) { \
                    n -= adjust; \
                    adjust /= (bpp); \
                    while (adjust--) { \
                        *((type *)p) = (type)color; \
                        p += (bpp); \
                    } \
                } \
                /* ... then whole 64-byte chunks with streaming stores. */ \
                SSE_WORK; \
            } else { \
                /* Row start is not a multiple of bpp: no pixel boundary */ \
                /* is 16-byte aligned, so write the 64-byte groups one */ \
                /* pixel at a time. */ \
                for (i = (n & ~63) / (bpp); i--;) { \
                    *((type *)p) = (type)color; \
                    p += (bpp); \
                } \
            } \
        } \
        /* Whatever is left over after the 64-byte groups. */ \
        if (n & 63) { \
            int remainder = (n & 63); \
            remainder /= (bpp); \
            while (remainder--) { \
                *((type *)p) = (type)color; \
                p += (bpp); \
            } \
        } \
        pixels += pitch; \
    } \
 \
    SSE_END; \
}
98 | |
99 | static void |
100 | SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) |
101 | { |
102 | int i, n; |
103 | |
104 | SSE_BEGIN; |
105 | while (h--) { |
106 | Uint8 *p = pixels; |
107 | n = w; |
108 | |
109 | if (n > 63) { |
110 | int adjust = 16 - ((uintptr_t)p & 15); |
111 | if (adjust) { |
112 | n -= adjust; |
113 | SDL_memset(p, color, adjust); |
114 | p += adjust; |
115 | } |
116 | SSE_WORK; |
117 | } |
118 | if (n & 63) { |
119 | int remainder = (n & 63); |
120 | SDL_memset(p, color, remainder); |
121 | } |
122 | pixels += pitch; |
123 | } |
124 | |
125 | SSE_END; |
126 | } |
/*
 * The one-byte-per-pixel variant is written out by hand above
 * (SDL_FillRect1SSE), so the macro is only instantiated for 2 and 4 bytes
 * per pixel: SDL_FillRect2SSE and SDL_FillRect4SSE.
 */
DEFINE_SSE_FILLRECT(2, Uint16)
DEFINE_SSE_FILLRECT(4, Uint32)
130 | |
131 | /* *INDENT-ON* */ |
132 | #endif /* __SSE__ */ |
133 | |
134 | static void |
135 | SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h) |
136 | { |
137 | int n; |
138 | Uint8 *p = NULL; |
139 | |
140 | while (h--) { |
141 | n = w; |
142 | p = pixels; |
143 | |
144 | if (n > 3) { |
145 | switch ((uintptr_t) p & 3) { |
146 | case 1: |
147 | *p++ = (Uint8) color; |
148 | --n; /* fallthrough */ |
149 | case 2: |
150 | *p++ = (Uint8) color; |
151 | --n; /* fallthrough */ |
152 | case 3: |
153 | *p++ = (Uint8) color; |
154 | --n; /* fallthrough */ |
155 | } |
156 | SDL_memset4(p, color, (n >> 2)); |
157 | } |
158 | if (n & 3) { |
159 | p += (n & ~3); |
160 | switch (n & 3) { |
161 | case 3: |
162 | *p++ = (Uint8) color; /* fallthrough */ |
163 | case 2: |
164 | *p++ = (Uint8) color; /* fallthrough */ |
165 | case 1: |
166 | *p++ = (Uint8) color; /* fallthrough */ |
167 | } |
168 | } |
169 | pixels += pitch; |
170 | } |
171 | } |
172 | |
173 | static void |
174 | SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h) |
175 | { |
176 | int n; |
177 | Uint16 *p = NULL; |
178 | |
179 | while (h--) { |
180 | n = w; |
181 | p = (Uint16 *) pixels; |
182 | |
183 | if (n > 1) { |
184 | if ((uintptr_t) p & 2) { |
185 | *p++ = (Uint16) color; |
186 | --n; |
187 | } |
188 | SDL_memset4(p, color, (n >> 1)); |
189 | } |
190 | if (n & 1) { |
191 | p[n - 1] = (Uint16) color; |
192 | } |
193 | pixels += pitch; |
194 | } |
195 | } |
196 | |
197 | static void |
198 | SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h) |
199 | { |
200 | #if SDL_BYTEORDER == SDL_LIL_ENDIAN |
201 | Uint8 b1 = (Uint8) (color & 0xFF); |
202 | Uint8 b2 = (Uint8) ((color >> 8) & 0xFF); |
203 | Uint8 b3 = (Uint8) ((color >> 16) & 0xFF); |
204 | #elif SDL_BYTEORDER == SDL_BIG_ENDIAN |
205 | Uint8 b1 = (Uint8) ((color >> 16) & 0xFF); |
206 | Uint8 b2 = (Uint8) ((color >> 8) & 0xFF); |
207 | Uint8 b3 = (Uint8) (color & 0xFF); |
208 | #endif |
209 | int n; |
210 | Uint8 *p = NULL; |
211 | |
212 | while (h--) { |
213 | n = w; |
214 | p = pixels; |
215 | |
216 | while (n--) { |
217 | *p++ = b1; |
218 | *p++ = b2; |
219 | *p++ = b3; |
220 | } |
221 | pixels += pitch; |
222 | } |
223 | } |
224 | |
225 | static void |
226 | SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h) |
227 | { |
228 | while (h--) { |
229 | SDL_memset4(pixels, color, w); |
230 | pixels += pitch; |
231 | } |
232 | } |
233 | |
234 | /* |
235 | * This function performs a fast fill of the given rectangle with 'color' |
236 | */ |
237 | int |
238 | SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color) |
239 | { |
240 | if (!dst) { |
241 | return SDL_SetError("Passed NULL destination surface" ); |
242 | } |
243 | |
244 | /* If 'rect' == NULL, then fill the whole surface */ |
245 | if (!rect) { |
246 | rect = &dst->clip_rect; |
247 | /* Don't attempt to fill if the surface's clip_rect is empty */ |
248 | if (SDL_RectEmpty(rect)) { |
249 | return 0; |
250 | } |
251 | } |
252 | |
253 | return SDL_FillRects(dst, rect, 1, color); |
254 | } |
255 | |
#if SDL_ARM_NEON_BLITTERS
/* NEON fill routines; only declared here, the definitions live elsewhere. */
void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);

/*
 * Adapters giving the NEON routines the fill-function signature used by
 * SDL_FillRects().  The stride passed on is the byte pitch shifted right by
 * log2 of the pixel size, and the colour is narrowed to the pixel width.
 */
static void
fill_8_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect8ARMNEONAsm(w, h, (uint8_t *) pixels, pitch, (uint8_t) color);
}

static void
fill_16_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect16ARMNEONAsm(w, h, (uint16_t *) pixels, pitch >> 1, (uint16_t) color);
}

static void
fill_32_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect32ARMNEONAsm(w, h, (uint32_t *) pixels, pitch >> 2, (uint32_t) color);
}
#endif
276 | |
#if SDL_ARM_SIMD_BLITTERS
/* ARM SIMD fill routines; only declared here, the definitions live elsewhere. */
void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);

/*
 * Adapters giving the ARM SIMD routines the fill-function signature used by
 * SDL_FillRects().  The stride passed on is the byte pitch shifted right by
 * log2 of the pixel size, and the colour is narrowed to the pixel width.
 */
static void
fill_8_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect8ARMSIMDAsm(w, h, (uint8_t *) pixels, pitch, (uint8_t) color);
}

static void
fill_16_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect16ARMSIMDAsm(w, h, (uint16_t *) pixels, pitch >> 1, (uint16_t) color);
}

static void
fill_32_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
{
    FillRect32ARMSIMDAsm(w, h, (uint32_t *) pixels, pitch >> 2, (uint32_t) color);
}
#endif
297 | |
298 | int |
299 | SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count, |
300 | Uint32 color) |
301 | { |
302 | SDL_Rect clipped; |
303 | Uint8 *pixels; |
304 | const SDL_Rect* rect; |
305 | void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL; |
306 | int i; |
307 | |
308 | if (!dst) { |
309 | return SDL_SetError("Passed NULL destination surface" ); |
310 | } |
311 | |
312 | /* This function doesn't work on surfaces < 8 bpp */ |
313 | if (dst->format->BitsPerPixel < 8) { |
314 | return SDL_SetError("SDL_FillRect(): Unsupported surface format" ); |
315 | } |
316 | |
317 | /* Nothing to do */ |
318 | if (dst->w == 0 || dst->h == 0) { |
319 | return 0; |
320 | } |
321 | |
322 | /* Perform software fill */ |
323 | if (!dst->pixels) { |
324 | return SDL_SetError("SDL_FillRect(): You must lock the surface" ); |
325 | } |
326 | |
327 | if (!rects) { |
328 | return SDL_SetError("SDL_FillRects() passed NULL rects" ); |
329 | } |
330 | |
331 | #if SDL_ARM_NEON_BLITTERS |
332 | if (SDL_HasNEON() && dst->format->BytesPerPixel != 3 && fill_function == NULL) { |
333 | switch (dst->format->BytesPerPixel) { |
334 | case 1: |
335 | fill_function = fill_8_neon; |
336 | break; |
337 | case 2: |
338 | fill_function = fill_16_neon; |
339 | break; |
340 | case 4: |
341 | fill_function = fill_32_neon; |
342 | break; |
343 | } |
344 | } |
345 | #endif |
346 | #if SDL_ARM_SIMD_BLITTERS |
347 | if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3 && fill_function == NULL) { |
348 | switch (dst->format->BytesPerPixel) { |
349 | case 1: |
350 | fill_function = fill_8_simd; |
351 | break; |
352 | case 2: |
353 | fill_function = fill_16_simd; |
354 | break; |
355 | case 4: |
356 | fill_function = fill_32_simd; |
357 | break; |
358 | } |
359 | } |
360 | #endif |
361 | |
362 | if (fill_function == NULL) { |
363 | switch (dst->format->BytesPerPixel) { |
364 | case 1: |
365 | { |
366 | color |= (color << 8); |
367 | color |= (color << 16); |
368 | #ifdef __SSE__ |
369 | if (SDL_HasSSE()) { |
370 | fill_function = SDL_FillRect1SSE; |
371 | break; |
372 | } |
373 | #endif |
374 | fill_function = SDL_FillRect1; |
375 | break; |
376 | } |
377 | |
378 | case 2: |
379 | { |
380 | color |= (color << 16); |
381 | #ifdef __SSE__ |
382 | if (SDL_HasSSE()) { |
383 | fill_function = SDL_FillRect2SSE; |
384 | break; |
385 | } |
386 | #endif |
387 | fill_function = SDL_FillRect2; |
388 | break; |
389 | } |
390 | |
391 | case 3: |
392 | /* 24-bit RGB is a slow path, at least for now. */ |
393 | { |
394 | fill_function = SDL_FillRect3; |
395 | break; |
396 | } |
397 | |
398 | case 4: |
399 | { |
400 | #ifdef __SSE__ |
401 | if (SDL_HasSSE()) { |
402 | fill_function = SDL_FillRect4SSE; |
403 | break; |
404 | } |
405 | #endif |
406 | fill_function = SDL_FillRect4; |
407 | break; |
408 | } |
409 | |
410 | default: |
411 | return SDL_SetError("Unsupported pixel format" ); |
412 | } |
413 | } |
414 | |
415 | for (i = 0; i < count; ++i) { |
416 | rect = &rects[i]; |
417 | /* Perform clipping */ |
418 | if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) { |
419 | continue; |
420 | } |
421 | rect = &clipped; |
422 | |
423 | pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch + |
424 | rect->x * dst->format->BytesPerPixel; |
425 | |
426 | fill_function(pixels, dst->pitch, color, rect->w, rect->h); |
427 | } |
428 | |
429 | /* We're done! */ |
430 | return 0; |
431 | } |
432 | |
433 | /* vi: set ts=4 sw=4 expandtab: */ |
434 | |