1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "../SDL_internal.h"
22
23#include "SDL_video.h"
24#include "SDL_blit.h"
25#include "SDL_cpuinfo.h"
26
27
28#ifdef __SSE__
29/* *INDENT-OFF* */
30
/*
 * SSE_BEGIN declares the local __m128 'c128' and loads the enclosing
 * function's 32-bit 'color' into each of its four 32-bit lanes.
 * It expands to declarations followed by statements, so it has to be used
 * right after the other local declarations of the function.
 */
#if !defined(_MSC_VER) || defined(__clang__)
/* Build the four copies in a 16-byte aligned scratch array, then load it. */
#define SSE_BEGIN \
    __m128 c128; \
    DECLARE_ALIGNED(Uint32, cccc[4], 16); \
    cccc[0] = cccc[1] = cccc[2] = cccc[3] = color; \
    c128 = *(__m128 *)cccc;
#else
/* MSVC exposes the lanes of __m128 directly through m128_u32[]. */
#define SSE_BEGIN \
    __m128 c128; \
    c128.m128_u32[0] = c128.m128_u32[1] = \
    c128.m128_u32[2] = c128.m128_u32[3] = color;
#endif
48
/*
 * SSE_WORK stores 'c128' to memory in 64-byte chunks (four 16-byte
 * _mm_stream_ps stores), n / 64 times, advancing the byte pointer 'p'
 * past everything it wrote.  It relies on 'i', 'n', 'p' and 'c128' from
 * the enclosing function and does not modify 'n'; the callers in this
 * file bring 'p' to a 16-byte boundary before using it.
 */
#define SSE_WORK \
    for (i = n / 64; i-- != 0; ) { \
        _mm_stream_ps((float *)p +  0, c128); \
        _mm_stream_ps((float *)p +  4, c128); \
        _mm_stream_ps((float *)p +  8, c128); \
        _mm_stream_ps((float *)p + 12, c128); \
        p += 64; \
    }

/* SSE_END closes an SSE_BEGIN section; it currently expands to nothing. */
#define SSE_END
59
/*
 * DEFINE_SSE_FILLRECT(bpp, type) generates SDL_FillRect<bpp>SSE(), which
 * fills a block of 'w' x 'h' pixels of 'bpp' bytes each, starting at
 * 'pixels' and advancing 'pitch' bytes from one row to the next.
 *
 *   bpp   bytes per pixel (used both as a number and to build the name)
 *   type  unsigned integer type that is 'bpp' bytes wide
 *
 * 'color' is stored as (type)color by the scalar loops and as a whole
 * 32-bit value, four times per 16 bytes, by the SSE loop, so the caller is
 * expected to pass the pixel value replicated across all 32 bits.
 *
 * Each row is written in three steps: single pixels up to the next 16-byte
 * boundary, 64-byte chunks through SSE_WORK, then single pixels for the
 * rest.  The SSE step is only taken when the row starts on a multiple of
 * 'bpp': otherwise whole-pixel steps can never reach a 16-byte boundary
 * (and the aligned streaming stores would be handed a misaligned address),
 * so such rows are filled entirely by the scalar loop.
 */
#define DEFINE_SSE_FILLRECT(bpp, type) \
static void \
SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
{ \
    int i, n; \
    Uint8 *p = NULL; \
 \
    SSE_BEGIN; \
 \
    while (h--) { \
        int tail; \
 \
        n = w * (bpp); \
        p = pixels; \
        tail = n;               /* bytes left for the scalar loop */ \
 \
        if (n > 63 && ((uintptr_t)p % (bpp)) == 0) { \
            /* bytes up to the next 16-byte boundary; 16 == aligned */ \
            int adjust = 16 - ((uintptr_t)p & 15); \
            if (adjust < 16) { \
                n -= adjust; \
                adjust /= (bpp); \
                while (adjust--) { \
                    *((type *)p) = (type)color; \
                    p += (bpp); \
                } \
            } \
            SSE_WORK; \
            tail = n & 63;      /* SSE_WORK consumed the 64-byte chunks */ \
        } \
        tail /= (bpp); \
        while (tail--) { \
            *((type *)p) = (type)color; \
            p += (bpp); \
        } \
        pixels += pitch; \
    } \
 \
    SSE_END; \
}
98
99static void
100SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
101{
102 int i, n;
103
104 SSE_BEGIN;
105 while (h--) {
106 Uint8 *p = pixels;
107 n = w;
108
109 if (n > 63) {
110 int adjust = 16 - ((uintptr_t)p & 15);
111 if (adjust) {
112 n -= adjust;
113 SDL_memset(p, color, adjust);
114 p += adjust;
115 }
116 SSE_WORK;
117 }
118 if (n & 63) {
119 int remainder = (n & 63);
120 SDL_memset(p, color, remainder);
121 }
122 pixels += pitch;
123 }
124
125 SSE_END;
126}
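/* DEFINE_SSE_FILLRECT(1, Uint8) is intentionally not instantiated: the
   1-byte-per-pixel variant is the hand-written SDL_FillRect1SSE() above. */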
128DEFINE_SSE_FILLRECT(2, Uint16)
129DEFINE_SSE_FILLRECT(4, Uint32)
130
131/* *INDENT-ON* */
132#endif /* __SSE__ */
133
134static void
135SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
136{
137 int n;
138 Uint8 *p = NULL;
139
140 while (h--) {
141 n = w;
142 p = pixels;
143
144 if (n > 3) {
145 switch ((uintptr_t) p & 3) {
146 case 1:
147 *p++ = (Uint8) color;
148 --n; /* fallthrough */
149 case 2:
150 *p++ = (Uint8) color;
151 --n; /* fallthrough */
152 case 3:
153 *p++ = (Uint8) color;
154 --n; /* fallthrough */
155 }
156 SDL_memset4(p, color, (n >> 2));
157 }
158 if (n & 3) {
159 p += (n & ~3);
160 switch (n & 3) {
161 case 3:
162 *p++ = (Uint8) color; /* fallthrough */
163 case 2:
164 *p++ = (Uint8) color; /* fallthrough */
165 case 1:
166 *p++ = (Uint8) color; /* fallthrough */
167 }
168 }
169 pixels += pitch;
170 }
171}
172
173static void
174SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
175{
176 int n;
177 Uint16 *p = NULL;
178
179 while (h--) {
180 n = w;
181 p = (Uint16 *) pixels;
182
183 if (n > 1) {
184 if ((uintptr_t) p & 2) {
185 *p++ = (Uint16) color;
186 --n;
187 }
188 SDL_memset4(p, color, (n >> 1));
189 }
190 if (n & 1) {
191 p[n - 1] = (Uint16) color;
192 }
193 pixels += pitch;
194 }
195}
196
197static void
198SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
199{
200#if SDL_BYTEORDER == SDL_LIL_ENDIAN
201 Uint8 b1 = (Uint8) (color & 0xFF);
202 Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
203 Uint8 b3 = (Uint8) ((color >> 16) & 0xFF);
204#elif SDL_BYTEORDER == SDL_BIG_ENDIAN
205 Uint8 b1 = (Uint8) ((color >> 16) & 0xFF);
206 Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
207 Uint8 b3 = (Uint8) (color & 0xFF);
208#endif
209 int n;
210 Uint8 *p = NULL;
211
212 while (h--) {
213 n = w;
214 p = pixels;
215
216 while (n--) {
217 *p++ = b1;
218 *p++ = b2;
219 *p++ = b3;
220 }
221 pixels += pitch;
222 }
223}
224
225static void
226SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
227{
228 while (h--) {
229 SDL_memset4(pixels, color, w);
230 pixels += pitch;
231 }
232}
233
234/*
235 * This function performs a fast fill of the given rectangle with 'color'
236 */
237int
238SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
239{
240 if (!dst) {
241 return SDL_SetError("Passed NULL destination surface");
242 }
243
244 /* If 'rect' == NULL, then fill the whole surface */
245 if (!rect) {
246 rect = &dst->clip_rect;
247 /* Don't attempt to fill if the surface's clip_rect is empty */
248 if (SDL_RectEmpty(rect)) {
249 return 0;
250 }
251 }
252
253 return SDL_FillRects(dst, rect, 1, color);
254}
255
256#if SDL_ARM_NEON_BLITTERS
257void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
258void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
259void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
260
261static void fill_8_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
262 FillRect8ARMNEONAsm(w, h, (uint8_t *) pixels, pitch >> 0, color);
263 return;
264}
265
266static void fill_16_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
267 FillRect16ARMNEONAsm(w, h, (uint16_t *) pixels, pitch >> 1, color);
268 return;
269}
270
271static void fill_32_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
272 FillRect32ARMNEONAsm(w, h, (uint32_t *) pixels, pitch >> 2, color);
273 return;
274}
275#endif
276
277#if SDL_ARM_SIMD_BLITTERS
278void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
279void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
280void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
281
282static void fill_8_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
283 FillRect8ARMSIMDAsm(w, h, (uint8_t *) pixels, pitch >> 0, color);
284 return;
285}
286
287static void fill_16_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
288 FillRect16ARMSIMDAsm(w, h, (uint16_t *) pixels, pitch >> 1, color);
289 return;
290}
291
292static void fill_32_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h) {
293 FillRect32ARMSIMDAsm(w, h, (uint32_t *) pixels, pitch >> 2, color);
294 return;
295}
296#endif
297
298int
299SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
300 Uint32 color)
301{
302 SDL_Rect clipped;
303 Uint8 *pixels;
304 const SDL_Rect* rect;
305 void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL;
306 int i;
307
308 if (!dst) {
309 return SDL_SetError("Passed NULL destination surface");
310 }
311
312 /* This function doesn't work on surfaces < 8 bpp */
313 if (dst->format->BitsPerPixel < 8) {
314 return SDL_SetError("SDL_FillRect(): Unsupported surface format");
315 }
316
317 /* Nothing to do */
318 if (dst->w == 0 || dst->h == 0) {
319 return 0;
320 }
321
322 /* Perform software fill */
323 if (!dst->pixels) {
324 return SDL_SetError("SDL_FillRect(): You must lock the surface");
325 }
326
327 if (!rects) {
328 return SDL_SetError("SDL_FillRects() passed NULL rects");
329 }
330
331#if SDL_ARM_NEON_BLITTERS
332 if (SDL_HasNEON() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
333 switch (dst->format->BytesPerPixel) {
334 case 1:
335 fill_function = fill_8_neon;
336 break;
337 case 2:
338 fill_function = fill_16_neon;
339 break;
340 case 4:
341 fill_function = fill_32_neon;
342 break;
343 }
344 }
345#endif
346#if SDL_ARM_SIMD_BLITTERS
347 if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
348 switch (dst->format->BytesPerPixel) {
349 case 1:
350 fill_function = fill_8_simd;
351 break;
352 case 2:
353 fill_function = fill_16_simd;
354 break;
355 case 4:
356 fill_function = fill_32_simd;
357 break;
358 }
359 }
360#endif
361
362 if (fill_function == NULL) {
363 switch (dst->format->BytesPerPixel) {
364 case 1:
365 {
366 color |= (color << 8);
367 color |= (color << 16);
368#ifdef __SSE__
369 if (SDL_HasSSE()) {
370 fill_function = SDL_FillRect1SSE;
371 break;
372 }
373#endif
374 fill_function = SDL_FillRect1;
375 break;
376 }
377
378 case 2:
379 {
380 color |= (color << 16);
381#ifdef __SSE__
382 if (SDL_HasSSE()) {
383 fill_function = SDL_FillRect2SSE;
384 break;
385 }
386#endif
387 fill_function = SDL_FillRect2;
388 break;
389 }
390
391 case 3:
392 /* 24-bit RGB is a slow path, at least for now. */
393 {
394 fill_function = SDL_FillRect3;
395 break;
396 }
397
398 case 4:
399 {
400#ifdef __SSE__
401 if (SDL_HasSSE()) {
402 fill_function = SDL_FillRect4SSE;
403 break;
404 }
405#endif
406 fill_function = SDL_FillRect4;
407 break;
408 }
409
410 default:
411 return SDL_SetError("Unsupported pixel format");
412 }
413 }
414
415 for (i = 0; i < count; ++i) {
416 rect = &rects[i];
417 /* Perform clipping */
418 if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) {
419 continue;
420 }
421 rect = &clipped;
422
423 pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch +
424 rect->x * dst->format->BytesPerPixel;
425
426 fill_function(pixels, dst->pitch, color, rect->w, rect->h);
427 }
428
429 /* We're done! */
430 return 0;
431}
432
433/* vi: set ts=4 sw=4 expandtab: */
434