1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22
23#include "SDL_surface_c.h"
24#include "SDL_blit_copy.h"
25
26#ifdef SDL_SSE_INTRINSICS
27// This assumes 16-byte aligned src and dst
28static SDL_INLINE void SDL_TARGETING("sse") SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
29{
30 int i;
31
32 __m128 values[4];
33 for (i = len / 64; i--;) {
34 _mm_prefetch((const char *)src, _MM_HINT_NTA);
35 values[0] = *(__m128 *)(src + 0);
36 values[1] = *(__m128 *)(src + 16);
37 values[2] = *(__m128 *)(src + 32);
38 values[3] = *(__m128 *)(src + 48);
39 _mm_stream_ps((float *)(dst + 0), values[0]);
40 _mm_stream_ps((float *)(dst + 16), values[1]);
41 _mm_stream_ps((float *)(dst + 32), values[2]);
42 _mm_stream_ps((float *)(dst + 48), values[3]);
43 src += 64;
44 dst += 64;
45 }
46
47 if (len & 63) {
48 SDL_memcpy(dst, src, len & 63);
49 }
50}
51#endif // SDL_SSE_INTRINSICS
52
53void SDL_BlitCopy(SDL_BlitInfo *info)
54{
55 bool overlap;
56 Uint8 *src, *dst;
57 int w, h;
58 int srcskip, dstskip;
59
60 w = info->dst_w * info->dst_fmt->bytes_per_pixel;
61 h = info->dst_h;
62 src = info->src;
63 dst = info->dst;
64 srcskip = info->src_pitch;
65 dstskip = info->dst_pitch;
66
67 // Properly handle overlapping blits
68 if (src < dst) {
69 overlap = (dst < (src + h * srcskip));
70 } else {
71 overlap = (src < (dst + h * dstskip));
72 }
73 if (overlap) {
74 if (dst < src) {
75 while (h--) {
76 SDL_memmove(dst, src, w);
77 src += srcskip;
78 dst += dstskip;
79 }
80 } else {
81 src += ((h - 1) * srcskip);
82 dst += ((h - 1) * dstskip);
83 while (h--) {
84 SDL_memmove(dst, src, w);
85 src -= srcskip;
86 dst -= dstskip;
87 }
88 }
89 return;
90 }
91
92#ifdef SDL_SSE_INTRINSICS
93 if (SDL_HasSSE() &&
94 !((uintptr_t)src & 15) && !(srcskip & 15) &&
95 !((uintptr_t)dst & 15) && !(dstskip & 15)) {
96 while (h--) {
97 SDL_memcpySSE(dst, src, w);
98 src += srcskip;
99 dst += dstskip;
100 }
101 return;
102 }
103#endif
104
105 while (h--) {
106 SDL_memcpy(dst, src, w);
107 src += srcskip;
108 dst += dstskip;
109 }
110}
111