1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #include "../SDL_internal.h" |
22 | |
23 | #include "SDL_video.h" |
24 | #include "SDL_blit.h" |
25 | #include "SDL_blit_copy.h" |
26 | |
27 | |
28 | #ifdef __SSE__ |
29 | /* This assumes 16-byte aligned src and dst */ |
30 | static SDL_INLINE void |
31 | SDL_memcpySSE(Uint8 * dst, const Uint8 * src, int len) |
32 | { |
33 | int i; |
34 | |
35 | __m128 values[4]; |
36 | for (i = len / 64; i--;) { |
37 | _mm_prefetch(src, _MM_HINT_NTA); |
38 | values[0] = *(__m128 *) (src + 0); |
39 | values[1] = *(__m128 *) (src + 16); |
40 | values[2] = *(__m128 *) (src + 32); |
41 | values[3] = *(__m128 *) (src + 48); |
42 | _mm_stream_ps((float *) (dst + 0), values[0]); |
43 | _mm_stream_ps((float *) (dst + 16), values[1]); |
44 | _mm_stream_ps((float *) (dst + 32), values[2]); |
45 | _mm_stream_ps((float *) (dst + 48), values[3]); |
46 | src += 64; |
47 | dst += 64; |
48 | } |
49 | |
50 | if (len & 63) |
51 | SDL_memcpy(dst, src, len & 63); |
52 | } |
53 | #endif /* __SSE__ */ |
54 | |
55 | #ifdef __MMX__ |
56 | #ifdef _MSC_VER |
57 | #pragma warning(disable:4799) |
58 | #endif |
59 | static SDL_INLINE void |
60 | SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len) |
61 | { |
62 | const int remain = (len & 63); |
63 | int i; |
64 | |
65 | __m64* d64 = (__m64*)dst; |
66 | __m64* s64 = (__m64*)src; |
67 | |
68 | for(i= len / 64; i--;) { |
69 | d64[0] = s64[0]; |
70 | d64[1] = s64[1]; |
71 | d64[2] = s64[2]; |
72 | d64[3] = s64[3]; |
73 | d64[4] = s64[4]; |
74 | d64[5] = s64[5]; |
75 | d64[6] = s64[6]; |
76 | d64[7] = s64[7]; |
77 | |
78 | d64 += 8; |
79 | s64 += 8; |
80 | } |
81 | |
82 | if (remain) |
83 | { |
84 | const int skip = len - remain; |
85 | SDL_memcpy(dst + skip, src + skip, remain); |
86 | } |
87 | } |
88 | #endif /* __MMX__ */ |
89 | |
90 | void |
91 | SDL_BlitCopy(SDL_BlitInfo * info) |
92 | { |
93 | SDL_bool overlap; |
94 | Uint8 *src, *dst; |
95 | int w, h; |
96 | int srcskip, dstskip; |
97 | |
98 | w = info->dst_w * info->dst_fmt->BytesPerPixel; |
99 | h = info->dst_h; |
100 | src = info->src; |
101 | dst = info->dst; |
102 | srcskip = info->src_pitch; |
103 | dstskip = info->dst_pitch; |
104 | |
105 | /* Properly handle overlapping blits */ |
106 | if (src < dst) { |
107 | overlap = (dst < (src + h*srcskip)); |
108 | } else { |
109 | overlap = (src < (dst + h*dstskip)); |
110 | } |
111 | if (overlap) { |
112 | if ( dst < src ) { |
113 | while ( h-- ) { |
114 | SDL_memmove(dst, src, w); |
115 | src += srcskip; |
116 | dst += dstskip; |
117 | } |
118 | } else { |
119 | src += ((h-1) * srcskip); |
120 | dst += ((h-1) * dstskip); |
121 | while ( h-- ) { |
122 | SDL_memmove(dst, src, w); |
123 | src -= srcskip; |
124 | dst -= dstskip; |
125 | } |
126 | } |
127 | return; |
128 | } |
129 | |
130 | #ifdef __SSE__ |
131 | if (SDL_HasSSE() && |
132 | !((uintptr_t) src & 15) && !(srcskip & 15) && |
133 | !((uintptr_t) dst & 15) && !(dstskip & 15)) { |
134 | while (h--) { |
135 | SDL_memcpySSE(dst, src, w); |
136 | src += srcskip; |
137 | dst += dstskip; |
138 | } |
139 | return; |
140 | } |
141 | #endif |
142 | |
143 | #ifdef __MMX__ |
144 | if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) { |
145 | while (h--) { |
146 | SDL_memcpyMMX(dst, src, w); |
147 | src += srcskip; |
148 | dst += dstskip; |
149 | } |
150 | _mm_empty(); |
151 | return; |
152 | } |
153 | #endif |
154 | |
155 | while (h--) { |
156 | SDL_memcpy(dst, src, w); |
157 | src += srcskip; |
158 | dst += dstskip; |
159 | } |
160 | } |
161 | |
162 | /* vi: set ts=4 sw=4 expandtab: */ |
163 | |