1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #include "../SDL_internal.h" |
22 | |
23 | #include "SDL_video.h" |
24 | #include "SDL_blit.h" |
25 | #include "SDL_render.h" |
26 | |
27 | static int SDL_LowerSoftStretchNearest(SDL_Surface *src, const SDL_Rect *srcrect, SDL_Surface *dst, const SDL_Rect *dstrect); |
28 | static int SDL_LowerSoftStretchLinear(SDL_Surface *src, const SDL_Rect *srcrect, SDL_Surface *dst, const SDL_Rect *dstrect); |
static int SDL_UpperSoftStretch(SDL_Surface *src, const SDL_Rect *srcrect, SDL_Surface *dst, const SDL_Rect *dstrect, SDL_ScaleMode scaleMode);
30 | |
31 | int |
32 | SDL_SoftStretch(SDL_Surface *src, const SDL_Rect *srcrect, |
33 | SDL_Surface *dst, const SDL_Rect *dstrect) |
34 | { |
35 | return SDL_UpperSoftStretch(src, srcrect, dst, dstrect, SDL_ScaleModeNearest); |
36 | } |
37 | |
38 | int |
39 | SDL_SoftStretchLinear(SDL_Surface *src, const SDL_Rect *srcrect, |
40 | SDL_Surface *dst, const SDL_Rect *dstrect) |
41 | { |
42 | return SDL_UpperSoftStretch(src, srcrect, dst, dstrect, SDL_ScaleModeLinear); |
43 | } |
44 | |
45 | static int |
SDL_UpperSoftStretch(SDL_Surface *src, const SDL_Rect *srcrect,
                     SDL_Surface *dst, const SDL_Rect *dstrect, SDL_ScaleMode scaleMode)
48 | { |
49 | int ret; |
50 | int src_locked; |
51 | int dst_locked; |
52 | SDL_Rect full_src; |
53 | SDL_Rect full_dst; |
54 | |
55 | if (src->format->format != dst->format->format) { |
56 | return SDL_SetError("Only works with same format surfaces" ); |
57 | } |
58 | |
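    /* The linear path interpolates the four 8-bit channels of a pixel in
       parallel, so it needs a 4-bytes-per-pixel layout; ARGB2101010 is
       4 bytes wide but packs 10-bit channels, so it is excluded too. */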
59 | if (scaleMode != SDL_ScaleModeNearest) { |
60 | if (src->format->BytesPerPixel != 4 || src->format->format == SDL_PIXELFORMAT_ARGB2101010) { |
61 | return SDL_SetError("Wrong format" ); |
62 | } |
63 | } |
64 | |
65 | /* Verify the blit rectangles */ |
66 | if (srcrect) { |
67 | if ((srcrect->x < 0) || (srcrect->y < 0) || |
68 | ((srcrect->x + srcrect->w) > src->w) || |
69 | ((srcrect->y + srcrect->h) > src->h)) { |
70 | return SDL_SetError("Invalid source blit rectangle" ); |
71 | } |
72 | } else { |
73 | full_src.x = 0; |
74 | full_src.y = 0; |
75 | full_src.w = src->w; |
76 | full_src.h = src->h; |
77 | srcrect = &full_src; |
78 | } |
79 | if (dstrect) { |
80 | if ((dstrect->x < 0) || (dstrect->y < 0) || |
81 | ((dstrect->x + dstrect->w) > dst->w) || |
82 | ((dstrect->y + dstrect->h) > dst->h)) { |
83 | return SDL_SetError("Invalid destination blit rectangle" ); |
84 | } |
85 | } else { |
86 | full_dst.x = 0; |
87 | full_dst.y = 0; |
88 | full_dst.w = dst->w; |
89 | full_dst.h = dst->h; |
90 | dstrect = &full_dst; |
91 | } |
92 | |
93 | if (dstrect->w <= 0 || dstrect->h <= 0) { |
94 | return 0; |
95 | } |
96 | |
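    /* The scalers below do their arithmetic in 16.16 fixed point, so the
       rectangle dimensions have to fit in 16 bits. */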
97 | if (srcrect->w > SDL_MAX_UINT16 || srcrect->h > SDL_MAX_UINT16 || |
98 | dstrect->w > SDL_MAX_UINT16 || dstrect->h > SDL_MAX_UINT16) { |
99 | return SDL_SetError("Size too large for scaling" ); |
100 | } |
101 | |
102 | /* Lock the destination if it's in hardware */ |
103 | dst_locked = 0; |
104 | if (SDL_MUSTLOCK(dst)) { |
105 | if (SDL_LockSurface(dst) < 0) { |
106 | return SDL_SetError("Unable to lock destination surface" ); |
107 | } |
108 | dst_locked = 1; |
109 | } |
110 | /* Lock the source if it's in hardware */ |
111 | src_locked = 0; |
112 | if (SDL_MUSTLOCK(src)) { |
113 | if (SDL_LockSurface(src) < 0) { |
114 | if (dst_locked) { |
115 | SDL_UnlockSurface(dst); |
116 | } |
117 | return SDL_SetError("Unable to lock source surface" ); |
118 | } |
119 | src_locked = 1; |
120 | } |
121 | |
122 | if (scaleMode == SDL_ScaleModeNearest) { |
123 | ret = SDL_LowerSoftStretchNearest(src, srcrect, dst, dstrect); |
124 | } else { |
125 | ret = SDL_LowerSoftStretchLinear(src, srcrect, dst, dstrect); |
126 | } |
127 | |
128 | /* We need to unlock the surfaces if they're locked */ |
129 | if (dst_locked) { |
130 | SDL_UnlockSurface(dst); |
131 | } |
132 | if (src_locked) { |
133 | SDL_UnlockSurface(src); |
134 | } |
135 | |
136 | return ret; |
137 | } |
138 | |
/* The bilinear interpolation precision must be < 8, because the SSE
   multiply-add (_mm_madd_epi16) operates on *signed* 16-bit integers:
   a pixel byte such as 0xb1 scaled by an 8-bit weight would overflow
   into the sign bit and falsify the result. The same likely applies
   to NEON. */
#define PRECISION 7
144 | |
145 | #define FIXED_POINT(i) ((Uint32)(i) << 16) |
146 | #define SRC_INDEX(fp) ((Uint32)(fp) >> 16) |
147 | #define INTEGER(fp) ((Uint32)(fp) >> PRECISION) |
148 | #define FRAC(fp) ((Uint32)(fp >> (16 - PRECISION)) & ((1<<PRECISION) - 1)) |
149 | #define FRAC_ZERO 0 |
150 | #define FRAC_ONE (1 << PRECISION) |
151 | #define FP_ONE FIXED_POINT(1) |
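
/* A worked example with PRECISION == 7: scaling a 3-pixel row to 6 pixels
   gives a step of FIXED_POINT(3) / 6 == 0x8000 (0.5 in 16.16 fixed point).
   At the source position fp == 0x18000 (1.5), SRC_INDEX(fp) == 1 selects
   the left neighbour and FRAC(fp) == 64 == FRAC_ONE / 2 is the blend
   weight 0.5 expressed with 7 bits of precision. */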
152 | |
153 | #define BILINEAR___START \ |
154 | int i; \ |
155 | int fp_sum_h, fp_step_h, left_pad_h, right_pad_h; \ |
156 | int fp_sum_w, fp_step_w, left_pad_w, right_pad_w; \ |
157 | int fp_sum_w_init, left_pad_w_init, right_pad_w_init, dst_gap, middle_init; \ |
158 | get_scaler_datas(src_h, dst_h, &fp_sum_h, &fp_step_h, &left_pad_h, &right_pad_h); \ |
159 | get_scaler_datas(src_w, dst_w, &fp_sum_w, &fp_step_w, &left_pad_w, &right_pad_w); \ |
160 | fp_sum_w_init = fp_sum_w + left_pad_w * fp_step_w; \ |
161 | left_pad_w_init = left_pad_w; \ |
162 | right_pad_w_init = right_pad_w; \ |
163 | dst_gap = dst_pitch - 4 * dst_w; \ |
164 | middle_init = dst_w - left_pad_w - right_pad_w; \ |
165 | |
166 | #define BILINEAR___HEIGHT \ |
167 | int index_h, frac_h0, frac_h1, middle; \ |
168 | const Uint32 *src_h0, *src_h1; \ |
169 | int no_padding, incr_h0, incr_h1; \ |
170 | \ |
171 | no_padding = !(i < left_pad_h || i > dst_h - 1 - right_pad_h); \ |
172 | index_h = SRC_INDEX(fp_sum_h); \ |
173 | frac_h0 = FRAC(fp_sum_h); \ |
174 | \ |
175 | index_h = no_padding ? index_h : (i < left_pad_h ? 0 : src_h - 1); \ |
176 | frac_h0 = no_padding ? frac_h0 : 0; \ |
177 | incr_h1 = no_padding ? src_pitch : 0; \ |
178 | incr_h0 = index_h * src_pitch; \ |
179 | \ |
180 | src_h0 = (const Uint32 *)((const Uint8 *)src + incr_h0); \ |
181 | src_h1 = (const Uint32 *)((const Uint8 *)src_h0 + incr_h1); \ |
182 | \ |
183 | fp_sum_h += fp_step_h; \ |
184 | \ |
185 | frac_h1 = FRAC_ONE - frac_h0; \ |
186 | fp_sum_w = fp_sum_w_init; \ |
187 | right_pad_w = right_pad_w_init; \ |
188 | left_pad_w = left_pad_w_init; \ |
    middle = middle_init; \

#if defined(__clang__)
/* Do not inline this function, it breaks some compilers:
   - crash with clang 9.0.8  / android-ndk-r21d
   - crash with clang 11.0.3 / Xcode
   - OK    with clang 11.0.5 / android-ndk-22
   - OK    with clang 12.0.0 / Xcode */
__attribute__((noinline))
#endif
201 | static void |
202 | get_scaler_datas(int src_nb, int dst_nb, int *fp_start, int *fp_step, int *left_pad, int *right_pad) |
203 | { |
204 | |
205 | int step = FIXED_POINT(src_nb) / (dst_nb); /* source step in fixed point */ |
206 | int x0 = FP_ONE / 2; /* dst first pixel center at 0.5 in fixed point */ |
207 | int fp_sum; |
208 | int i; |
209 | #if 0 |
210 | /* scale to source coordinates */ |
211 | x0 *= src_nb; |
212 | x0 /= dst_nb; /* x0 == step / 2 */ |
213 | #else |
214 | /* Use this code for perfect match with pixman */ |
215 | Sint64 tmp[2]; |
216 | tmp[0] = (Sint64)step * (x0 >> 16); |
217 | tmp[1] = (Sint64)step * (x0 & 0xFFFF); |
218 | x0 = (int) (tmp[0] + ((tmp[1] + 0x8000) >> 16)); /* x0 == (step + 1) / 2 */ |
219 | #endif |
220 | /* -= 0.5, get back the pixel origin, in source coordinates */ |
221 | x0 -= FP_ONE / 2; |
222 | |
223 | *fp_start = x0; |
224 | *fp_step = step; |
225 | *left_pad = 0; |
226 | *right_pad = 0; |
227 | |
228 | fp_sum = x0; |
229 | for (i = 0; i < dst_nb; i++) { |
230 | if (fp_sum < 0) { |
231 | *left_pad += 1; |
232 | } else { |
233 | int index = SRC_INDEX(fp_sum); |
234 | if (index > src_nb - 2) { |
235 | *right_pad += 1; |
236 | } |
237 | } |
238 | fp_sum += step; |
239 | } |
    /* SDL_Log("%d -> %d x0=%d step=%d left_pad=%d right_pad=%d", src_nb, dst_nb, *fp_start, *fp_step, *left_pad, *right_pad); */
241 | } |
242 | |
243 | typedef struct color_t { |
244 | Uint8 a; |
245 | Uint8 b; |
246 | Uint8 c; |
247 | Uint8 d; |
248 | } color_t; |
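
/* The member names are deliberately generic: the interpolation below is
   applied to all four bytes identically, so any 8888 channel order works. */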
249 | |
250 | #if 0 |
251 | static void |
252 | printf_64(const char *str, void *var) |
253 | { |
254 | uint8_t *val = (uint8_t*) var; |
255 | printf(" * %s: %02x %02x %02x %02x _ %02x %02x %02x %02x\n" , |
256 | str, val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]); |
257 | } |
258 | #endif |
259 | |
260 | /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ |
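/* e.g. with PRECISION == 7: c0->a == 10, c1->a == 20 and frac0 == 64 (0.5)
   give INTEGER(64 * 10 + 64 * 20) == 1920 >> 7 == 15. */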
261 | |
262 | static SDL_INLINE void |
263 | INTERPOL(const Uint32 *src_x0, const Uint32 *src_x1, int frac0, int frac1, Uint32 *dst) |
264 | { |
265 | const color_t *c0 = (const color_t *)src_x0; |
266 | const color_t *c1 = (const color_t *)src_x1; |
267 | color_t *cx = (color_t *)dst; |
268 | #if 0 |
269 | cx->a = c0->a + INTEGER(frac0 * (c1->a - c0->a)); |
270 | cx->b = c0->b + INTEGER(frac0 * (c1->b - c0->b)); |
271 | cx->c = c0->c + INTEGER(frac0 * (c1->c - c0->c)); |
272 | cx->d = c0->d + INTEGER(frac0 * (c1->d - c0->d)); |
273 | #else |
274 | cx->a = INTEGER(frac1 * c0->a + frac0 * c1->a); |
275 | cx->b = INTEGER(frac1 * c0->b + frac0 * c1->b); |
276 | cx->c = INTEGER(frac1 * c0->c + frac0 * c1->c); |
277 | cx->d = INTEGER(frac1 * c0->d + frac0 * c1->d); |
278 | #endif |
279 | } |
280 | |
281 | static SDL_INLINE void |
282 | INTERPOL_BILINEAR(const Uint32 *s0, const Uint32 *s1, int frac_w0, int frac_h0, int frac_h1, Uint32 *dst) |
283 | { |
284 | Uint32 tmp[2]; |
285 | unsigned int frac_w1 = FRAC_ONE - frac_w0; |
286 | |
287 | /* Vertical first, store to 'tmp' */ |
288 | INTERPOL(s0, s1, frac_h0, frac_h1, tmp); |
289 | INTERPOL(s0 + 1, s1 + 1, frac_h0, frac_h1, tmp + 1); |
290 | |
291 | /* Horizontal, store to 'dst' */ |
292 | INTERPOL(tmp, tmp + 1, frac_w0, frac_w1, dst); |
293 | } |
294 | |
295 | static int |
296 | scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch, |
297 | Uint32 *dst, int dst_w, int dst_h, int dst_pitch) |
298 | { |
299 | BILINEAR___START |
300 | |
301 | for (i = 0; i < dst_h; i++) { |
302 | |
303 | BILINEAR___HEIGHT |
304 | |
305 | while (left_pad_w--) { |
306 | INTERPOL_BILINEAR(src_h0, src_h1, FRAC_ZERO, frac_h0, frac_h1, dst); |
307 | dst += 1; |
308 | } |
309 | |
310 | while (middle--) { |
311 | const Uint32 *s_00_01; |
312 | const Uint32 *s_10_11; |
313 | int index_w = 4 * SRC_INDEX(fp_sum_w); |
314 | int frac_w = FRAC(fp_sum_w); |
315 | fp_sum_w += fp_step_w; |
316 | |
317 | /* |
318 | x00 ... x0_ ..... x01 |
319 | . . . |
320 | . x . |
321 | . . . |
322 | . . . |
323 | x10 ... x1_ ..... x11 |
324 | */ |
325 | s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w); |
326 | s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w); |
327 | |
328 | INTERPOL_BILINEAR(s_00_01, s_10_11, frac_w, frac_h0, frac_h1, dst); |
329 | |
330 | dst += 1; |
331 | } |
332 | |
333 | while (right_pad_w--) { |
334 | int index_w = 4 * (src_w - 2); |
335 | const Uint32 *s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w); |
336 | const Uint32 *s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w); |
337 | INTERPOL_BILINEAR(s_00_01, s_10_11, FRAC_ONE, frac_h0, frac_h1, dst); |
338 | dst += 1; |
339 | } |
340 | dst = (Uint32 *)((Uint8 *)dst + dst_gap); |
341 | } |
342 | return 0; |
343 | } |
344 | |
345 | #if defined(__SSE2__) |
346 | # define HAVE_SSE2_INTRINSICS 1 |
347 | #endif |
348 | |
349 | #if defined(__ARM_NEON) |
350 | # define HAVE_NEON_INTRINSICS 1 |
351 | # define CAST_uint8x8_t (uint8x8_t) |
352 | # define CAST_uint32x2_t (uint32x2_t) |
353 | #endif |
354 | |
355 | #if defined(__WINRT__) || defined(_MSC_VER) |
356 | # if defined(HAVE_NEON_INTRINSICS) |
357 | # undef CAST_uint8x8_t |
358 | # undef CAST_uint32x2_t |
359 | # define CAST_uint8x8_t |
360 | # define CAST_uint32x2_t |
361 | # endif |
362 | #endif |
363 | |
364 | #if defined(HAVE_SSE2_INTRINSICS) |
365 | |
366 | #if 0 |
367 | static void |
368 | printf_128(const char *str, __m128i var) |
369 | { |
370 | uint16_t *val = (uint16_t*) &var; |
371 | printf(" * %s: %04x %04x %04x %04x _ %04x %04x %04x %04x\n" , |
372 | str, val[0], val[1], val[2], val[3], val[4], val[5], val[6], val[7]); |
373 | } |
374 | #endif |
375 | |
376 | static SDL_INLINE int |
hasSSE2(void)
378 | { |
379 | static int val = -1; |
380 | if (val != -1) { |
381 | return val; |
382 | } |
383 | val = SDL_HasSSE2(); |
384 | return val; |
385 | } |
386 | |
387 | static SDL_INLINE void |
388 | INTERPOL_BILINEAR_SSE(const Uint32 *s0, const Uint32 *s1, int frac_w, __m128i v_frac_h0, __m128i v_frac_h1, Uint32 *dst, __m128i zero) |
389 | { |
390 | __m128i x_00_01, x_10_11; /* Pixels in 4*uint8 in row */ |
391 | __m128i v_frac_w0, k0, l0, d0, e0; |
392 | |
393 | int f, f2; |
394 | f = frac_w; |
395 | f2 = FRAC_ONE - frac_w; |
    v_frac_w0 = _mm_set_epi16(f, f2, f, f2, f, f2, f, f2);

    x_00_01 = _mm_loadl_epi64((const __m128i *)s0); /* Load x00 and x01 */
400 | x_10_11 = _mm_loadl_epi64((const __m128i *)s1); |
401 | |
402 | /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ |
403 | |
404 | /* Interpolation vertical */ |
405 | k0 = _mm_mullo_epi16(_mm_unpacklo_epi8(x_00_01, zero), v_frac_h1); |
406 | l0 = _mm_mullo_epi16(_mm_unpacklo_epi8(x_10_11, zero), v_frac_h0); |
407 | k0 = _mm_add_epi16(k0, l0); |
408 | |
    /* For a perfect match with the scalar path, optionally clear the
       fractional part: */
    /*
    k0 = _mm_srli_epi16(k0, PRECISION);
    k0 = _mm_slli_epi16(k0, PRECISION);
    */
414 | |
415 | /* Interpolation horizontal */ |
416 | l0 = _mm_unpacklo_epi64(/* unused */ l0, k0); |
417 | k0 = _mm_madd_epi16(_mm_unpackhi_epi16(l0, k0), v_frac_w0); |
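    /* The unpack interleaves the channels of the two vertically
       interpolated pixels as { j0.c, j1.c, ... }, so the multiply-add
       yields j0.c * frac_w1 + j1.c * frac_w0 in each 32-bit lane, carrying
       2 * PRECISION fractional bits (hence the shift below). */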
418 | |
419 | /* Store 1 pixel */ |
420 | d0 = _mm_srli_epi32(k0, PRECISION * 2); |
421 | e0 = _mm_packs_epi32(d0, d0); |
422 | e0 = _mm_packus_epi16(e0, e0); |
423 | *dst = _mm_cvtsi128_si32(e0); |
424 | } |
425 | |
426 | static int |
427 | scale_mat_SSE(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *dst, int dst_w, int dst_h, int dst_pitch) |
428 | { |
429 | BILINEAR___START |
430 | |
431 | for (i = 0; i < dst_h; i++) { |
432 | int nb_block2; |
433 | __m128i v_frac_h0; |
434 | __m128i v_frac_h1; |
435 | __m128i zero; |
436 | |
437 | BILINEAR___HEIGHT |
438 | |
439 | nb_block2 = middle / 2; |
440 | |
441 | v_frac_h0 = _mm_set_epi16(frac_h0, frac_h0, frac_h0, frac_h0, frac_h0, frac_h0, frac_h0, frac_h0); |
442 | v_frac_h1 = _mm_set_epi16(frac_h1, frac_h1, frac_h1, frac_h1, frac_h1, frac_h1, frac_h1, frac_h1); |
443 | zero = _mm_setzero_si128(); |
444 | |
445 | while (left_pad_w--) { |
446 | INTERPOL_BILINEAR_SSE(src_h0, src_h1, FRAC_ZERO, v_frac_h0, v_frac_h1, dst, zero); |
447 | dst += 1; |
448 | } |
449 | |
450 | while (nb_block2--) { |
451 | int index_w_0, frac_w_0; |
452 | int index_w_1, frac_w_1; |
453 | |
454 | const Uint32 *s_00_01, *s_02_03, *s_10_11, *s_12_13; |
455 | |
            __m128i x_00_01, x_10_11, x_02_03, x_12_13; /* Pixels in 4*uint8 in row */
457 | __m128i v_frac_w0, k0, l0, d0, e0; |
458 | __m128i v_frac_w1, k1, l1, d1, e1; |
459 | |
460 | int f, f2; |
461 | index_w_0 = 4 * SRC_INDEX(fp_sum_w); |
462 | frac_w_0 = FRAC(fp_sum_w); |
463 | fp_sum_w += fp_step_w; |
464 | index_w_1 = 4 * SRC_INDEX(fp_sum_w); |
465 | frac_w_1 = FRAC(fp_sum_w); |
466 | fp_sum_w += fp_step_w; |
467 | /* |
468 | x00............ x01 x02...........x03 |
469 | . . . . . . |
470 | j0 f0 j1 j2 f1 j3 |
471 | . . . . . . |
472 | . . . . . . |
473 | . . . . . . |
474 | x10............ x11 x12...........x13 |
475 | */ |
476 | s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w_0); |
477 | s_02_03 = (const Uint32 *)((const Uint8 *)src_h0 + index_w_1); |
478 | s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_0); |
479 | s_12_13 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_1); |
480 | |
481 | f = frac_w_0; |
482 | f2 = FRAC_ONE - frac_w_0; |
483 | v_frac_w0 = _mm_set_epi16(f, f2, f, f2, f, f2, f, f2); |
484 | |
485 | f = frac_w_1; |
486 | f2 = FRAC_ONE - frac_w_1; |
487 | v_frac_w1 = _mm_set_epi16(f, f2, f, f2, f, f2, f, f2); |
488 | |
489 | x_00_01 = _mm_loadl_epi64((const __m128i *)s_00_01); /* Load x00 and x01 */ |
490 | x_02_03 = _mm_loadl_epi64((const __m128i *)s_02_03); |
491 | x_10_11 = _mm_loadl_epi64((const __m128i *)s_10_11); |
492 | x_12_13 = _mm_loadl_epi64((const __m128i *)s_12_13); |
493 | |
494 | /* Interpolation vertical */ |
495 | k0 = _mm_mullo_epi16(_mm_unpacklo_epi8(x_00_01, zero), v_frac_h1); |
496 | l0 = _mm_mullo_epi16(_mm_unpacklo_epi8(x_10_11, zero), v_frac_h0); |
497 | k0 = _mm_add_epi16(k0, l0); |
498 | k1 = _mm_mullo_epi16(_mm_unpacklo_epi8(x_02_03, zero), v_frac_h1); |
499 | l1 = _mm_mullo_epi16(_mm_unpacklo_epi8(x_12_13, zero), v_frac_h0); |
500 | k1 = _mm_add_epi16(k1, l1); |
501 | |
502 | /* Interpolation horizontal */ |
503 | l0 = _mm_unpacklo_epi64(/* unused */ l0, k0); |
504 | k0 = _mm_madd_epi16(_mm_unpackhi_epi16(l0, k0), v_frac_w0); |
505 | l1 = _mm_unpacklo_epi64(/* unused */ l1, k1); |
506 | k1 = _mm_madd_epi16(_mm_unpackhi_epi16(l1, k1), v_frac_w1); |
507 | |
508 | /* Store 1 pixel */ |
509 | d0 = _mm_srli_epi32(k0, PRECISION * 2); |
510 | e0 = _mm_packs_epi32(d0, d0); |
511 | e0 = _mm_packus_epi16(e0, e0); |
512 | *dst++ = _mm_cvtsi128_si32(e0); |
513 | |
514 | /* Store 1 pixel */ |
515 | d1 = _mm_srli_epi32(k1, PRECISION * 2); |
516 | e1 = _mm_packs_epi32(d1, d1); |
517 | e1 = _mm_packus_epi16(e1, e1); |
518 | *dst++ = _mm_cvtsi128_si32(e1); |
519 | } |
520 | |
521 | /* Last point */ |
522 | if (middle & 0x1) { |
523 | const Uint32 *s_00_01; |
524 | const Uint32 *s_10_11; |
525 | int index_w = 4 * SRC_INDEX(fp_sum_w); |
526 | int frac_w = FRAC(fp_sum_w); |
527 | fp_sum_w += fp_step_w; |
528 | s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w); |
529 | s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w); |
530 | INTERPOL_BILINEAR_SSE(s_00_01, s_10_11, frac_w, v_frac_h0, v_frac_h1, dst, zero); |
531 | dst += 1; |
532 | } |
533 | |
534 | while (right_pad_w--) { |
535 | int index_w = 4 * (src_w - 2); |
536 | const Uint32 *s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w); |
537 | const Uint32 *s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w); |
538 | INTERPOL_BILINEAR_SSE(s_00_01, s_10_11, FRAC_ONE, v_frac_h0, v_frac_h1, dst, zero); |
539 | dst += 1; |
540 | } |
541 | dst = (Uint32 *)((Uint8 *)dst + dst_gap); |
542 | } |
543 | return 0; |
544 | } |
545 | #endif |
546 | |
547 | #if defined(HAVE_NEON_INTRINSICS) |
548 | |
549 | static SDL_INLINE int |
hasNEON(void)
551 | { |
552 | static int val = -1; |
553 | if (val != -1) { |
554 | return val; |
555 | } |
556 | val = SDL_HasNEON(); |
557 | return val; |
558 | } |
559 | |
560 | static SDL_INLINE void |
561 | INTERPOL_BILINEAR_NEON(const Uint32 *s0, const Uint32 *s1, int frac_w, uint8x8_t v_frac_h0, uint8x8_t v_frac_h1, Uint32 *dst) |
562 | { |
563 | uint8x8_t x_00_01, x_10_11; /* Pixels in 4*uint8 in row */ |
564 | uint16x8_t k0; |
565 | uint32x4_t l0; |
566 | uint16x8_t d0; |
567 | uint8x8_t e0; |
568 | |
569 | x_00_01 = CAST_uint8x8_t vld1_u32(s0); /* Load 2 pixels */ |
570 | x_10_11 = CAST_uint8x8_t vld1_u32(s1); |
571 | |
572 | /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ |
573 | k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ |
574 | k0 = vmlal_u8(k0, x_10_11, v_frac_h0); /* k0 += x1 * frac */ |
575 | |
576 | /* k0 now contains 2 interpolated pixels { j0, j1 } */ |
577 | l0 = vshll_n_u16(vget_low_u16(k0), PRECISION); |
578 | l0 = vmlsl_n_u16(l0, vget_low_u16(k0), frac_w); |
579 | l0 = vmlal_n_u16(l0, vget_high_u16(k0), frac_w); |
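    /* i.e. l0 == j0 * (FRAC_ONE - frac_w) + j1 * frac_w, rewritten as
       (j0 << PRECISION) - j0 * frac_w + j1 * frac_w so that every multiply
       stays an unsigned widening one. */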
580 | |
581 | /* Shift and narrow */ |
582 | d0 = vcombine_u16( |
583 | /* uint16x4_t */ vshrn_n_u32(l0, 2 * PRECISION), |
584 | /* uint16x4_t */ vshrn_n_u32(l0, 2 * PRECISION) |
585 | ); |
586 | |
587 | /* Narrow again */ |
588 | e0 = vmovn_u16(d0); |
589 | |
590 | /* Store 1 pixel */ |
591 | *dst = vget_lane_u32(CAST_uint32x2_t e0, 0); |
592 | } |
593 | |
594 | static int |
595 | scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *dst, int dst_w, int dst_h, int dst_pitch) |
596 | { |
597 | BILINEAR___START |
598 | |
599 | for (i = 0; i < dst_h; i++) { |
600 | int nb_block4; |
601 | uint8x8_t v_frac_h0, v_frac_h1; |
602 | |
603 | BILINEAR___HEIGHT |
604 | |
605 | nb_block4 = middle / 4; |
606 | |
607 | v_frac_h0 = vmov_n_u8(frac_h0); |
608 | v_frac_h1 = vmov_n_u8(frac_h1); |
609 | |
610 | while (left_pad_w--) { |
611 | INTERPOL_BILINEAR_NEON(src_h0, src_h1, FRAC_ZERO, v_frac_h0, v_frac_h1, dst); |
612 | dst += 1; |
613 | } |
614 | |
615 | while (nb_block4--) { |
616 | int index_w_0, frac_w_0; |
617 | int index_w_1, frac_w_1; |
618 | int index_w_2, frac_w_2; |
619 | int index_w_3, frac_w_3; |
620 | |
621 | const Uint32 *s_00_01, *s_02_03, *s_04_05, *s_06_07; |
622 | const Uint32 *s_10_11, *s_12_13, *s_14_15, *s_16_17; |
623 | |
            uint8x8_t x_00_01, x_10_11, x_02_03, x_12_13; /* Pixels in 4*uint8 in row */
625 | uint8x8_t x_04_05, x_14_15, x_06_07, x_16_17; |
626 | |
627 | uint16x8_t k0, k1, k2, k3; |
628 | uint32x4_t l0, l1, l2, l3; |
629 | uint16x8_t d0, d1; |
630 | uint8x8_t e0, e1; |
631 | uint32x4_t f0; |
632 | |
633 | index_w_0 = 4 * SRC_INDEX(fp_sum_w); |
634 | frac_w_0 = FRAC(fp_sum_w); |
635 | fp_sum_w += fp_step_w; |
636 | index_w_1 = 4 * SRC_INDEX(fp_sum_w); |
637 | frac_w_1 = FRAC(fp_sum_w); |
638 | fp_sum_w += fp_step_w; |
639 | index_w_2 = 4 * SRC_INDEX(fp_sum_w); |
640 | frac_w_2 = FRAC(fp_sum_w); |
641 | fp_sum_w += fp_step_w; |
642 | index_w_3 = 4 * SRC_INDEX(fp_sum_w); |
643 | frac_w_3 = FRAC(fp_sum_w); |
644 | fp_sum_w += fp_step_w; |
645 | |
646 | s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w_0); |
647 | s_02_03 = (const Uint32 *)((const Uint8 *)src_h0 + index_w_1); |
648 | s_04_05 = (const Uint32 *)((const Uint8 *)src_h0 + index_w_2); |
649 | s_06_07 = (const Uint32 *)((const Uint8 *)src_h0 + index_w_3); |
650 | s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_0); |
651 | s_12_13 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_1); |
652 | s_14_15 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_2); |
653 | s_16_17 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_3); |
654 | |
655 | /* Interpolation vertical */ |
656 | x_00_01 = CAST_uint8x8_t vld1_u32(s_00_01); /* Load 2 pixels */ |
657 | x_02_03 = CAST_uint8x8_t vld1_u32(s_02_03); |
658 | x_04_05 = CAST_uint8x8_t vld1_u32(s_04_05); |
659 | x_06_07 = CAST_uint8x8_t vld1_u32(s_06_07); |
660 | x_10_11 = CAST_uint8x8_t vld1_u32(s_10_11); |
661 | x_12_13 = CAST_uint8x8_t vld1_u32(s_12_13); |
662 | x_14_15 = CAST_uint8x8_t vld1_u32(s_14_15); |
663 | x_16_17 = CAST_uint8x8_t vld1_u32(s_16_17); |
664 | |
665 | /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ |
666 | k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ |
667 | k0 = vmlal_u8(k0, x_10_11, v_frac_h0); /* k0 += x1 * frac */ |
668 | |
669 | k1 = vmull_u8(x_02_03, v_frac_h1); |
670 | k1 = vmlal_u8(k1, x_12_13, v_frac_h0); |
671 | |
672 | k2 = vmull_u8(x_04_05, v_frac_h1); |
673 | k2 = vmlal_u8(k2, x_14_15, v_frac_h0); |
674 | |
675 | k3 = vmull_u8(x_06_07, v_frac_h1); |
676 | k3 = vmlal_u8(k3, x_16_17, v_frac_h0); |
677 | |
678 | /* k0 now contains 2 interpolated pixels { j0, j1 } */ |
679 | /* k1 now contains 2 interpolated pixels { j2, j3 } */ |
680 | /* k2 now contains 2 interpolated pixels { j4, j5 } */ |
681 | /* k3 now contains 2 interpolated pixels { j6, j7 } */ |
682 | |
683 | l0 = vshll_n_u16(vget_low_u16(k0), PRECISION); |
684 | l0 = vmlsl_n_u16(l0, vget_low_u16(k0), frac_w_0); |
685 | l0 = vmlal_n_u16(l0, vget_high_u16(k0), frac_w_0); |
686 | |
687 | l1 = vshll_n_u16(vget_low_u16(k1), PRECISION); |
688 | l1 = vmlsl_n_u16(l1, vget_low_u16(k1), frac_w_1); |
689 | l1 = vmlal_n_u16(l1, vget_high_u16(k1), frac_w_1); |
690 | |
691 | l2 = vshll_n_u16(vget_low_u16(k2), PRECISION); |
692 | l2 = vmlsl_n_u16(l2, vget_low_u16(k2), frac_w_2); |
693 | l2 = vmlal_n_u16(l2, vget_high_u16(k2), frac_w_2); |
694 | |
695 | l3 = vshll_n_u16(vget_low_u16(k3), PRECISION); |
696 | l3 = vmlsl_n_u16(l3, vget_low_u16(k3), frac_w_3); |
697 | l3 = vmlal_n_u16(l3, vget_high_u16(k3), frac_w_3); |
698 | |
699 | /* shift and narrow */ |
700 | d0 = vcombine_u16( |
701 | /* uint16x4_t */ vshrn_n_u32(l0, 2 * PRECISION), |
702 | /* uint16x4_t */ vshrn_n_u32(l1, 2 * PRECISION) |
703 | ); |
704 | /* narrow again */ |
705 | e0 = vmovn_u16(d0); |
706 | |
707 | /* Shift and narrow */ |
708 | d1 = vcombine_u16( |
709 | /* uint16x4_t */ vshrn_n_u32(l2, 2 * PRECISION), |
710 | /* uint16x4_t */ vshrn_n_u32(l3, 2 * PRECISION) |
711 | ); |
712 | /* Narrow again */ |
713 | e1 = vmovn_u16(d1); |
714 | |
715 | f0 = vcombine_u32(CAST_uint32x2_t e0, CAST_uint32x2_t e1); |
716 | /* Store 4 pixels */ |
717 | vst1q_u32(dst, f0); |
718 | |
719 | dst += 4; |
720 | } |
721 | |
722 | if (middle & 0x2) { |
723 | int index_w_0, frac_w_0; |
724 | int index_w_1, frac_w_1; |
725 | const Uint32 *s_00_01, *s_02_03; |
726 | const Uint32 *s_10_11, *s_12_13; |
            uint8x8_t x_00_01, x_10_11, x_02_03, x_12_13; /* Pixels in 4*uint8 in row */
728 | uint16x8_t k0, k1; |
729 | uint32x4_t l0, l1; |
730 | uint16x8_t d0; |
731 | uint8x8_t e0; |
732 | |
733 | index_w_0 = 4 * SRC_INDEX(fp_sum_w); |
734 | frac_w_0 = FRAC(fp_sum_w); |
735 | fp_sum_w += fp_step_w; |
736 | index_w_1 = 4 * SRC_INDEX(fp_sum_w); |
737 | frac_w_1 = FRAC(fp_sum_w); |
738 | fp_sum_w += fp_step_w; |
739 | /* |
740 | x00............ x01 x02...........x03 |
741 | . . . . . . |
742 | j0 dest0 j1 j2 dest1 j3 |
743 | . . . . . . |
744 | . . . . . . |
745 | . . . . . . |
746 | x10............ x11 x12...........x13 |
747 | */ |
748 | s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w_0); |
749 | s_02_03 = (const Uint32 *)((const Uint8 *)src_h0 + index_w_1); |
750 | s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_0); |
751 | s_12_13 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_1); |
752 | |
753 | /* Interpolation vertical */ |
            x_00_01 = CAST_uint8x8_t vld1_u32(s_00_01); /* Load 2 pixels */
755 | x_02_03 = CAST_uint8x8_t vld1_u32(s_02_03); |
756 | x_10_11 = CAST_uint8x8_t vld1_u32(s_10_11); |
757 | x_12_13 = CAST_uint8x8_t vld1_u32(s_12_13); |
758 | |
759 | /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */ |
760 | k0 = vmull_u8(x_00_01, v_frac_h1); /* k0 := x0 * (1 - frac) */ |
761 | k0 = vmlal_u8(k0, x_10_11, v_frac_h0); /* k0 += x1 * frac */ |
762 | |
763 | k1 = vmull_u8(x_02_03, v_frac_h1); |
764 | k1 = vmlal_u8(k1, x_12_13, v_frac_h0); |
765 | |
766 | /* k0 now contains 2 interpolated pixels { j0, j1 } */ |
767 | /* k1 now contains 2 interpolated pixels { j2, j3 } */ |
768 | |
769 | l0 = vshll_n_u16(vget_low_u16(k0), PRECISION); |
770 | l0 = vmlsl_n_u16(l0, vget_low_u16(k0), frac_w_0); |
771 | l0 = vmlal_n_u16(l0, vget_high_u16(k0), frac_w_0); |
772 | |
773 | l1 = vshll_n_u16(vget_low_u16(k1), PRECISION); |
774 | l1 = vmlsl_n_u16(l1, vget_low_u16(k1), frac_w_1); |
775 | l1 = vmlal_n_u16(l1, vget_high_u16(k1), frac_w_1); |
776 | |
777 | /* Shift and narrow */ |
778 | |
779 | d0 = vcombine_u16( |
780 | /* uint16x4_t */ vshrn_n_u32(l0, 2 * PRECISION), |
781 | /* uint16x4_t */ vshrn_n_u32(l1, 2 * PRECISION) |
782 | ); |
783 | |
784 | /* Narrow again */ |
785 | e0 = vmovn_u16(d0); |
786 | |
787 | /* Store 2 pixels */ |
788 | vst1_u32(dst, CAST_uint32x2_t e0); |
789 | dst += 2; |
790 | } |
791 | |
792 | /* Last point */ |
793 | if (middle & 0x1) { |
794 | int index_w = 4 * SRC_INDEX(fp_sum_w); |
795 | int frac_w = FRAC(fp_sum_w); |
796 | const Uint32 *s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w); |
797 | const Uint32 *s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w); |
798 | INTERPOL_BILINEAR_NEON(s_00_01, s_10_11, frac_w, v_frac_h0, v_frac_h1, dst); |
799 | dst += 1; |
800 | } |
801 | |
802 | while (right_pad_w--) { |
803 | int index_w = 4 * (src_w - 2); |
804 | const Uint32 *s_00_01 = (const Uint32 *)((const Uint8 *)src_h0 + index_w); |
805 | const Uint32 *s_10_11 = (const Uint32 *)((const Uint8 *)src_h1 + index_w); |
806 | INTERPOL_BILINEAR_NEON(s_00_01, s_10_11, FRAC_ONE, v_frac_h0, v_frac_h1, dst); |
807 | dst += 1; |
808 | } |
809 | |
810 | dst = (Uint32 *)((Uint8 *)dst + dst_gap); |
811 | } |
812 | return 0; |
813 | } |
814 | #endif |
815 | |
static int
817 | SDL_LowerSoftStretchLinear(SDL_Surface *s, const SDL_Rect *srcrect, |
818 | SDL_Surface *d, const SDL_Rect *dstrect) |
819 | { |
820 | int ret = -1; |
821 | int src_w = srcrect->w; |
822 | int src_h = srcrect->h; |
823 | int dst_w = dstrect->w; |
824 | int dst_h = dstrect->h; |
825 | int src_pitch = s->pitch; |
826 | int dst_pitch = d->pitch; |
827 | Uint32 *src = (Uint32 *) ((Uint8 *)s->pixels + srcrect->x * 4 + srcrect->y * src_pitch); |
828 | Uint32 *dst = (Uint32 *) ((Uint8 *)d->pixels + dstrect->x * 4 + dstrect->y * dst_pitch); |
829 | |
830 | #if defined(HAVE_NEON_INTRINSICS) |
831 | if (ret == -1 && hasNEON()) { |
832 | ret = scale_mat_NEON(src, src_w, src_h, src_pitch, dst, dst_w, dst_h, dst_pitch); |
833 | } |
834 | #endif |
835 | |
836 | #if defined(HAVE_SSE2_INTRINSICS) |
837 | if (ret == -1 && hasSSE2()) { |
838 | ret = scale_mat_SSE(src, src_w, src_h, src_pitch, dst, dst_w, dst_h, dst_pitch); |
839 | } |
840 | #endif |
841 | |
842 | if (ret == -1) { |
843 | ret = scale_mat(src, src_w, src_h, src_pitch, dst, dst_w, dst_h, dst_pitch); |
844 | } |
845 | |
846 | return ret; |
847 | } |
848 | |
849 | |
850 | #define SDL_SCALE_NEAREST__START \ |
851 | int i; \ |
852 | Uint32 posy, incy; \ |
853 | Uint32 posx, incx; \ |
854 | int dst_gap; \ |
855 | int srcy, n; \ |
856 | const Uint32 *src_h0; \ |
857 | incy = (src_h << 16) / dst_h; \ |
858 | incx = (src_w << 16) / dst_w; \ |
859 | dst_gap = dst_pitch - bpp * dst_w; \ |
860 | posy = incy / 2; \ |
861 | |
862 | #define SDL_SCALE_NEAREST__HEIGHT \ |
863 | srcy = (posy >> 16); \ |
864 | src_h0 = (const Uint32 *)((const Uint8 *)src_ptr + srcy * src_pitch); \ |
865 | posy += incy; \ |
866 | posx = incx / 2; \ |
867 | n = dst_w; |
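
/* Sampling happens at destination pixel centers: e.g. for src_w == 4 and
   dst_w == 2, incx == 2.0 in 16.16 fixed point and posx starts at 1.0, so
   source columns 1 and 3 are picked. */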
868 | |
869 | |
870 | static int |
871 | scale_mat_nearest_1(const Uint32 *src_ptr, int src_w, int src_h, int src_pitch, |
872 | Uint32 *dst, int dst_w, int dst_h, int dst_pitch) |
873 | { |
874 | Uint32 bpp = 1; |
875 | SDL_SCALE_NEAREST__START |
876 | for (i = 0; i < dst_h; i++) { |
877 | SDL_SCALE_NEAREST__HEIGHT |
878 | while (n--) { |
879 | const Uint8 *src; |
880 | int srcx = bpp * (posx >> 16); |
881 | posx += incx; |
882 | src = (const Uint8 *)src_h0 + srcx; |
883 | *(Uint8*)dst = *src; |
884 | dst = (Uint32 *)((Uint8*)dst + bpp); |
885 | } |
886 | dst = (Uint32 *)((Uint8 *)dst + dst_gap); |
887 | } |
888 | return 0; |
889 | } |
890 | |
891 | static int |
892 | scale_mat_nearest_2(const Uint32 *src_ptr, int src_w, int src_h, int src_pitch, |
893 | Uint32 *dst, int dst_w, int dst_h, int dst_pitch) |
894 | { |
895 | Uint32 bpp = 2; |
896 | SDL_SCALE_NEAREST__START |
897 | for (i = 0; i < dst_h; i++) { |
898 | SDL_SCALE_NEAREST__HEIGHT |
899 | while (n--) { |
900 | const Uint16 *src; |
901 | int srcx = bpp * (posx >> 16); |
902 | posx += incx; |
903 | src = (const Uint16 *)((const Uint8 *)src_h0 + srcx); |
904 | *(Uint16*)dst = *src; |
905 | dst = (Uint32 *)((Uint8*)dst + bpp); |
906 | } |
907 | dst = (Uint32 *)((Uint8 *)dst + dst_gap); |
908 | } |
909 | return 0; |
910 | } |
911 | |
912 | static int |
913 | scale_mat_nearest_3(const Uint32 *src_ptr, int src_w, int src_h, int src_pitch, |
914 | Uint32 *dst, int dst_w, int dst_h, int dst_pitch) |
915 | { |
916 | Uint32 bpp = 3; |
917 | SDL_SCALE_NEAREST__START |
918 | for (i = 0; i < dst_h; i++) { |
919 | SDL_SCALE_NEAREST__HEIGHT |
920 | while (n--) { |
921 | const Uint8 *src; |
922 | int srcx = bpp * (posx >> 16); |
923 | posx += incx; |
924 | src = (const Uint8 *)src_h0 + srcx; |
925 | ((Uint8*)dst)[0] = src[0]; |
926 | ((Uint8*)dst)[1] = src[1]; |
927 | ((Uint8*)dst)[2] = src[2]; |
928 | dst = (Uint32 *)((Uint8*)dst + bpp); |
929 | } |
930 | dst = (Uint32 *)((Uint8 *)dst + dst_gap); |
931 | } |
932 | return 0; |
933 | } |
934 | |
935 | static int |
936 | scale_mat_nearest_4(const Uint32 *src_ptr, int src_w, int src_h, int src_pitch, |
937 | Uint32 *dst, int dst_w, int dst_h, int dst_pitch) |
938 | { |
939 | Uint32 bpp = 4; |
940 | SDL_SCALE_NEAREST__START |
941 | for (i = 0; i < dst_h; i++) { |
942 | SDL_SCALE_NEAREST__HEIGHT |
943 | while (n--) { |
944 | const Uint32 *src; |
945 | int srcx = bpp * (posx >> 16); |
946 | posx += incx; |
947 | src = (const Uint32 *)((const Uint8 *)src_h0 + srcx); |
948 | *dst = *src; |
949 | dst = (Uint32 *)((Uint8*)dst + bpp); |
950 | } |
951 | dst = (Uint32 *)((Uint8 *)dst + dst_gap); |
952 | } |
953 | return 0; |
954 | } |
955 | |
static int
957 | SDL_LowerSoftStretchNearest(SDL_Surface *s, const SDL_Rect *srcrect, |
958 | SDL_Surface *d, const SDL_Rect *dstrect) |
959 | { |
960 | int src_w = srcrect->w; |
961 | int src_h = srcrect->h; |
962 | int dst_w = dstrect->w; |
963 | int dst_h = dstrect->h; |
964 | int src_pitch = s->pitch; |
965 | int dst_pitch = d->pitch; |
966 | |
967 | const int bpp = d->format->BytesPerPixel; |
968 | |
969 | Uint32 *src = (Uint32 *) ((Uint8 *)s->pixels + srcrect->x * bpp + srcrect->y * src_pitch); |
970 | Uint32 *dst = (Uint32 *) ((Uint8 *)d->pixels + dstrect->x * bpp + dstrect->y * dst_pitch); |
971 | |
972 | if (bpp == 4) { |
973 | return scale_mat_nearest_4(src, src_w, src_h, src_pitch, dst, dst_w, dst_h, dst_pitch); |
974 | } else if (bpp == 3) { |
975 | return scale_mat_nearest_3(src, src_w, src_h, src_pitch, dst, dst_w, dst_h, dst_pitch); |
976 | } else if (bpp == 2) { |
977 | return scale_mat_nearest_2(src, src_w, src_h, src_pitch, dst, dst_w, dst_h, dst_pitch); |
978 | } else { |
979 | return scale_mat_nearest_1(src, src_w, src_h, src_pitch, dst, dst_w, dst_h, dst_pitch); |
980 | } |
981 | } |
982 | |
983 | /* vi: set ts=4 sw=4 expandtab: */ |
984 | |