1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27/*
28 * FUNCTION
29 * Image affine transformation with Bicubic filtering
30 * SYNOPSIS
31 * mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
32 * mlib_s32 *rightEdges,
33 * mlib_s32 *xStarts,
34 * mlib_s32 *yStarts,
35 * mlib_s32 *sides,
36 * mlib_u8 *dstData,
37 * mlib_u8 **lineAddr,
38 * mlib_s32 dstYStride,
39 * mlib_s32 is_affine,
40 * mlib_s32 srcYStride,
41 * mlib_filter filter)
42 *
43 * ARGUMENTS
44 * leftEdges array[dstHeight] of xLeft coordinates
 * rightEdges array[dstHeight] of xRight coordinates
46 * xStarts array[dstHeight] of xStart * 65536 coordinates
47 * yStarts array[dstHeight] of yStart * 65536 coordinates
48 * sides output array[4]. sides[0] is yStart, sides[1] is yFinish,
49 * sides[2] is dx * 65536, sides[3] is dy * 65536
50 * dstData pointer to the first pixel on (yStart - 1) line
51 * lineAddr array[srcHeight] of pointers to the first pixel on
52 * the corresponding lines
53 * dstYStride stride of destination image
54 * is_affine indicator (Affine - GridWarp)
55 * srcYStride stride of source image
56 * filter type of resampling filter
57 *
58 * DESCRIPTION
59 * The functions step along the lines from xLeft to xRight and apply
60 * the bicubic filtering.
61 *
62 */
63
64#include "mlib_ImageAffine.h"
65
/* Element type of the source and destination pixel pointers used in this file. */
#define DTYPE mlib_u16

/*
 * Builds the name of a function defined in this file from its channel tag,
 * e.g. FUN_NAME(1ch) expands to mlib_ImageAffine_u16_1ch_bc.
 */
#define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bc

/*
 * NOTE(review): not referenced directly in this file; presumably consumed by
 * the FILTER_SHIFT / FILTER_MASK macros from mlib_ImageAffine.h - confirm.
 */
#define FILTER_BITS 9
71
72/***************************************************************/
73#ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
74
75/***************************************************************/
/*
 * NOTE(review): FILTER_ELEM_BITS is redefined here for the floating-point
 * coefficient tables; it is presumably an input to FILTER_SHIFT / FILTER_MASK
 * declared in mlib_ImageAffine.h - confirm.
 */
#undef FILTER_ELEM_BITS
#define FILTER_ELEM_BITS 4

/***************************************************************/
/*
 * SAT_U16(DST) stores the upper 16 bits of the 32-bit-scaled value held in
 * the local variable `val0` (which must be in scope at the point of use)
 * into DST, clamping the result to [MLIB_U16_MIN, MLIB_U16_MAX].
 * Use it as a statement: SAT_U16(dst[0]);
 */
#ifdef MLIB_USE_FTOI_CLAMPING

/* Variant that depends on the double -> mlib_s32 conversion for clamping. */
#define SAT_U16(DST)                                                    \
  DST = ((mlib_s32)(val0 - (mlib_d64)0x7FFF8000) >> 16) ^ 0x8000

#else

/* Variant with explicit range checks; only the selected arm is evaluated. */
#define SAT_U16(DST)                                                    \
  (DST) = (val0 >= MLIB_U32_MAX) ? MLIB_U16_MAX                         \
        : (val0 <= MLIB_U32_MIN) ? MLIB_U16_MIN                         \
        : (((mlib_u32)val0) >> 16)

#endif /* MLIB_USE_FTOI_CLAMPING */
96
97/***************************************************************/
98mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
99{
100 DECLAREVAR_BC();
101 DTYPE *dstLineEnd;
102 const mlib_f32 *mlib_filters_table;
103
104 if (filter == MLIB_BICUBIC) {
105 mlib_filters_table = mlib_filters_s16f_bc;
106 }
107 else {
108 mlib_filters_table = mlib_filters_s16f_bc2;
109 }
110
111 for (j = yStart; j <= yFinish; j++) {
112 mlib_d64 xf0, xf1, xf2, xf3;
113 mlib_d64 yf0, yf1, yf2, yf3;
114 mlib_d64 c0, c1, c2, c3, val0;
115 mlib_s32 filterpos;
116 mlib_f32 *fptr;
117 mlib_s32 s0, s1, s2, s3;
118 mlib_s32 s4, s5, s6, s7;
119
120 CLIP(1);
121 dstLineEnd = (DTYPE *) dstData + xRight;
122
123 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
124 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
125
126 xf0 = fptr[0];
127 xf1 = fptr[1];
128 xf2 = fptr[2];
129 xf3 = fptr[3];
130
131 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
132 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
133
134 yf0 = fptr[0];
135 yf1 = fptr[1];
136 yf2 = fptr[2];
137 yf3 = fptr[3];
138
139 xSrc = (X >> MLIB_SHIFT) - 1;
140 ySrc = (Y >> MLIB_SHIFT) - 1;
141
142 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
143 s0 = srcPixelPtr[0];
144 s1 = srcPixelPtr[1];
145 s2 = srcPixelPtr[2];
146 s3 = srcPixelPtr[3];
147
148 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
149 s4 = srcPixelPtr[0];
150 s5 = srcPixelPtr[1];
151 s6 = srcPixelPtr[2];
152 s7 = srcPixelPtr[3];
153
154 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
155
156 X += dX;
157 Y += dY;
158
159 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
160 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
161 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
162 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
163 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
164 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
165 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
166 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
167
168 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
169 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
170
171 xf0 = fptr[0];
172 xf1 = fptr[1];
173 xf2 = fptr[2];
174 xf3 = fptr[3];
175
176 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
177
178 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
179 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
180
181 yf0 = fptr[0];
182 yf1 = fptr[1];
183 yf2 = fptr[2];
184 yf3 = fptr[3];
185
186 SAT_U16(dstPixelPtr[0]);
187
188 xSrc = (X >> MLIB_SHIFT) - 1;
189 ySrc = (Y >> MLIB_SHIFT) - 1;
190
191 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
192 s0 = srcPixelPtr[0];
193 s1 = srcPixelPtr[1];
194 s2 = srcPixelPtr[2];
195 s3 = srcPixelPtr[3];
196
197 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
198 s4 = srcPixelPtr[0];
199 s5 = srcPixelPtr[1];
200 s6 = srcPixelPtr[2];
201 s7 = srcPixelPtr[3];
202 }
203
204 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
205 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
206 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
207 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
208 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
209 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
210 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
211 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
212
213 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
214 SAT_U16(dstPixelPtr[0]);
215 }
216
217 return MLIB_SUCCESS;
218}
219
220/***************************************************************/
221mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
222{
223 DECLAREVAR_BC();
224 DTYPE *dstLineEnd;
225 const mlib_f32 *mlib_filters_table;
226
227 if (filter == MLIB_BICUBIC) {
228 mlib_filters_table = mlib_filters_s16f_bc;
229 }
230 else {
231 mlib_filters_table = mlib_filters_s16f_bc2;
232 }
233
234 for (j = yStart; j <= yFinish; j++) {
235 mlib_d64 xf0, xf1, xf2, xf3;
236 mlib_d64 yf0, yf1, yf2, yf3;
237 mlib_d64 c0, c1, c2, c3, val0;
238 mlib_s32 filterpos, k;
239 mlib_f32 *fptr;
240 mlib_s32 s0, s1, s2, s3;
241 mlib_s32 s4, s5, s6, s7;
242
243 CLIP(2);
244 dstLineEnd = (DTYPE *) dstData + 2 * xRight;
245
246 for (k = 0; k < 2; k++) {
247 mlib_s32 X1 = X;
248 mlib_s32 Y1 = Y;
249 DTYPE *dPtr = dstPixelPtr + k;
250
251 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
252 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
253
254 xf0 = fptr[0];
255 xf1 = fptr[1];
256 xf2 = fptr[2];
257 xf3 = fptr[3];
258
259 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
260 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
261
262 yf0 = fptr[0];
263 yf1 = fptr[1];
264 yf2 = fptr[2];
265 yf3 = fptr[3];
266
267 xSrc = (X1 >> MLIB_SHIFT) - 1;
268 ySrc = (Y1 >> MLIB_SHIFT) - 1;
269
270 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
271 s0 = srcPixelPtr[0];
272 s1 = srcPixelPtr[2];
273 s2 = srcPixelPtr[4];
274 s3 = srcPixelPtr[6];
275
276 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
277 s4 = srcPixelPtr[0];
278 s5 = srcPixelPtr[2];
279 s6 = srcPixelPtr[4];
280 s7 = srcPixelPtr[6];
281
282 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
283
284 X1 += dX;
285 Y1 += dY;
286
287 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
288 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
289 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
290 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
291 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
292 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
293 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
294 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
295
296 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
297 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
298
299 xf0 = fptr[0];
300 xf1 = fptr[1];
301 xf2 = fptr[2];
302 xf3 = fptr[3];
303
304 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
305
306 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
307 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
308
309 yf0 = fptr[0];
310 yf1 = fptr[1];
311 yf2 = fptr[2];
312 yf3 = fptr[3];
313
314 SAT_U16(dPtr[0]);
315
316 xSrc = (X1 >> MLIB_SHIFT) - 1;
317 ySrc = (Y1 >> MLIB_SHIFT) - 1;
318
319 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
320 s0 = srcPixelPtr[0];
321 s1 = srcPixelPtr[2];
322 s2 = srcPixelPtr[4];
323 s3 = srcPixelPtr[6];
324
325 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
326 s4 = srcPixelPtr[0];
327 s5 = srcPixelPtr[2];
328 s6 = srcPixelPtr[4];
329 s7 = srcPixelPtr[6];
330 }
331
332 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
333 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
334 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
335 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
336 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
337 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
338 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
339 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
340
341 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
342 SAT_U16(dPtr[0]);
343 }
344 }
345
346 return MLIB_SUCCESS;
347}
348
349/***************************************************************/
350mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
351{
352 DECLAREVAR_BC();
353 DTYPE *dstLineEnd;
354 const mlib_f32 *mlib_filters_table;
355
356 if (filter == MLIB_BICUBIC) {
357 mlib_filters_table = mlib_filters_s16f_bc;
358 }
359 else {
360 mlib_filters_table = mlib_filters_s16f_bc2;
361 }
362
363 for (j = yStart; j <= yFinish; j++) {
364 mlib_d64 xf0, xf1, xf2, xf3;
365 mlib_d64 yf0, yf1, yf2, yf3;
366 mlib_d64 c0, c1, c2, c3, val0;
367 mlib_s32 filterpos, k;
368 mlib_f32 *fptr;
369 mlib_s32 s0, s1, s2, s3;
370 mlib_s32 s4, s5, s6, s7;
371
372 CLIP(3);
373 dstLineEnd = (DTYPE *) dstData + 3 * xRight;
374
375 for (k = 0; k < 3; k++) {
376 mlib_s32 X1 = X;
377 mlib_s32 Y1 = Y;
378 DTYPE *dPtr = dstPixelPtr + k;
379
380 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
381 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
382
383 xf0 = fptr[0];
384 xf1 = fptr[1];
385 xf2 = fptr[2];
386 xf3 = fptr[3];
387
388 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
389 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
390
391 yf0 = fptr[0];
392 yf1 = fptr[1];
393 yf2 = fptr[2];
394 yf3 = fptr[3];
395
396 xSrc = (X1 >> MLIB_SHIFT) - 1;
397 ySrc = (Y1 >> MLIB_SHIFT) - 1;
398
399 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
400 s0 = srcPixelPtr[0];
401 s1 = srcPixelPtr[3];
402 s2 = srcPixelPtr[6];
403 s3 = srcPixelPtr[9];
404
405 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
406 s4 = srcPixelPtr[0];
407 s5 = srcPixelPtr[3];
408 s6 = srcPixelPtr[6];
409 s7 = srcPixelPtr[9];
410
411 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
412
413 X1 += dX;
414 Y1 += dY;
415
416 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
417 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
418 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
419 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
420 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
421 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
422 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
423 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
424
425 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
426 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
427
428 xf0 = fptr[0];
429 xf1 = fptr[1];
430 xf2 = fptr[2];
431 xf3 = fptr[3];
432
433 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
434
435 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
436 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
437
438 yf0 = fptr[0];
439 yf1 = fptr[1];
440 yf2 = fptr[2];
441 yf3 = fptr[3];
442
443 SAT_U16(dPtr[0]);
444
445 xSrc = (X1 >> MLIB_SHIFT) - 1;
446 ySrc = (Y1 >> MLIB_SHIFT) - 1;
447
448 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
449 s0 = srcPixelPtr[0];
450 s1 = srcPixelPtr[3];
451 s2 = srcPixelPtr[6];
452 s3 = srcPixelPtr[9];
453
454 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
455 s4 = srcPixelPtr[0];
456 s5 = srcPixelPtr[3];
457 s6 = srcPixelPtr[6];
458 s7 = srcPixelPtr[9];
459 }
460
461 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
462 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
463 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
464 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
465 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
466 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
467 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
468 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
469
470 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
471 SAT_U16(dPtr[0]);
472 }
473 }
474
475 return MLIB_SUCCESS;
476}
477
478/***************************************************************/
479mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
480{
481 DECLAREVAR_BC();
482 DTYPE *dstLineEnd;
483 const mlib_f32 *mlib_filters_table;
484
485 if (filter == MLIB_BICUBIC) {
486 mlib_filters_table = mlib_filters_s16f_bc;
487 }
488 else {
489 mlib_filters_table = mlib_filters_s16f_bc2;
490 }
491
492 for (j = yStart; j <= yFinish; j++) {
493 mlib_d64 xf0, xf1, xf2, xf3;
494 mlib_d64 yf0, yf1, yf2, yf3;
495 mlib_d64 c0, c1, c2, c3, val0;
496 mlib_s32 filterpos, k;
497 mlib_f32 *fptr;
498 mlib_s32 s0, s1, s2, s3;
499 mlib_s32 s4, s5, s6, s7;
500
501 CLIP(4);
502 dstLineEnd = (DTYPE *) dstData + 4 * xRight;
503
504 for (k = 0; k < 4; k++) {
505 mlib_s32 X1 = X;
506 mlib_s32 Y1 = Y;
507 DTYPE *dPtr = dstPixelPtr + k;
508
509 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
510 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
511
512 xf0 = fptr[0];
513 xf1 = fptr[1];
514 xf2 = fptr[2];
515 xf3 = fptr[3];
516
517 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
518 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
519
520 yf0 = fptr[0];
521 yf1 = fptr[1];
522 yf2 = fptr[2];
523 yf3 = fptr[3];
524
525 xSrc = (X1 >> MLIB_SHIFT) - 1;
526 ySrc = (Y1 >> MLIB_SHIFT) - 1;
527
528 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
529 s0 = srcPixelPtr[0];
530 s1 = srcPixelPtr[4];
531 s2 = srcPixelPtr[8];
532 s3 = srcPixelPtr[12];
533
534 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
535 s4 = srcPixelPtr[0];
536 s5 = srcPixelPtr[4];
537 s6 = srcPixelPtr[8];
538 s7 = srcPixelPtr[12];
539
540 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
541
542 X1 += dX;
543 Y1 += dY;
544
545 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
546 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
547 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
548 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
549 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
550 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
551 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
552 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
553
554 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
555 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
556
557 xf0 = fptr[0];
558 xf1 = fptr[1];
559 xf2 = fptr[2];
560 xf3 = fptr[3];
561
562 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
563
564 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
565 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
566
567 yf0 = fptr[0];
568 yf1 = fptr[1];
569 yf2 = fptr[2];
570 yf3 = fptr[3];
571
572 SAT_U16(dPtr[0]);
573
574 xSrc = (X1 >> MLIB_SHIFT) - 1;
575 ySrc = (Y1 >> MLIB_SHIFT) - 1;
576
577 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
578 s0 = srcPixelPtr[0];
579 s1 = srcPixelPtr[4];
580 s2 = srcPixelPtr[8];
581 s3 = srcPixelPtr[12];
582
583 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
584 s4 = srcPixelPtr[0];
585 s5 = srcPixelPtr[4];
586 s6 = srcPixelPtr[8];
587 s7 = srcPixelPtr[12];
588 }
589
590 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
591 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
592 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
593 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
594 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
595 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
596 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
597 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
598
599 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
600 SAT_U16(dPtr[0]);
601 }
602 }
603
604 return MLIB_SUCCESS;
605}
606
607#else /* for x86, using integer multiplies is faster */
608
/*
 * Right shift and rounding term applied to each horizontal (X) sum of the
 * integer code path.  Rounding is disabled for this pass (ROUND_X is 0).
 */
#define SHIFT_X 15
#define ROUND_X 0 /* (1 << (SHIFT_X - 1)) */

/*
 * Right shift and rounding term applied to the vertical (Y) sum; ROUND_Y is
 * half of the unit removed by SHIFT_Y.
 */
#define SHIFT_Y 14
#define ROUND_Y (1 << (SHIFT_Y - 1))

/*
 * S32_TO_U16_SAT(DST) stores the local variable `val0` (which must be in
 * scope at the point of use) into DST, clamped to
 * [MLIB_U16_MIN, MLIB_U16_MAX].
 * Use it as a statement: S32_TO_U16_SAT(dst[0]);
 */
#define S32_TO_U16_SAT(DST)                                             \
  (DST) = (val0 >= MLIB_U16_MAX) ? MLIB_U16_MAX                         \
        : (val0 <= MLIB_U16_MIN) ? MLIB_U16_MIN                         \
        : (mlib_u16)val0
622
623/***************************************************************/
624mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
625{
626 DECLAREVAR_BC();
627 DTYPE *dstLineEnd;
628 const mlib_s16 *mlib_filters_table;
629
630 if (filter == MLIB_BICUBIC) {
631 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
632 }
633 else {
634 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
635 }
636
637 for (j = yStart; j <= yFinish; j++) {
638 mlib_s32 xf0, xf1, xf2, xf3;
639 mlib_s32 yf0, yf1, yf2, yf3;
640 mlib_s32 c0, c1, c2, c3, val0;
641 mlib_s32 filterpos;
642 mlib_s16 *fptr;
643 mlib_s32 s0, s1, s2, s3;
644 mlib_s32 s4, s5, s6, s7;
645
646 CLIP(1);
647 dstLineEnd = (DTYPE *) dstData + xRight;
648
649 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
650 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
651
652 xf0 = fptr[0] >> 1;
653 xf1 = fptr[1] >> 1;
654 xf2 = fptr[2] >> 1;
655 xf3 = fptr[3] >> 1;
656
657 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
658 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
659
660 yf0 = fptr[0];
661 yf1 = fptr[1];
662 yf2 = fptr[2];
663 yf3 = fptr[3];
664
665 xSrc = (X >> MLIB_SHIFT) - 1;
666 ySrc = (Y >> MLIB_SHIFT) - 1;
667
668 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
669 s0 = srcPixelPtr[0];
670 s1 = srcPixelPtr[1];
671 s2 = srcPixelPtr[2];
672 s3 = srcPixelPtr[3];
673
674 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
675 s4 = srcPixelPtr[0];
676 s5 = srcPixelPtr[1];
677 s6 = srcPixelPtr[2];
678 s7 = srcPixelPtr[3];
679
680 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
681
682 X += dX;
683 Y += dY;
684
685 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
686 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
687 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
688 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
689 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
690 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
691 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
692 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
693
694 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
695 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
696
697 xf0 = fptr[0] >> 1;
698 xf1 = fptr[1] >> 1;
699 xf2 = fptr[2] >> 1;
700 xf3 = fptr[3] >> 1;
701
702 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
703
704 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
705 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
706
707 yf0 = fptr[0];
708 yf1 = fptr[1];
709 yf2 = fptr[2];
710 yf3 = fptr[3];
711
712 S32_TO_U16_SAT(dstPixelPtr[0]);
713
714 xSrc = (X >> MLIB_SHIFT) - 1;
715 ySrc = (Y >> MLIB_SHIFT) - 1;
716
717 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
718 s0 = srcPixelPtr[0];
719 s1 = srcPixelPtr[1];
720 s2 = srcPixelPtr[2];
721 s3 = srcPixelPtr[3];
722
723 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
724 s4 = srcPixelPtr[0];
725 s5 = srcPixelPtr[1];
726 s6 = srcPixelPtr[2];
727 s7 = srcPixelPtr[3];
728 }
729
730 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
731 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
732 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
733 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
734 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
735 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
736 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
737 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
738
739 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
740 S32_TO_U16_SAT(dstPixelPtr[0]);
741 }
742
743 return MLIB_SUCCESS;
744}
745
746/***************************************************************/
747mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
748{
749 DECLAREVAR_BC();
750 DTYPE *dstLineEnd;
751 const mlib_s16 *mlib_filters_table;
752
753 if (filter == MLIB_BICUBIC) {
754 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
755 }
756 else {
757 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
758 }
759
760 for (j = yStart; j <= yFinish; j++) {
761 mlib_s32 xf0, xf1, xf2, xf3;
762 mlib_s32 yf0, yf1, yf2, yf3;
763 mlib_s32 c0, c1, c2, c3, val0;
764 mlib_s32 filterpos, k;
765 mlib_s16 *fptr;
766 mlib_s32 s0, s1, s2, s3;
767 mlib_s32 s4, s5, s6, s7;
768
769 CLIP(2);
770 dstLineEnd = (DTYPE *) dstData + 2 * xRight;
771
772 for (k = 0; k < 2; k++) {
773 mlib_s32 X1 = X;
774 mlib_s32 Y1 = Y;
775 DTYPE *dPtr = dstPixelPtr + k;
776
777 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
778 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
779
780 xf0 = fptr[0] >> 1;
781 xf1 = fptr[1] >> 1;
782 xf2 = fptr[2] >> 1;
783 xf3 = fptr[3] >> 1;
784
785 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
786 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
787
788 yf0 = fptr[0];
789 yf1 = fptr[1];
790 yf2 = fptr[2];
791 yf3 = fptr[3];
792
793 xSrc = (X1 >> MLIB_SHIFT) - 1;
794 ySrc = (Y1 >> MLIB_SHIFT) - 1;
795
796 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
797 s0 = srcPixelPtr[0];
798 s1 = srcPixelPtr[2];
799 s2 = srcPixelPtr[4];
800 s3 = srcPixelPtr[6];
801
802 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
803 s4 = srcPixelPtr[0];
804 s5 = srcPixelPtr[2];
805 s6 = srcPixelPtr[4];
806 s7 = srcPixelPtr[6];
807
808 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
809
810 X1 += dX;
811 Y1 += dY;
812
813 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
814 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
815 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
816 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
817 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
818 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
819 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
820 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
821
822 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
823 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
824
825 xf0 = fptr[0] >> 1;
826 xf1 = fptr[1] >> 1;
827 xf2 = fptr[2] >> 1;
828 xf3 = fptr[3] >> 1;
829
830 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
831
832 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
833 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
834
835 yf0 = fptr[0];
836 yf1 = fptr[1];
837 yf2 = fptr[2];
838 yf3 = fptr[3];
839
840 S32_TO_U16_SAT(dPtr[0]);
841
842 xSrc = (X1 >> MLIB_SHIFT) - 1;
843 ySrc = (Y1 >> MLIB_SHIFT) - 1;
844
845 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
846 s0 = srcPixelPtr[0];
847 s1 = srcPixelPtr[2];
848 s2 = srcPixelPtr[4];
849 s3 = srcPixelPtr[6];
850
851 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
852 s4 = srcPixelPtr[0];
853 s5 = srcPixelPtr[2];
854 s6 = srcPixelPtr[4];
855 s7 = srcPixelPtr[6];
856 }
857
858 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
859 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
860 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
861 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
862 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
863 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
864 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
865 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
866
867 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
868 S32_TO_U16_SAT(dPtr[0]);
869 }
870 }
871
872 return MLIB_SUCCESS;
873}
874
875/***************************************************************/
876mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
877{
878 DECLAREVAR_BC();
879 DTYPE *dstLineEnd;
880 const mlib_s16 *mlib_filters_table;
881
882 if (filter == MLIB_BICUBIC) {
883 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
884 }
885 else {
886 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
887 }
888
889 for (j = yStart; j <= yFinish; j++) {
890 mlib_s32 xf0, xf1, xf2, xf3;
891 mlib_s32 yf0, yf1, yf2, yf3;
892 mlib_s32 c0, c1, c2, c3, val0;
893 mlib_s32 filterpos, k;
894 mlib_s16 *fptr;
895 mlib_s32 s0, s1, s2, s3;
896 mlib_s32 s4, s5, s6, s7;
897
898 CLIP(3);
899 dstLineEnd = (DTYPE *) dstData + 3 * xRight;
900
901 for (k = 0; k < 3; k++) {
902 mlib_s32 X1 = X;
903 mlib_s32 Y1 = Y;
904 DTYPE *dPtr = dstPixelPtr + k;
905
906 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
907 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
908
909 xf0 = fptr[0] >> 1;
910 xf1 = fptr[1] >> 1;
911 xf2 = fptr[2] >> 1;
912 xf3 = fptr[3] >> 1;
913
914 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
915 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
916
917 yf0 = fptr[0];
918 yf1 = fptr[1];
919 yf2 = fptr[2];
920 yf3 = fptr[3];
921
922 xSrc = (X1 >> MLIB_SHIFT) - 1;
923 ySrc = (Y1 >> MLIB_SHIFT) - 1;
924
925 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
926 s0 = srcPixelPtr[0];
927 s1 = srcPixelPtr[3];
928 s2 = srcPixelPtr[6];
929 s3 = srcPixelPtr[9];
930
931 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
932 s4 = srcPixelPtr[0];
933 s5 = srcPixelPtr[3];
934 s6 = srcPixelPtr[6];
935 s7 = srcPixelPtr[9];
936
937 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
938
939 X1 += dX;
940 Y1 += dY;
941
942 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
943 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
944 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
945 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
946 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
947 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
948 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
949 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
950
951 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
952 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
953
954 xf0 = fptr[0] >> 1;
955 xf1 = fptr[1] >> 1;
956 xf2 = fptr[2] >> 1;
957 xf3 = fptr[3] >> 1;
958
959 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
960
961 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
962 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
963
964 yf0 = fptr[0];
965 yf1 = fptr[1];
966 yf2 = fptr[2];
967 yf3 = fptr[3];
968
969 S32_TO_U16_SAT(dPtr[0]);
970
971 xSrc = (X1 >> MLIB_SHIFT) - 1;
972 ySrc = (Y1 >> MLIB_SHIFT) - 1;
973
974 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
975 s0 = srcPixelPtr[0];
976 s1 = srcPixelPtr[3];
977 s2 = srcPixelPtr[6];
978 s3 = srcPixelPtr[9];
979
980 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
981 s4 = srcPixelPtr[0];
982 s5 = srcPixelPtr[3];
983 s6 = srcPixelPtr[6];
984 s7 = srcPixelPtr[9];
985 }
986
987 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
988 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
989 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
990 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
991 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
992 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
993 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
994 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
995
996 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
997 S32_TO_U16_SAT(dPtr[0]);
998 }
999 }
1000
1001 return MLIB_SUCCESS;
1002}
1003
1004/***************************************************************/
1005mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
1006{
1007 DECLAREVAR_BC();
1008 DTYPE *dstLineEnd;
1009 const mlib_s16 *mlib_filters_table;
1010
1011 if (filter == MLIB_BICUBIC) {
1012 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
1013 }
1014 else {
1015 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
1016 }
1017
1018 for (j = yStart; j <= yFinish; j++) {
1019 mlib_s32 xf0, xf1, xf2, xf3;
1020 mlib_s32 yf0, yf1, yf2, yf3;
1021 mlib_s32 c0, c1, c2, c3, val0;
1022 mlib_s32 filterpos, k;
1023 mlib_s16 *fptr;
1024 mlib_s32 s0, s1, s2, s3;
1025 mlib_s32 s4, s5, s6, s7;
1026
1027 CLIP(4);
1028 dstLineEnd = (DTYPE *) dstData + 4 * xRight;
1029
1030 for (k = 0; k < 4; k++) {
1031 mlib_s32 X1 = X;
1032 mlib_s32 Y1 = Y;
1033 DTYPE *dPtr = dstPixelPtr + k;
1034
1035 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1036 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1037
1038 xf0 = fptr[0] >> 1;
1039 xf1 = fptr[1] >> 1;
1040 xf2 = fptr[2] >> 1;
1041 xf3 = fptr[3] >> 1;
1042
1043 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1044 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1045
1046 yf0 = fptr[0];
1047 yf1 = fptr[1];
1048 yf2 = fptr[2];
1049 yf3 = fptr[3];
1050
1051 xSrc = (X1 >> MLIB_SHIFT) - 1;
1052 ySrc = (Y1 >> MLIB_SHIFT) - 1;
1053
1054 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1055 s0 = srcPixelPtr[0];
1056 s1 = srcPixelPtr[4];
1057 s2 = srcPixelPtr[8];
1058 s3 = srcPixelPtr[12];
1059
1060 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1061 s4 = srcPixelPtr[0];
1062 s5 = srcPixelPtr[4];
1063 s6 = srcPixelPtr[8];
1064 s7 = srcPixelPtr[12];
1065
1066 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
1067
1068 X1 += dX;
1069 Y1 += dY;
1070
1071 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1072 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1073 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1074 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1075 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1076 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1077 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1078 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1079
1080 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1081 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1082
1083 xf0 = fptr[0] >> 1;
1084 xf1 = fptr[1] >> 1;
1085 xf2 = fptr[2] >> 1;
1086 xf3 = fptr[3] >> 1;
1087
1088 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1089
1090 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1091 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1092
1093 yf0 = fptr[0];
1094 yf1 = fptr[1];
1095 yf2 = fptr[2];
1096 yf3 = fptr[3];
1097
1098 S32_TO_U16_SAT(dPtr[0]);
1099
1100 xSrc = (X1 >> MLIB_SHIFT) - 1;
1101 ySrc = (Y1 >> MLIB_SHIFT) - 1;
1102
1103 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1104 s0 = srcPixelPtr[0];
1105 s1 = srcPixelPtr[4];
1106 s2 = srcPixelPtr[8];
1107 s3 = srcPixelPtr[12];
1108
1109 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1110 s4 = srcPixelPtr[0];
1111 s5 = srcPixelPtr[4];
1112 s6 = srcPixelPtr[8];
1113 s7 = srcPixelPtr[12];
1114 }
1115
1116 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1117 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1118 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1119 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1120 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1121 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1122 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1123 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1124
1125 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1126 S32_TO_U16_SAT(dPtr[0]);
1127 }
1128 }
1129
1130 return MLIB_SUCCESS;
1131}
1132
1133#endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1134
1135/***************************************************************/
1136