1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27/*
28 * FUNCTION
29 * Image affine transformation with Bicubic filtering
30 * SYNOPSIS
31 * mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
32 * mlib_s32 *rightEdges,
33 * mlib_s32 *xStarts,
34 * mlib_s32 *yStarts,
35 * mlib_s32 *sides,
36 * mlib_u8 *dstData,
37 * mlib_u8 **lineAddr,
38 * mlib_s32 dstYStride,
39 * mlib_s32 is_affine,
40 * mlib_s32 srcYStride,
41 * mlib_filter filter)
42 *
43 * ARGUMENTS
44 * leftEdges array[dstHeight] of xLeft coordinates
45 * rightEdges array[dstHeight] of xRight coordinates
46 * xStarts array[dstHeight] of xStart * 65536 coordinates
47 * yStarts array[dstHeight] of yStart * 65536 coordinates
48 * sides output array[4]. sides[0] is yStart, sides[1] is yFinish,
49 * sides[2] is dx * 65536, sides[3] is dy * 65536
50 * dstData pointer to the first pixel on (yStart - 1) line
51 * lineAddr array[srcHeight] of pointers to the first pixel on
52 * the corresponding lines
53 * dstYStride stride of destination image
54 * is_affine indicator (Affine - GridWarp)
55 * srcYStride stride of source image
56 * filter type of resampling filter
57 *
58 * DESCRIPTION
59 * The functions step along the lines from xLeft to xRight and apply
60 * the bicubic filtering.
61 *
62 */
63
64#include "mlib_ImageAffine.h"
65
/* Pixel element type of this file; the kernels below cast pixel pointers to DTYPE *. */
#define DTYPE mlib_s16
/*
 * NOTE(review): not referenced in the code visible here; presumably consumed
 * by mlib_ImageAffine.h when it derives FILTER_SHIFT / FILTER_MASK - confirm.
 */
#define FILTER_BITS 9
/* Builds the function name, e.g. FUN_NAME(1ch) -> mlib_ImageAffine_s16_1ch_bc. */
#define FUN_NAME(CHAN) mlib_ImageAffine_s16_##CHAN##_bc
69
70/***************************************************************/
71#ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
72
/*
 * The floating-point kernels use their own FILTER_ELEM_BITS value.
 * NOTE(review): presumably this feeds the FILTER_SHIFT / FILTER_MASK
 * definitions in mlib_ImageAffine.h - confirm against the header.
 */
#undef  FILTER_ELEM_BITS
#define FILTER_ELEM_BITS  4

/*
 * SAT16(DST): store the floating-point accumulator `val0` (which must be in
 * scope at the point of use) into DST as (mlib_s32)val0 >> 16.
 *
 * When MLIB_USE_FTOI_CLAMPING is defined the conversion is performed
 * directly. Otherwise values at or beyond the mlib_s32 limits are replaced
 * by MLIB_S16_MAX / MLIB_S16_MIN before any conversion takes place.
 */
#ifdef MLIB_USE_FTOI_CLAMPING

#define SAT16(DST)                                              \
  DST = ((mlib_s32)val0) >> 16

#else

#define SAT16(DST)                                              \
  DST = (val0 >= MLIB_S32_MAX) ? MLIB_S16_MAX :                 \
        (val0 <= MLIB_S32_MIN) ? MLIB_S16_MIN :                 \
        ((mlib_s32)val0) >> 16

#endif /* MLIB_USE_FTOI_CLAMPING */
92
93mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
94{
95 DECLAREVAR_BC();
96 DTYPE *dstLineEnd;
97 const mlib_f32 *mlib_filters_table;
98
99 if (filter == MLIB_BICUBIC) {
100 mlib_filters_table = mlib_filters_s16f_bc;
101 }
102 else {
103 mlib_filters_table = mlib_filters_s16f_bc2;
104 }
105
106 for (j = yStart; j <= yFinish; j++) {
107 mlib_d64 xf0, xf1, xf2, xf3;
108 mlib_d64 yf0, yf1, yf2, yf3;
109 mlib_d64 c0, c1, c2, c3, val0;
110 mlib_s32 filterpos;
111 mlib_f32 *fptr;
112 mlib_s32 s0, s1, s2, s3;
113 mlib_s32 s4, s5, s6, s7;
114
115 CLIP(1);
116 dstLineEnd = (DTYPE *) dstData + xRight;
117
118 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
119 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
120
121 xf0 = fptr[0];
122 xf1 = fptr[1];
123 xf2 = fptr[2];
124 xf3 = fptr[3];
125
126 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
127 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
128
129 yf0 = fptr[0];
130 yf1 = fptr[1];
131 yf2 = fptr[2];
132 yf3 = fptr[3];
133
134 xSrc = (X >> MLIB_SHIFT) - 1;
135 ySrc = (Y >> MLIB_SHIFT) - 1;
136
137 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
138 s0 = srcPixelPtr[0];
139 s1 = srcPixelPtr[1];
140 s2 = srcPixelPtr[2];
141 s3 = srcPixelPtr[3];
142
143 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
144 s4 = srcPixelPtr[0];
145 s5 = srcPixelPtr[1];
146 s6 = srcPixelPtr[2];
147 s7 = srcPixelPtr[3];
148
149 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
150
151 X += dX;
152 Y += dY;
153
154 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
155 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
156 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
157 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
158 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
159 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
160 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
161 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
162
163 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
164 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
165
166 xf0 = fptr[0];
167 xf1 = fptr[1];
168 xf2 = fptr[2];
169 xf3 = fptr[3];
170
171 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
172
173 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
174 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
175
176 yf0 = fptr[0];
177 yf1 = fptr[1];
178 yf2 = fptr[2];
179 yf3 = fptr[3];
180
181 SAT16(dstPixelPtr[0]);
182
183 xSrc = (X >> MLIB_SHIFT) - 1;
184 ySrc = (Y >> MLIB_SHIFT) - 1;
185
186 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
187 s0 = srcPixelPtr[0];
188 s1 = srcPixelPtr[1];
189 s2 = srcPixelPtr[2];
190 s3 = srcPixelPtr[3];
191
192 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
193 s4 = srcPixelPtr[0];
194 s5 = srcPixelPtr[1];
195 s6 = srcPixelPtr[2];
196 s7 = srcPixelPtr[3];
197 }
198
199 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
200 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
201 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
202 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
203 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
204 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
205 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
206 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
207
208 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
209 SAT16(dstPixelPtr[0]);
210 }
211
212 return MLIB_SUCCESS;
213}
214
215mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
216{
217 DECLAREVAR_BC();
218 DTYPE *dstLineEnd;
219 const mlib_f32 *mlib_filters_table;
220
221 if (filter == MLIB_BICUBIC) {
222 mlib_filters_table = mlib_filters_s16f_bc;
223 }
224 else {
225 mlib_filters_table = mlib_filters_s16f_bc2;
226 }
227
228 for (j = yStart; j <= yFinish; j++) {
229 mlib_d64 xf0, xf1, xf2, xf3;
230 mlib_d64 yf0, yf1, yf2, yf3;
231 mlib_d64 c0, c1, c2, c3, val0;
232 mlib_s32 filterpos, k;
233 mlib_f32 *fptr;
234 mlib_s32 s0, s1, s2, s3;
235 mlib_s32 s4, s5, s6, s7;
236
237 CLIP(2);
238 dstLineEnd = (DTYPE *) dstData + 2 * xRight;
239
240 for (k = 0; k < 2; k++) {
241 mlib_s32 X1 = X;
242 mlib_s32 Y1 = Y;
243 DTYPE *dPtr = dstPixelPtr + k;
244
245 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
246 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
247
248 xf0 = fptr[0];
249 xf1 = fptr[1];
250 xf2 = fptr[2];
251 xf3 = fptr[3];
252
253 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
254 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
255
256 yf0 = fptr[0];
257 yf1 = fptr[1];
258 yf2 = fptr[2];
259 yf3 = fptr[3];
260
261 xSrc = (X1 >> MLIB_SHIFT) - 1;
262 ySrc = (Y1 >> MLIB_SHIFT) - 1;
263
264 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
265 s0 = srcPixelPtr[0];
266 s1 = srcPixelPtr[2];
267 s2 = srcPixelPtr[4];
268 s3 = srcPixelPtr[6];
269
270 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
271 s4 = srcPixelPtr[0];
272 s5 = srcPixelPtr[2];
273 s6 = srcPixelPtr[4];
274 s7 = srcPixelPtr[6];
275
276 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
277
278 X1 += dX;
279 Y1 += dY;
280
281 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
282 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
283 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
284 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
285 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
286 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
287 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
288 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
289
290 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
291 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
292
293 xf0 = fptr[0];
294 xf1 = fptr[1];
295 xf2 = fptr[2];
296 xf3 = fptr[3];
297
298 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
299
300 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
301 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
302
303 yf0 = fptr[0];
304 yf1 = fptr[1];
305 yf2 = fptr[2];
306 yf3 = fptr[3];
307
308 SAT16(dPtr[0]);
309
310 xSrc = (X1 >> MLIB_SHIFT) - 1;
311 ySrc = (Y1 >> MLIB_SHIFT) - 1;
312
313 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
314 s0 = srcPixelPtr[0];
315 s1 = srcPixelPtr[2];
316 s2 = srcPixelPtr[4];
317 s3 = srcPixelPtr[6];
318
319 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
320 s4 = srcPixelPtr[0];
321 s5 = srcPixelPtr[2];
322 s6 = srcPixelPtr[4];
323 s7 = srcPixelPtr[6];
324 }
325
326 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
327 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
328 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
329 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
330 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
331 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
332 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
333 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
334
335 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
336 SAT16(dPtr[0]);
337 }
338 }
339
340 return MLIB_SUCCESS;
341}
342
343mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
344{
345 DECLAREVAR_BC();
346 DTYPE *dstLineEnd;
347 const mlib_f32 *mlib_filters_table;
348
349 if (filter == MLIB_BICUBIC) {
350 mlib_filters_table = mlib_filters_s16f_bc;
351 }
352 else {
353 mlib_filters_table = mlib_filters_s16f_bc2;
354 }
355
356 for (j = yStart; j <= yFinish; j++) {
357 mlib_d64 xf0, xf1, xf2, xf3;
358 mlib_d64 yf0, yf1, yf2, yf3;
359 mlib_d64 c0, c1, c2, c3, val0;
360 mlib_s32 filterpos, k;
361 mlib_f32 *fptr;
362 mlib_s32 s0, s1, s2, s3;
363 mlib_s32 s4, s5, s6, s7;
364
365 CLIP(3);
366 dstLineEnd = (DTYPE *) dstData + 3 * xRight;
367
368 for (k = 0; k < 3; k++) {
369 mlib_s32 X1 = X;
370 mlib_s32 Y1 = Y;
371 DTYPE *dPtr = dstPixelPtr + k;
372
373 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
374 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
375
376 xf0 = fptr[0];
377 xf1 = fptr[1];
378 xf2 = fptr[2];
379 xf3 = fptr[3];
380
381 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
382 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
383
384 yf0 = fptr[0];
385 yf1 = fptr[1];
386 yf2 = fptr[2];
387 yf3 = fptr[3];
388
389 xSrc = (X1 >> MLIB_SHIFT) - 1;
390 ySrc = (Y1 >> MLIB_SHIFT) - 1;
391
392 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
393 s0 = srcPixelPtr[0];
394 s1 = srcPixelPtr[3];
395 s2 = srcPixelPtr[6];
396 s3 = srcPixelPtr[9];
397
398 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
399 s4 = srcPixelPtr[0];
400 s5 = srcPixelPtr[3];
401 s6 = srcPixelPtr[6];
402 s7 = srcPixelPtr[9];
403
404 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
405
406 X1 += dX;
407 Y1 += dY;
408
409 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
410 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
411 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
412 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
413 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
414 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
415 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
416 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
417
418 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
419 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
420
421 xf0 = fptr[0];
422 xf1 = fptr[1];
423 xf2 = fptr[2];
424 xf3 = fptr[3];
425
426 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
427
428 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
429 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
430
431 yf0 = fptr[0];
432 yf1 = fptr[1];
433 yf2 = fptr[2];
434 yf3 = fptr[3];
435
436 SAT16(dPtr[0]);
437
438 xSrc = (X1 >> MLIB_SHIFT) - 1;
439 ySrc = (Y1 >> MLIB_SHIFT) - 1;
440
441 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
442 s0 = srcPixelPtr[0];
443 s1 = srcPixelPtr[3];
444 s2 = srcPixelPtr[6];
445 s3 = srcPixelPtr[9];
446
447 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
448 s4 = srcPixelPtr[0];
449 s5 = srcPixelPtr[3];
450 s6 = srcPixelPtr[6];
451 s7 = srcPixelPtr[9];
452 }
453
454 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
455 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
456 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
457 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
458 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
459 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
460 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
461 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
462
463 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
464 SAT16(dPtr[0]);
465 }
466 }
467
468 return MLIB_SUCCESS;
469}
470
471mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
472{
473 DECLAREVAR_BC();
474 DTYPE *dstLineEnd;
475 const mlib_f32 *mlib_filters_table;
476
477 if (filter == MLIB_BICUBIC) {
478 mlib_filters_table = mlib_filters_s16f_bc;
479 }
480 else {
481 mlib_filters_table = mlib_filters_s16f_bc2;
482 }
483
484 for (j = yStart; j <= yFinish; j++) {
485 mlib_d64 xf0, xf1, xf2, xf3;
486 mlib_d64 yf0, yf1, yf2, yf3;
487 mlib_d64 c0, c1, c2, c3, val0;
488 mlib_s32 filterpos, k;
489 mlib_f32 *fptr;
490 mlib_s32 s0, s1, s2, s3;
491 mlib_s32 s4, s5, s6, s7;
492
493 CLIP(4);
494 dstLineEnd = (DTYPE *) dstData + 4 * xRight;
495
496 for (k = 0; k < 4; k++) {
497 mlib_s32 X1 = X;
498 mlib_s32 Y1 = Y;
499 DTYPE *dPtr = dstPixelPtr + k;
500
501 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
502 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
503
504 xf0 = fptr[0];
505 xf1 = fptr[1];
506 xf2 = fptr[2];
507 xf3 = fptr[3];
508
509 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
510 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
511
512 yf0 = fptr[0];
513 yf1 = fptr[1];
514 yf2 = fptr[2];
515 yf3 = fptr[3];
516
517 xSrc = (X1 >> MLIB_SHIFT) - 1;
518 ySrc = (Y1 >> MLIB_SHIFT) - 1;
519
520 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
521 s0 = srcPixelPtr[0];
522 s1 = srcPixelPtr[4];
523 s2 = srcPixelPtr[8];
524 s3 = srcPixelPtr[12];
525
526 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
527 s4 = srcPixelPtr[0];
528 s5 = srcPixelPtr[4];
529 s6 = srcPixelPtr[8];
530 s7 = srcPixelPtr[12];
531
532 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
533
534 X1 += dX;
535 Y1 += dY;
536
537 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
538 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
539 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
540 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
541 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
542 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
543 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
544 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
545
546 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
547 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
548
549 xf0 = fptr[0];
550 xf1 = fptr[1];
551 xf2 = fptr[2];
552 xf3 = fptr[3];
553
554 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
555
556 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
557 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
558
559 yf0 = fptr[0];
560 yf1 = fptr[1];
561 yf2 = fptr[2];
562 yf3 = fptr[3];
563
564 SAT16(dPtr[0]);
565
566 xSrc = (X1 >> MLIB_SHIFT) - 1;
567 ySrc = (Y1 >> MLIB_SHIFT) - 1;
568
569 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
570 s0 = srcPixelPtr[0];
571 s1 = srcPixelPtr[4];
572 s2 = srcPixelPtr[8];
573 s3 = srcPixelPtr[12];
574
575 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
576 s4 = srcPixelPtr[0];
577 s5 = srcPixelPtr[4];
578 s6 = srcPixelPtr[8];
579 s7 = srcPixelPtr[12];
580 }
581
582 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
583 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
584 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
585 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
586 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
587 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
588 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
589 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
590
591 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
592 SAT16(dPtr[0]);
593 }
594 }
595
596 return MLIB_SUCCESS;
597}
598
599#else /* for x86, using integer multiplies is faster */
600
/*
 * Fixed-point scaling of the two filter passes in the integer kernels.
 * The horizontal sums are shifted right by SHIFT_X after adding ROUND_X
 * (currently 0, i.e. no rounding bias); the vertical sum is shifted right by
 * SHIFT_Y after adding the half-step bias ROUND_Y.  SHIFT_X + SHIFT_Y is
 * always 30.
 */
#define SHIFT_X  15
#define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */

#define SHIFT_Y  (15 + 15 - SHIFT_X)
#define ROUND_Y  (1 << (SHIFT_Y - 1))

/*
 * S32_TO_S16_SAT(DST): store the integer accumulator `val0` (which must be
 * in scope at the point of use) into DST, limited to the range
 * [MLIB_S16_MIN, MLIB_S16_MAX].
 */
#define S32_TO_S16_SAT(DST)                                     \
  DST = (mlib_s16) ((val0 >= MLIB_S16_MAX) ? MLIB_S16_MAX :     \
                    (val0 <= MLIB_S16_MIN) ? MLIB_S16_MIN :     \
                    val0)
614
615mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
616{
617 DECLAREVAR_BC();
618 DTYPE *dstLineEnd;
619 const mlib_s16 *mlib_filters_table;
620
621 if (filter == MLIB_BICUBIC) {
622 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
623 }
624 else {
625 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
626 }
627
628 for (j = yStart; j <= yFinish; j++) {
629 mlib_s32 xf0, xf1, xf2, xf3;
630 mlib_s32 yf0, yf1, yf2, yf3;
631 mlib_s32 c0, c1, c2, c3, val0;
632 mlib_s32 filterpos;
633 mlib_s16 *fptr;
634 mlib_s32 s0, s1, s2, s3;
635 mlib_s32 s4, s5, s6, s7;
636
637 CLIP(1);
638 dstLineEnd = (DTYPE *) dstData + xRight;
639
640 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
641 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
642
643 xf0 = fptr[0];
644 xf1 = fptr[1];
645 xf2 = fptr[2];
646 xf3 = fptr[3];
647
648 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
649 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
650
651 yf0 = fptr[0];
652 yf1 = fptr[1];
653 yf2 = fptr[2];
654 yf3 = fptr[3];
655
656 xSrc = (X >> MLIB_SHIFT) - 1;
657 ySrc = (Y >> MLIB_SHIFT) - 1;
658
659 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
660 s0 = srcPixelPtr[0];
661 s1 = srcPixelPtr[1];
662 s2 = srcPixelPtr[2];
663 s3 = srcPixelPtr[3];
664
665 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
666 s4 = srcPixelPtr[0];
667 s5 = srcPixelPtr[1];
668 s6 = srcPixelPtr[2];
669 s7 = srcPixelPtr[3];
670
671 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
672
673 X += dX;
674 Y += dY;
675
676 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
677 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
678 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
679 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
680 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
681 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
682 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
683 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
684
685 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
686 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
687
688 xf0 = fptr[0];
689 xf1 = fptr[1];
690 xf2 = fptr[2];
691 xf3 = fptr[3];
692
693 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
694
695 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
696 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
697
698 yf0 = fptr[0];
699 yf1 = fptr[1];
700 yf2 = fptr[2];
701 yf3 = fptr[3];
702
703 S32_TO_S16_SAT(dstPixelPtr[0]);
704
705 xSrc = (X >> MLIB_SHIFT) - 1;
706 ySrc = (Y >> MLIB_SHIFT) - 1;
707
708 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
709 s0 = srcPixelPtr[0];
710 s1 = srcPixelPtr[1];
711 s2 = srcPixelPtr[2];
712 s3 = srcPixelPtr[3];
713
714 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
715 s4 = srcPixelPtr[0];
716 s5 = srcPixelPtr[1];
717 s6 = srcPixelPtr[2];
718 s7 = srcPixelPtr[3];
719 }
720
721 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
722 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
723 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
724 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
725 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
726 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
727 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
728 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
729
730 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
731 S32_TO_S16_SAT(dstPixelPtr[0]);
732 }
733
734 return MLIB_SUCCESS;
735}
736
737mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
738{
739 DECLAREVAR_BC();
740 DTYPE *dstLineEnd;
741 const mlib_s16 *mlib_filters_table;
742
743 if (filter == MLIB_BICUBIC) {
744 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
745 }
746 else {
747 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
748 }
749
750 for (j = yStart; j <= yFinish; j++) {
751 mlib_s32 xf0, xf1, xf2, xf3;
752 mlib_s32 yf0, yf1, yf2, yf3;
753 mlib_s32 c0, c1, c2, c3, val0;
754 mlib_s32 filterpos, k;
755 mlib_s16 *fptr;
756 mlib_s32 s0, s1, s2, s3;
757 mlib_s32 s4, s5, s6, s7;
758
759 CLIP(2);
760 dstLineEnd = (DTYPE *) dstData + 2 * xRight;
761
762 for (k = 0; k < 2; k++) {
763 mlib_s32 X1 = X;
764 mlib_s32 Y1 = Y;
765 DTYPE *dPtr = dstPixelPtr + k;
766
767 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
768 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
769
770 xf0 = fptr[0];
771 xf1 = fptr[1];
772 xf2 = fptr[2];
773 xf3 = fptr[3];
774
775 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
776 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
777
778 yf0 = fptr[0];
779 yf1 = fptr[1];
780 yf2 = fptr[2];
781 yf3 = fptr[3];
782
783 xSrc = (X1 >> MLIB_SHIFT) - 1;
784 ySrc = (Y1 >> MLIB_SHIFT) - 1;
785
786 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
787 s0 = srcPixelPtr[0];
788 s1 = srcPixelPtr[2];
789 s2 = srcPixelPtr[4];
790 s3 = srcPixelPtr[6];
791
792 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
793 s4 = srcPixelPtr[0];
794 s5 = srcPixelPtr[2];
795 s6 = srcPixelPtr[4];
796 s7 = srcPixelPtr[6];
797
798 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
799
800 X1 += dX;
801 Y1 += dY;
802
803 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
804 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
805 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
806 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
807 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
808 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
809 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
810 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
811
812 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
813 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
814
815 xf0 = fptr[0];
816 xf1 = fptr[1];
817 xf2 = fptr[2];
818 xf3 = fptr[3];
819
820 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
821
822 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
823 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
824
825 yf0 = fptr[0];
826 yf1 = fptr[1];
827 yf2 = fptr[2];
828 yf3 = fptr[3];
829
830 S32_TO_S16_SAT(dPtr[0]);
831
832 xSrc = (X1 >> MLIB_SHIFT) - 1;
833 ySrc = (Y1 >> MLIB_SHIFT) - 1;
834
835 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
836 s0 = srcPixelPtr[0];
837 s1 = srcPixelPtr[2];
838 s2 = srcPixelPtr[4];
839 s3 = srcPixelPtr[6];
840
841 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
842 s4 = srcPixelPtr[0];
843 s5 = srcPixelPtr[2];
844 s6 = srcPixelPtr[4];
845 s7 = srcPixelPtr[6];
846 }
847
848 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
849 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
850 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
851 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
852 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
853 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
854 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
855 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
856
857 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
858 S32_TO_S16_SAT(dPtr[0]);
859 }
860 }
861
862 return MLIB_SUCCESS;
863}
864
865mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
866{
867 DECLAREVAR_BC();
868 DTYPE *dstLineEnd;
869 const mlib_s16 *mlib_filters_table;
870
871 if (filter == MLIB_BICUBIC) {
872 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
873 }
874 else {
875 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
876 }
877
878 for (j = yStart; j <= yFinish; j++) {
879 mlib_s32 xf0, xf1, xf2, xf3;
880 mlib_s32 yf0, yf1, yf2, yf3;
881 mlib_s32 c0, c1, c2, c3, val0;
882 mlib_s32 filterpos, k;
883 mlib_s16 *fptr;
884 mlib_s32 s0, s1, s2, s3;
885 mlib_s32 s4, s5, s6, s7;
886
887 CLIP(3);
888 dstLineEnd = (DTYPE *) dstData + 3 * xRight;
889
890 for (k = 0; k < 3; k++) {
891 mlib_s32 X1 = X;
892 mlib_s32 Y1 = Y;
893 DTYPE *dPtr = dstPixelPtr + k;
894
895 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
896 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
897
898 xf0 = fptr[0];
899 xf1 = fptr[1];
900 xf2 = fptr[2];
901 xf3 = fptr[3];
902
903 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
904 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
905
906 yf0 = fptr[0];
907 yf1 = fptr[1];
908 yf2 = fptr[2];
909 yf3 = fptr[3];
910
911 xSrc = (X1 >> MLIB_SHIFT) - 1;
912 ySrc = (Y1 >> MLIB_SHIFT) - 1;
913
914 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
915 s0 = srcPixelPtr[0];
916 s1 = srcPixelPtr[3];
917 s2 = srcPixelPtr[6];
918 s3 = srcPixelPtr[9];
919
920 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
921 s4 = srcPixelPtr[0];
922 s5 = srcPixelPtr[3];
923 s6 = srcPixelPtr[6];
924 s7 = srcPixelPtr[9];
925
926 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
927
928 X1 += dX;
929 Y1 += dY;
930
931 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
932 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
933 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
934 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
935 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
936 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
937 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
938 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
939
940 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
941 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
942
943 xf0 = fptr[0];
944 xf1 = fptr[1];
945 xf2 = fptr[2];
946 xf3 = fptr[3];
947
948 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
949
950 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
951 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
952
953 yf0 = fptr[0];
954 yf1 = fptr[1];
955 yf2 = fptr[2];
956 yf3 = fptr[3];
957
958 S32_TO_S16_SAT(dPtr[0]);
959
960 xSrc = (X1 >> MLIB_SHIFT) - 1;
961 ySrc = (Y1 >> MLIB_SHIFT) - 1;
962
963 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
964 s0 = srcPixelPtr[0];
965 s1 = srcPixelPtr[3];
966 s2 = srcPixelPtr[6];
967 s3 = srcPixelPtr[9];
968
969 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
970 s4 = srcPixelPtr[0];
971 s5 = srcPixelPtr[3];
972 s6 = srcPixelPtr[6];
973 s7 = srcPixelPtr[9];
974 }
975
976 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
977 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
978 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
979 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
980 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
981 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
982 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
983 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
984
985 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
986 S32_TO_S16_SAT(dPtr[0]);
987 }
988 }
989
990 return MLIB_SUCCESS;
991}
992
993mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
994{
995 DECLAREVAR_BC();
996 DTYPE *dstLineEnd;
997 const mlib_s16 *mlib_filters_table;
998
999 if (filter == MLIB_BICUBIC) {
1000 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
1001 }
1002 else {
1003 mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
1004 }
1005
1006 for (j = yStart; j <= yFinish; j++) {
1007 mlib_s32 xf0, xf1, xf2, xf3;
1008 mlib_s32 yf0, yf1, yf2, yf3;
1009 mlib_s32 c0, c1, c2, c3, val0;
1010 mlib_s32 filterpos, k;
1011 mlib_s16 *fptr;
1012 mlib_s32 s0, s1, s2, s3;
1013 mlib_s32 s4, s5, s6, s7;
1014
1015 CLIP(4);
1016 dstLineEnd = (DTYPE *) dstData + 4 * xRight;
1017
1018 for (k = 0; k < 4; k++) {
1019 mlib_s32 X1 = X;
1020 mlib_s32 Y1 = Y;
1021 DTYPE *dPtr = dstPixelPtr + k;
1022
1023 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1024 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1025
1026 xf0 = fptr[0];
1027 xf1 = fptr[1];
1028 xf2 = fptr[2];
1029 xf3 = fptr[3];
1030
1031 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1032 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1033
1034 yf0 = fptr[0];
1035 yf1 = fptr[1];
1036 yf2 = fptr[2];
1037 yf3 = fptr[3];
1038
1039 xSrc = (X1 >> MLIB_SHIFT) - 1;
1040 ySrc = (Y1 >> MLIB_SHIFT) - 1;
1041
1042 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1043 s0 = srcPixelPtr[0];
1044 s1 = srcPixelPtr[4];
1045 s2 = srcPixelPtr[8];
1046 s3 = srcPixelPtr[12];
1047
1048 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1049 s4 = srcPixelPtr[0];
1050 s5 = srcPixelPtr[4];
1051 s6 = srcPixelPtr[8];
1052 s7 = srcPixelPtr[12];
1053
1054 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
1055
1056 X1 += dX;
1057 Y1 += dY;
1058
1059 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1060 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1061 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1062 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1063 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1064 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1065 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1066 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1067
1068 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1069 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1070
1071 xf0 = fptr[0];
1072 xf1 = fptr[1];
1073 xf2 = fptr[2];
1074 xf3 = fptr[3];
1075
1076 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1077
1078 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1079 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1080
1081 yf0 = fptr[0];
1082 yf1 = fptr[1];
1083 yf2 = fptr[2];
1084 yf3 = fptr[3];
1085
1086 S32_TO_S16_SAT(dPtr[0]);
1087
1088 xSrc = (X1 >> MLIB_SHIFT) - 1;
1089 ySrc = (Y1 >> MLIB_SHIFT) - 1;
1090
1091 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1092 s0 = srcPixelPtr[0];
1093 s1 = srcPixelPtr[4];
1094 s2 = srcPixelPtr[8];
1095 s3 = srcPixelPtr[12];
1096
1097 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1098 s4 = srcPixelPtr[0];
1099 s5 = srcPixelPtr[4];
1100 s6 = srcPixelPtr[8];
1101 s7 = srcPixelPtr[12];
1102 }
1103
1104 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1105 c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
1106 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1107 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1108 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1109 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1110 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1111 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1112
1113 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1114 S32_TO_S16_SAT(dPtr[0]);
1115 }
1116 }
1117
1118 return MLIB_SUCCESS;
1119}
1120
1121#endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1122
1123/***************************************************************/
1124