1/*
2 * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27/*
28 * FUNCTION
29 * Image affine transformation with Bicubic filtering
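 *  SYNOPSIS
 *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_affine_param *param)
 *
 *      The functions take a single parameter block. The quantities listed
 *      below (and described under ARGUMENTS) are not passed individually;
 *      they are presumably obtained from param by DECLAREVAR_BC():
 *
 *          mlib_s32   *leftEdges,
 *          mlib_s32   *rightEdges,
 *          mlib_s32   *xStarts,
 *          mlib_s32   *yStarts,
 *          mlib_s32   *sides,
 *          mlib_u8    *dstData,
 *          mlib_u8    **lineAddr,
 *          mlib_s32   dstYStride,
 *          mlib_s32   is_affine,
 *          mlib_s32   srcYStride,
 *          mlib_filter filter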
42 *
43 *
44 * ARGUMENTS
45 * leftEdges array[dstHeight] of xLeft coordinates
 *      rightEdges  array[dstHeight] of xRight coordinates
47 * xStarts array[dstHeight] of xStart * 65536 coordinates
48 * yStarts array[dstHeight] of yStart * 65536 coordinates
49 * sides output array[4]. sides[0] is yStart, sides[1] is yFinish,
50 * sides[2] is dx * 65536, sides[3] is dy * 65536
51 * dstData pointer to the first pixel on (yStart - 1) line
52 * lineAddr array[srcHeight] of pointers to the first pixel on
53 * the corresponding lines
54 * dstYStride stride of destination image
55 * is_affine indicator (Affine - GridWarp)
56 * srcYStride stride of source image
57 * filter type of resampling filter
58 *
59 * DESCRIPTION
60 * The functions step along the lines from xLeft to xRight and apply
61 * the bicubic filtering.
62 *
63 */
64
65#include "mlib_ImageAffine.h"
66
/* Sample type processed by every function in this file. */
#define DTYPE           mlib_u8

/* Builds the function name for a channel suffix:
   FUN_NAME(1ch) expands to mlib_ImageAffine_u8_1ch_bc. */
#define FUN_NAME(CHAN)  mlib_ImageAffine_u8_##CHAN##_bc

/* Not referenced directly below; presumably consumed by the FILTER_SHIFT /
   FILTER_MASK definitions of mlib_ImageAffine.h - TODO confirm. */
#define FILTER_BITS     8
72
73/***************************************************************/
74#ifdef __sparc /* for SPARC, using floating-point multiplies is faster */
75
/* Replace any earlier FILTER_ELEM_BITS definition for the floating-point
   variant. NOTE(review): 4 presumably corresponds to 16-byte table entries
   (four mlib_f32 taps) - confirm against mlib_ImageAffine.h. */
#undef  FILTER_ELEM_BITS
#define FILTER_ELEM_BITS  4

/*
 * SAT8(DST): convert the floating-point accumulator to an 8-bit sample.
 *
 * Relies on two names being in scope at the point of use:
 *   val0 - the accumulated value
 *   sat  - the bias subtracted from val0 before conversion
 * The biased value is converted to mlib_s32, bits 24 and up are kept and
 * bit 7 of the result is flipped (XOR 0x80) before the store into DST.
 */
#ifdef MLIB_USE_FTOI_CLAMPING

/* Out-of-range values are left to the float-to-int conversion itself
   (as the enabling macro's name suggests). Does not modify val0. */
#define SAT8(DST)                                               \
  DST = ((mlib_s32)(val0 - sat) >> 24) ^ 0x80

#else

/* Explicit clamping; note that val0 is modified (sat is subtracted).
   Wrapped in do { } while (0) so the macro is a single statement and can
   safely be used as the body of an unbraced if/else. */
#define SAT8(DST)                                               \
  do {                                                          \
    val0 -= sat;                                                \
    if (val0 >= MLIB_S32_MAX)                                   \
      DST = MLIB_U8_MAX;                                        \
    else if (val0 <= MLIB_S32_MIN)                              \
      DST = MLIB_U8_MIN;                                        \
    else                                                        \
      DST = ((mlib_s32)val0 >> 24) ^ 0x80;                      \
  } while (0)

#endif /* MLIB_USE_FTOI_CLAMPING */
96
97/***************************************************************/
98mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
99{
100 DECLAREVAR_BC();
101 DTYPE *dstLineEnd;
102 mlib_d64 sat = (mlib_d64) 0x7F800000;
103 const mlib_f32 *mlib_filters_table;
104
105 if (filter == MLIB_BICUBIC) {
106 mlib_filters_table = mlib_filters_u8f_bc;
107 }
108 else {
109 mlib_filters_table = mlib_filters_u8f_bc2;
110 }
111
112 for (j = yStart; j <= yFinish; j++) {
113 mlib_d64 xf0, xf1, xf2, xf3;
114 mlib_d64 yf0, yf1, yf2, yf3;
115 mlib_d64 c0, c1, c2, c3, val0;
116 mlib_s32 filterpos;
117 mlib_f32 *fptr;
118 mlib_u8 s0, s1, s2, s3;
119
120 CLIP(1);
121 dstLineEnd = (DTYPE *) dstData + xRight;
122
123 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
124 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
125
126 xf0 = fptr[0];
127 xf1 = fptr[1];
128 xf2 = fptr[2];
129 xf3 = fptr[3];
130
131 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
132 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
133
134 yf0 = fptr[0];
135 yf1 = fptr[1];
136 yf2 = fptr[2];
137 yf3 = fptr[3];
138
139 xSrc = (X >> MLIB_SHIFT) - 1;
140 ySrc = (Y >> MLIB_SHIFT) - 1;
141
142 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
143 s0 = srcPixelPtr[0];
144 s1 = srcPixelPtr[1];
145 s2 = srcPixelPtr[2];
146 s3 = srcPixelPtr[3];
147
148#ifdef __SUNPRO_C
149#pragma pipeloop(0)
150#endif /* __SUNPRO_C */
151 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
152 X += dX;
153 Y += dY;
154
155 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
156 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
157 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
158 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
159 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
160 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
161 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
162 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
163 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
164 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
165 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
166
167 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
168 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
169
170 xf0 = fptr[0];
171 xf1 = fptr[1];
172 xf2 = fptr[2];
173 xf3 = fptr[3];
174
175 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
176
177 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
178 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
179
180 yf0 = fptr[0];
181 yf1 = fptr[1];
182 yf2 = fptr[2];
183 yf3 = fptr[3];
184
185 SAT8(dstPixelPtr[0]);
186
187 xSrc = (X >> MLIB_SHIFT) - 1;
188 ySrc = (Y >> MLIB_SHIFT) - 1;
189
190 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
191 s0 = srcPixelPtr[0];
192 s1 = srcPixelPtr[1];
193 s2 = srcPixelPtr[2];
194 s3 = srcPixelPtr[3];
195 }
196
197 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
198 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
199 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
200 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
201 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
202 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
203 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
204 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
205 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
206 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 +
207 mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3);
208
209 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
210
211 SAT8(dstPixelPtr[0]);
212 }
213
214 return MLIB_SUCCESS;
215}
216
217/***************************************************************/
218mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
219{
220 DECLAREVAR_BC();
221 DTYPE *dstLineEnd;
222 mlib_d64 sat = (mlib_d64) 0x7F800000;
223 const mlib_f32 *mlib_filters_table;
224
225 if (filter == MLIB_BICUBIC) {
226 mlib_filters_table = mlib_filters_u8f_bc;
227 }
228 else {
229 mlib_filters_table = mlib_filters_u8f_bc2;
230 }
231
232 for (j = yStart; j <= yFinish; j++) {
233 mlib_d64 xf0, xf1, xf2, xf3;
234 mlib_d64 yf0, yf1, yf2, yf3;
235 mlib_d64 c0, c1, c2, c3, val0;
236 mlib_s32 filterpos, k;
237 mlib_f32 *fptr;
238 mlib_u8 s0, s1, s2, s3;
239
240 CLIP(2);
241 dstLineEnd = (DTYPE *) dstData + 2 * xRight;
242
243 for (k = 0; k < 2; k++) {
244 mlib_s32 X1 = X;
245 mlib_s32 Y1 = Y;
246 DTYPE *dPtr = dstPixelPtr + k;
247
248 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
249 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
250
251 xf0 = fptr[0];
252 xf1 = fptr[1];
253 xf2 = fptr[2];
254 xf3 = fptr[3];
255
256 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
257 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
258
259 yf0 = fptr[0];
260 yf1 = fptr[1];
261 yf2 = fptr[2];
262 yf3 = fptr[3];
263
264 xSrc = (X1 >> MLIB_SHIFT) - 1;
265 ySrc = (Y1 >> MLIB_SHIFT) - 1;
266
267 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
268 s0 = srcPixelPtr[0];
269 s1 = srcPixelPtr[2];
270 s2 = srcPixelPtr[4];
271 s3 = srcPixelPtr[6];
272
273#ifdef __SUNPRO_C
274#pragma pipeloop(0)
275#endif /* __SUNPRO_C */
276 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
277 X1 += dX;
278 Y1 += dY;
279
280 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
281 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
282 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
283 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
284 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
285 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
286 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
287 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
288 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
289 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
290 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
291
292 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
293 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
294
295 xf0 = fptr[0];
296 xf1 = fptr[1];
297 xf2 = fptr[2];
298 xf3 = fptr[3];
299
300 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
301
302 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
303 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
304
305 yf0 = fptr[0];
306 yf1 = fptr[1];
307 yf2 = fptr[2];
308 yf3 = fptr[3];
309
310 SAT8(dPtr[0]);
311
312 xSrc = (X1 >> MLIB_SHIFT) - 1;
313 ySrc = (Y1 >> MLIB_SHIFT) - 1;
314
315 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
316 s0 = srcPixelPtr[0];
317 s1 = srcPixelPtr[2];
318 s2 = srcPixelPtr[4];
319 s3 = srcPixelPtr[6];
320 }
321
322 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
323 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
324 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
325 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
326 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
327 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
328 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
329 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
330 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
331 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 +
332 mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3);
333
334 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
335
336 SAT8(dPtr[0]);
337 }
338 }
339
340 return MLIB_SUCCESS;
341}
342
343/***************************************************************/
344mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
345{
346 DECLAREVAR_BC();
347 DTYPE *dstLineEnd;
348 mlib_d64 sat = (mlib_d64) 0x7F800000;
349 const mlib_f32 *mlib_filters_table;
350
351 if (filter == MLIB_BICUBIC) {
352 mlib_filters_table = mlib_filters_u8f_bc;
353 }
354 else {
355 mlib_filters_table = mlib_filters_u8f_bc2;
356 }
357
358 for (j = yStart; j <= yFinish; j++) {
359 mlib_d64 xf0, xf1, xf2, xf3;
360 mlib_d64 yf0, yf1, yf2, yf3;
361 mlib_d64 c0, c1, c2, c3, val0;
362 mlib_s32 filterpos, k;
363 mlib_f32 *fptr;
364 mlib_u8 s0, s1, s2, s3;
365
366 CLIP(3);
367 dstLineEnd = (DTYPE *) dstData + 3 * xRight;
368
369 for (k = 0; k < 3; k++) {
370 mlib_s32 X1 = X;
371 mlib_s32 Y1 = Y;
372 DTYPE *dPtr = dstPixelPtr + k;
373
374 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
375 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
376
377 xf0 = fptr[0];
378 xf1 = fptr[1];
379 xf2 = fptr[2];
380 xf3 = fptr[3];
381
382 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
383 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
384
385 yf0 = fptr[0];
386 yf1 = fptr[1];
387 yf2 = fptr[2];
388 yf3 = fptr[3];
389
390 xSrc = (X1 >> MLIB_SHIFT) - 1;
391 ySrc = (Y1 >> MLIB_SHIFT) - 1;
392
393 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
394 s0 = srcPixelPtr[0];
395 s1 = srcPixelPtr[3];
396 s2 = srcPixelPtr[6];
397 s3 = srcPixelPtr[9];
398
399#ifdef __SUNPRO_C
400#pragma pipeloop(0)
401#endif /* __SUNPRO_C */
402 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
403 X1 += dX;
404 Y1 += dY;
405
406 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
407 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
408 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
409 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
410 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
411 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
412 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
413 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
414 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
415 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
416 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
417
418 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
419 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
420
421 xf0 = fptr[0];
422 xf1 = fptr[1];
423 xf2 = fptr[2];
424 xf3 = fptr[3];
425
426 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
427
428 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
429 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
430
431 yf0 = fptr[0];
432 yf1 = fptr[1];
433 yf2 = fptr[2];
434 yf3 = fptr[3];
435
436 SAT8(dPtr[0]);
437
438 xSrc = (X1 >> MLIB_SHIFT) - 1;
439 ySrc = (Y1 >> MLIB_SHIFT) - 1;
440
441 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
442 s0 = srcPixelPtr[0];
443 s1 = srcPixelPtr[3];
444 s2 = srcPixelPtr[6];
445 s3 = srcPixelPtr[9];
446 }
447
448 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
449 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
450 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
451 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
452 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
453 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
454 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
455 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
456 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
457 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 +
458 mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3);
459
460 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
461
462 SAT8(dPtr[0]);
463 }
464 }
465
466 return MLIB_SUCCESS;
467}
468
469/***************************************************************/
470mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
471{
472 DECLAREVAR_BC();
473 DTYPE *dstLineEnd;
474 mlib_d64 sat = (mlib_d64) 0x7F800000;
475 const mlib_f32 *mlib_filters_table;
476
477 if (filter == MLIB_BICUBIC) {
478 mlib_filters_table = mlib_filters_u8f_bc;
479 }
480 else {
481 mlib_filters_table = mlib_filters_u8f_bc2;
482 }
483
484 for (j = yStart; j <= yFinish; j++) {
485 mlib_d64 xf0, xf1, xf2, xf3;
486 mlib_d64 yf0, yf1, yf2, yf3;
487 mlib_d64 c0, c1, c2, c3, val0;
488 mlib_s32 filterpos, k;
489 mlib_f32 *fptr;
490 mlib_u8 s0, s1, s2, s3;
491
492 CLIP(4);
493 dstLineEnd = (DTYPE *) dstData + 4 * xRight;
494
495 for (k = 0; k < 4; k++) {
496 mlib_s32 X1 = X;
497 mlib_s32 Y1 = Y;
498 DTYPE *dPtr = dstPixelPtr + k;
499
500 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
501 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
502
503 xf0 = fptr[0];
504 xf1 = fptr[1];
505 xf2 = fptr[2];
506 xf3 = fptr[3];
507
508 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
509 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
510
511 yf0 = fptr[0];
512 yf1 = fptr[1];
513 yf2 = fptr[2];
514 yf3 = fptr[3];
515
516 xSrc = (X1 >> MLIB_SHIFT) - 1;
517 ySrc = (Y1 >> MLIB_SHIFT) - 1;
518
519 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
520 s0 = srcPixelPtr[0];
521 s1 = srcPixelPtr[4];
522 s2 = srcPixelPtr[8];
523 s3 = srcPixelPtr[12];
524
525#ifdef __SUNPRO_C
526#pragma pipeloop(0)
527#endif /* __SUNPRO_C */
528 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
529 X1 += dX;
530 Y1 += dY;
531
532 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
533 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
534 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
535 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
536 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
537 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
538 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
539 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
540 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
541 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
542 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
543
544 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
545 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
546
547 xf0 = fptr[0];
548 xf1 = fptr[1];
549 xf2 = fptr[2];
550 xf3 = fptr[3];
551
552 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
553
554 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
555 fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos);
556
557 yf0 = fptr[0];
558 yf1 = fptr[1];
559 yf2 = fptr[2];
560 yf3 = fptr[3];
561
562 SAT8(dPtr[0]);
563
564 xSrc = (X1 >> MLIB_SHIFT) - 1;
565 ySrc = (Y1 >> MLIB_SHIFT) - 1;
566
567 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
568 s0 = srcPixelPtr[0];
569 s1 = srcPixelPtr[4];
570 s2 = srcPixelPtr[8];
571 s3 = srcPixelPtr[12];
572 }
573
574 c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 +
575 mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3);
576 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
577 c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
578 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
579 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
580 c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
581 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
582 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
583 c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 +
584 mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3);
585
586 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
587
588 SAT8(dPtr[0]);
589 }
590 }
591
592 return MLIB_SUCCESS;
593}
594
595#else /* for x86, using integer multiplies is faster */
596
/* Right shift applied to each horizontally filtered row sum. */
#define SHIFT_X  12
/* Rounding term added before the SHIFT_X shift; currently disabled (0).
   The commented-out alternative is half of (1 << SHIFT_X). */
#define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */

/* Right shift applied to the final vertical sum: 14 + 14 - 12 = 16.
   NOTE(review): "14 + 14" presumably reflects 14 fractional bits per
   filter tap - confirm against the coefficient tables. */
#define SHIFT_Y  (14 + 14 - SHIFT_X)
/* Half of (1 << SHIFT_Y): makes the final shift round to nearest. */
#define ROUND_Y  (1 << (SHIFT_Y - 1))

/***************************************************************/
/* Test for the presence of any "1" bit in bits
   8 to 31 of val. If present, then val is either
   negative or >255. If over/underflows of 8 bits
   are uncommon, then this technique can be a win,
   since only a single test, rather than two, is
   necessary to determine if clamping is needed.
   On the other hand, if over/underflows are common,
   it adds an extra test.

   The macro reads a variable named val0 that must be in scope at the
   point of use and stores the clamped result into DST. It is wrapped in
   do { } while (0) so that it expands to exactly one statement and can be
   used as the body of an unbraced if/else.
*/
#define S32_TO_U8_SAT(DST)                                      \
  do {                                                          \
    if (val0 & 0xffffff00) {                                    \
      if (val0 < MLIB_U8_MIN)                                   \
        DST = MLIB_U8_MIN;                                      \
      else                                                      \
        DST = MLIB_U8_MAX;                                      \
    } else {                                                    \
      DST = (mlib_u8)val0;                                      \
    }                                                           \
  } while (0)
622
623/***************************************************************/
624mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
625{
626 DECLAREVAR_BC();
627 DTYPE *dstLineEnd;
628 const mlib_s16 *mlib_filters_table;
629
630 if (filter == MLIB_BICUBIC) {
631 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
632 }
633 else {
634 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
635 }
636
637 for (j = yStart; j <= yFinish; j++) {
638 mlib_s32 xf0, xf1, xf2, xf3;
639 mlib_s32 yf0, yf1, yf2, yf3;
640 mlib_s32 c0, c1, c2, c3, val0;
641 mlib_s32 filterpos;
642 mlib_s16 *fptr;
643 mlib_u8 s0, s1, s2, s3;
644
645 CLIP(1);
646 dstLineEnd = (DTYPE *) dstData + xRight;
647
648 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
649 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
650
651 xf0 = fptr[0];
652 xf1 = fptr[1];
653 xf2 = fptr[2];
654 xf3 = fptr[3];
655
656 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
657 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
658
659 yf0 = fptr[0];
660 yf1 = fptr[1];
661 yf2 = fptr[2];
662 yf3 = fptr[3];
663
664 xSrc = (X >> MLIB_SHIFT) - 1;
665 ySrc = (Y >> MLIB_SHIFT) - 1;
666
667 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
668 s0 = srcPixelPtr[0];
669 s1 = srcPixelPtr[1];
670 s2 = srcPixelPtr[2];
671 s3 = srcPixelPtr[3];
672
673#ifdef __SUNPRO_C
674#pragma pipeloop(0)
675#endif /* __SUNPRO_C */
676 for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
677 X += dX;
678 Y += dY;
679
680 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
681 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
682 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
683 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
684 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
685 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
686 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
687 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
688 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
689 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
690
691 filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
692 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
693
694 xf0 = fptr[0];
695 xf1 = fptr[1];
696 xf2 = fptr[2];
697 xf3 = fptr[3];
698
699 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
700
701 filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
702 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
703
704 yf0 = fptr[0];
705 yf1 = fptr[1];
706 yf2 = fptr[2];
707 yf3 = fptr[3];
708
709 S32_TO_U8_SAT(dstPixelPtr[0]);
710
711 xSrc = (X >> MLIB_SHIFT) - 1;
712 ySrc = (Y >> MLIB_SHIFT) - 1;
713
714 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
715 s0 = srcPixelPtr[0];
716 s1 = srcPixelPtr[1];
717 s2 = srcPixelPtr[2];
718 s3 = srcPixelPtr[3];
719 }
720
721 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
722 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
723 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
724 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
725 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
726 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
727 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
728 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
729 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
730 srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
731
732 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
733
734 S32_TO_U8_SAT(dstPixelPtr[0]);
735 }
736
737 return MLIB_SUCCESS;
738}
739
740/***************************************************************/
741mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
742{
743 DECLAREVAR_BC();
744 DTYPE *dstLineEnd;
745 const mlib_s16 *mlib_filters_table;
746
747 if (filter == MLIB_BICUBIC) {
748 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
749 }
750 else {
751 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
752 }
753
754 for (j = yStart; j <= yFinish; j++) {
755 mlib_s32 xf0, xf1, xf2, xf3;
756 mlib_s32 yf0, yf1, yf2, yf3;
757 mlib_s32 c0, c1, c2, c3, val0;
758 mlib_s32 filterpos, k;
759 mlib_s16 *fptr;
760 mlib_u8 s0, s1, s2, s3;
761
762 CLIP(2);
763 dstLineEnd = (DTYPE *) dstData + 2 * xRight;
764
765 for (k = 0; k < 2; k++) {
766 mlib_s32 X1 = X;
767 mlib_s32 Y1 = Y;
768 DTYPE *dPtr = dstPixelPtr + k;
769
770 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
771 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
772
773 xf0 = fptr[0];
774 xf1 = fptr[1];
775 xf2 = fptr[2];
776 xf3 = fptr[3];
777
778 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
779 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
780
781 yf0 = fptr[0];
782 yf1 = fptr[1];
783 yf2 = fptr[2];
784 yf3 = fptr[3];
785
786 xSrc = (X1 >> MLIB_SHIFT) - 1;
787 ySrc = (Y1 >> MLIB_SHIFT) - 1;
788
789 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
790 s0 = srcPixelPtr[0];
791 s1 = srcPixelPtr[2];
792 s2 = srcPixelPtr[4];
793 s3 = srcPixelPtr[6];
794
795#ifdef __SUNPRO_C
796#pragma pipeloop(0)
797#endif /* __SUNPRO_C */
798 for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
799 X1 += dX;
800 Y1 += dY;
801
802 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
803 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
804 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
805 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
806 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
807 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
808 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
809 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
810 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
811 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
812
813 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
814 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
815
816 xf0 = fptr[0];
817 xf1 = fptr[1];
818 xf2 = fptr[2];
819 xf3 = fptr[3];
820
821 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
822
823 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
824 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
825
826 yf0 = fptr[0];
827 yf1 = fptr[1];
828 yf2 = fptr[2];
829 yf3 = fptr[3];
830
831 S32_TO_U8_SAT(dPtr[0]);
832
833 xSrc = (X1 >> MLIB_SHIFT) - 1;
834 ySrc = (Y1 >> MLIB_SHIFT) - 1;
835
836 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
837 s0 = srcPixelPtr[0];
838 s1 = srcPixelPtr[2];
839 s2 = srcPixelPtr[4];
840 s3 = srcPixelPtr[6];
841 }
842
843 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
844 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
845 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
846 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
847 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
848 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
849 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
850 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
851 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
852 srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
853
854 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
855
856 S32_TO_U8_SAT(dPtr[0]);
857 }
858 }
859
860 return MLIB_SUCCESS;
861}
862
863/***************************************************************/
864mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
865{
866 DECLAREVAR_BC();
867 DTYPE *dstLineEnd;
868 const mlib_s16 *mlib_filters_table;
869
870 if (filter == MLIB_BICUBIC) {
871 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
872 }
873 else {
874 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
875 }
876
877 for (j = yStart; j <= yFinish; j++) {
878 mlib_s32 xf0, xf1, xf2, xf3;
879 mlib_s32 yf0, yf1, yf2, yf3;
880 mlib_s32 c0, c1, c2, c3, val0;
881 mlib_s32 filterpos, k;
882 mlib_s16 *fptr;
883 mlib_u8 s0, s1, s2, s3;
884
885 CLIP(3);
886 dstLineEnd = (DTYPE *) dstData + 3 * xRight;
887
888 for (k = 0; k < 3; k++) {
889 mlib_s32 X1 = X;
890 mlib_s32 Y1 = Y;
891 DTYPE *dPtr = dstPixelPtr + k;
892
893 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
894 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
895
896 xf0 = fptr[0];
897 xf1 = fptr[1];
898 xf2 = fptr[2];
899 xf3 = fptr[3];
900
901 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
902 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
903
904 yf0 = fptr[0];
905 yf1 = fptr[1];
906 yf2 = fptr[2];
907 yf3 = fptr[3];
908
909 xSrc = (X1 >> MLIB_SHIFT) - 1;
910 ySrc = (Y1 >> MLIB_SHIFT) - 1;
911
912 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
913 s0 = srcPixelPtr[0];
914 s1 = srcPixelPtr[3];
915 s2 = srcPixelPtr[6];
916 s3 = srcPixelPtr[9];
917
918#ifdef __SUNPRO_C
919#pragma pipeloop(0)
920#endif /* __SUNPRO_C */
921 for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
922 X1 += dX;
923 Y1 += dY;
924
925 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
926 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
927 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
928 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
929 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
930 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
931 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
932 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
933 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
934 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
935
936 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
937 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
938
939 xf0 = fptr[0];
940 xf1 = fptr[1];
941 xf2 = fptr[2];
942 xf3 = fptr[3];
943
944 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
945
946 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
947 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
948
949 yf0 = fptr[0];
950 yf1 = fptr[1];
951 yf2 = fptr[2];
952 yf3 = fptr[3];
953
954 S32_TO_U8_SAT(dPtr[0]);
955
956 xSrc = (X1 >> MLIB_SHIFT) - 1;
957 ySrc = (Y1 >> MLIB_SHIFT) - 1;
958
959 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
960 s0 = srcPixelPtr[0];
961 s1 = srcPixelPtr[3];
962 s2 = srcPixelPtr[6];
963 s3 = srcPixelPtr[9];
964 }
965
966 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
967 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
968 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
969 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
970 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
971 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
972 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
973 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
974 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
975 srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
976
977 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
978
979 S32_TO_U8_SAT(dPtr[0]);
980 }
981 }
982
983 return MLIB_SUCCESS;
984}
985
986/***************************************************************/
987mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
988{
989 DECLAREVAR_BC();
990 DTYPE *dstLineEnd;
991 const mlib_s16 *mlib_filters_table;
992
993 if (filter == MLIB_BICUBIC) {
994 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc;
995 }
996 else {
997 mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2;
998 }
999
1000 for (j = yStart; j <= yFinish; j++) {
1001 mlib_s32 xf0, xf1, xf2, xf3;
1002 mlib_s32 yf0, yf1, yf2, yf3;
1003 mlib_s32 c0, c1, c2, c3, val0;
1004 mlib_s32 filterpos, k;
1005 mlib_s16 *fptr;
1006 mlib_u8 s0, s1, s2, s3;
1007
1008 CLIP(4);
1009 dstLineEnd = (DTYPE *) dstData + 4 * xRight;
1010
1011 for (k = 0; k < 4; k++) {
1012 mlib_s32 X1 = X;
1013 mlib_s32 Y1 = Y;
1014 DTYPE *dPtr = dstPixelPtr + k;
1015
1016 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1017 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1018
1019 xf0 = fptr[0];
1020 xf1 = fptr[1];
1021 xf2 = fptr[2];
1022 xf3 = fptr[3];
1023
1024 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1025 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1026
1027 yf0 = fptr[0];
1028 yf1 = fptr[1];
1029 yf2 = fptr[2];
1030 yf3 = fptr[3];
1031
1032 xSrc = (X1 >> MLIB_SHIFT) - 1;
1033 ySrc = (Y1 >> MLIB_SHIFT) - 1;
1034
1035 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1036 s0 = srcPixelPtr[0];
1037 s1 = srcPixelPtr[4];
1038 s2 = srcPixelPtr[8];
1039 s3 = srcPixelPtr[12];
1040
1041#ifdef __SUNPRO_C
1042#pragma pipeloop(0)
1043#endif /* __SUNPRO_C */
1044 for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
1045 X1 += dX;
1046 Y1 += dY;
1047
1048 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1049 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1050 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1051 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1052 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1053 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1054 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1055 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1056 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1057 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1058
1059 filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
1060 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1061
1062 xf0 = fptr[0];
1063 xf1 = fptr[1];
1064 xf2 = fptr[2];
1065 xf3 = fptr[3];
1066
1067 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1068
1069 filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
1070 fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
1071
1072 yf0 = fptr[0];
1073 yf1 = fptr[1];
1074 yf2 = fptr[2];
1075 yf3 = fptr[3];
1076
1077 S32_TO_U8_SAT(dPtr[0]);
1078
1079 xSrc = (X1 >> MLIB_SHIFT) - 1;
1080 ySrc = (Y1 >> MLIB_SHIFT) - 1;
1081
1082 srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
1083 s0 = srcPixelPtr[0];
1084 s1 = srcPixelPtr[4];
1085 s2 = srcPixelPtr[8];
1086 s3 = srcPixelPtr[12];
1087 }
1088
1089 c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
1090 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1091 c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1092 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1093 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1094 c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1095 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1096 srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
1097 c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
1098 srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
1099
1100 val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
1101
1102 S32_TO_U8_SAT(dPtr[0]);
1103 }
1104 }
1105
1106 return MLIB_SUCCESS;
1107}
1108
1109#endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */
1110
1111/***************************************************************/
1112