1 | /* |
2 | * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | |
27 | /* |
28 | * FUNCTION |
29 | * Image affine transformation with Bicubic filtering |
30 | * SYNOPSIS |
31 | * mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges, |
32 | * mlib_s32 *rightEdges, |
33 | * mlib_s32 *xStarts, |
34 | * mlib_s32 *yStarts, |
35 | * mlib_s32 *sides, |
36 | * mlib_u8 *dstData, |
37 | * mlib_u8 **lineAddr, |
38 | * mlib_s32 dstYStride, |
39 | * mlib_s32 is_affine, |
40 | * mlib_s32 srcYStride, |
41 | * mlib_filter filter) |
42 | * |
43 | * ARGUMENTS |
44 | * leftEdges array[dstHeight] of xLeft coordinates |
45 | * rightEdges array[dstHeight] of xRight coordinates |
46 | * xStarts array[dstHeight] of xStart * 65536 coordinates |
47 | * yStarts array[dstHeight] of yStart * 65536 coordinates |
48 | * sides output array[4]. sides[0] is yStart, sides[1] is yFinish, |
49 | * sides[2] is dx * 65536, sides[3] is dy * 65536 |
50 | * dstData pointer to the first pixel on (yStart - 1) line |
51 | * lineAddr array[srcHeight] of pointers to the first pixel on |
52 | * the corresponding lines |
53 | * dstYStride stride of destination image |
54 | * is_affine indicator (Affine - GridWarp) |
55 | * srcYStride stride of source image |
56 | * filter type of resampling filter |
57 | * |
58 | * DESCRIPTION |
59 | * The functions step along the lines from xLeft to xRight and apply |
60 | * the bicubic filtering. |
61 | * |
62 | */ |
63 | |
64 | #include "mlib_ImageAffine.h" |
65 | |
/* Pixel type handled by every routine in this file. */
#define DTYPE             mlib_s16

/*
 * Filter-table resolution parameter.
 * NOTE(review): presumably consumed by the FILTER_SHIFT / FILTER_MASK
 * definitions in mlib_ImageAffine.h - confirm there.
 */
#define FILTER_BITS       9

/* Builds the exported routine name from a channel tag such as 1ch. */
#define FUN_NAME(CHAN)    mlib_ImageAffine_s16_##CHAN##_bc
69 | |
70 | /***************************************************************/ |
71 | #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */ |
72 | |
/*
 * The floating-point path reads four mlib_f32 coefficients per table entry.
 * NOTE(review): 4 is presumably log2 of that 16-byte entry size - confirm
 * against the FILTER_SHIFT / FILTER_MASK definitions in the header.
 */
#undef  FILTER_ELEM_BITS
#define FILTER_ELEM_BITS  4

/*
 * SAT16(DST) - store the accumulator `val0` (a variable that must be in
 * scope at the point of use) into DST as a signed 16-bit sample.  The
 * sample is the accumulator converted to mlib_s32 and shifted right by 16.
 *
 * The macro is a single parenthesized expression with a parenthesized
 * argument, so it behaves like an ordinary assignment wherever it is used
 * (`SAT16(x);` keeps working unchanged).
 */
#ifdef MLIB_USE_FTOI_CLAMPING

/* No explicit range test is made before the conversion in this variant. */
#define SAT16(DST)                                              \
  ((DST) = ((mlib_s32) val0) >> 16)

#else

/* Explicitly clamp accumulators at or beyond the mlib_s32 limits. */
#define SAT16(DST)                                              \
  ((DST) = (val0 >= MLIB_S32_MAX) ? MLIB_S16_MAX :              \
           (val0 <= MLIB_S32_MIN) ? MLIB_S16_MIN :              \
           (((mlib_s32) val0) >> 16))

#endif /* MLIB_USE_FTOI_CLAMPING */
92 | |
93 | mlib_status FUN_NAME(1ch)(mlib_affine_param *param) |
94 | { |
95 | DECLAREVAR_BC(); |
96 | DTYPE *dstLineEnd; |
97 | const mlib_f32 *mlib_filters_table; |
98 | |
99 | if (filter == MLIB_BICUBIC) { |
100 | mlib_filters_table = mlib_filters_s16f_bc; |
101 | } |
102 | else { |
103 | mlib_filters_table = mlib_filters_s16f_bc2; |
104 | } |
105 | |
106 | for (j = yStart; j <= yFinish; j++) { |
107 | mlib_d64 xf0, xf1, xf2, xf3; |
108 | mlib_d64 yf0, yf1, yf2, yf3; |
109 | mlib_d64 c0, c1, c2, c3, val0; |
110 | mlib_s32 filterpos; |
111 | mlib_f32 *fptr; |
112 | mlib_s32 s0, s1, s2, s3; |
113 | mlib_s32 s4, s5, s6, s7; |
114 | |
115 | CLIP(1); |
116 | dstLineEnd = (DTYPE *) dstData + xRight; |
117 | |
118 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
119 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
120 | |
121 | xf0 = fptr[0]; |
122 | xf1 = fptr[1]; |
123 | xf2 = fptr[2]; |
124 | xf3 = fptr[3]; |
125 | |
126 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
127 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
128 | |
129 | yf0 = fptr[0]; |
130 | yf1 = fptr[1]; |
131 | yf2 = fptr[2]; |
132 | yf3 = fptr[3]; |
133 | |
134 | xSrc = (X >> MLIB_SHIFT) - 1; |
135 | ySrc = (Y >> MLIB_SHIFT) - 1; |
136 | |
137 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
138 | s0 = srcPixelPtr[0]; |
139 | s1 = srcPixelPtr[1]; |
140 | s2 = srcPixelPtr[2]; |
141 | s3 = srcPixelPtr[3]; |
142 | |
143 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
144 | s4 = srcPixelPtr[0]; |
145 | s5 = srcPixelPtr[1]; |
146 | s6 = srcPixelPtr[2]; |
147 | s7 = srcPixelPtr[3]; |
148 | |
149 | for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { |
150 | |
151 | X += dX; |
152 | Y += dY; |
153 | |
154 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
155 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
156 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
157 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
158 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); |
159 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
160 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
161 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); |
162 | |
163 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
164 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
165 | |
166 | xf0 = fptr[0]; |
167 | xf1 = fptr[1]; |
168 | xf2 = fptr[2]; |
169 | xf3 = fptr[3]; |
170 | |
171 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
172 | |
173 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
174 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
175 | |
176 | yf0 = fptr[0]; |
177 | yf1 = fptr[1]; |
178 | yf2 = fptr[2]; |
179 | yf3 = fptr[3]; |
180 | |
181 | SAT16(dstPixelPtr[0]); |
182 | |
183 | xSrc = (X >> MLIB_SHIFT) - 1; |
184 | ySrc = (Y >> MLIB_SHIFT) - 1; |
185 | |
186 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
187 | s0 = srcPixelPtr[0]; |
188 | s1 = srcPixelPtr[1]; |
189 | s2 = srcPixelPtr[2]; |
190 | s3 = srcPixelPtr[3]; |
191 | |
192 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
193 | s4 = srcPixelPtr[0]; |
194 | s5 = srcPixelPtr[1]; |
195 | s6 = srcPixelPtr[2]; |
196 | s7 = srcPixelPtr[3]; |
197 | } |
198 | |
199 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
200 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
201 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
202 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
203 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); |
204 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
205 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
206 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); |
207 | |
208 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
209 | SAT16(dstPixelPtr[0]); |
210 | } |
211 | |
212 | return MLIB_SUCCESS; |
213 | } |
214 | |
215 | mlib_status FUN_NAME(2ch)(mlib_affine_param *param) |
216 | { |
217 | DECLAREVAR_BC(); |
218 | DTYPE *dstLineEnd; |
219 | const mlib_f32 *mlib_filters_table; |
220 | |
221 | if (filter == MLIB_BICUBIC) { |
222 | mlib_filters_table = mlib_filters_s16f_bc; |
223 | } |
224 | else { |
225 | mlib_filters_table = mlib_filters_s16f_bc2; |
226 | } |
227 | |
228 | for (j = yStart; j <= yFinish; j++) { |
229 | mlib_d64 xf0, xf1, xf2, xf3; |
230 | mlib_d64 yf0, yf1, yf2, yf3; |
231 | mlib_d64 c0, c1, c2, c3, val0; |
232 | mlib_s32 filterpos, k; |
233 | mlib_f32 *fptr; |
234 | mlib_s32 s0, s1, s2, s3; |
235 | mlib_s32 s4, s5, s6, s7; |
236 | |
237 | CLIP(2); |
238 | dstLineEnd = (DTYPE *) dstData + 2 * xRight; |
239 | |
240 | for (k = 0; k < 2; k++) { |
241 | mlib_s32 X1 = X; |
242 | mlib_s32 Y1 = Y; |
243 | DTYPE *dPtr = dstPixelPtr + k; |
244 | |
245 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
246 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
247 | |
248 | xf0 = fptr[0]; |
249 | xf1 = fptr[1]; |
250 | xf2 = fptr[2]; |
251 | xf3 = fptr[3]; |
252 | |
253 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
254 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
255 | |
256 | yf0 = fptr[0]; |
257 | yf1 = fptr[1]; |
258 | yf2 = fptr[2]; |
259 | yf3 = fptr[3]; |
260 | |
261 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
262 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
263 | |
264 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
265 | s0 = srcPixelPtr[0]; |
266 | s1 = srcPixelPtr[2]; |
267 | s2 = srcPixelPtr[4]; |
268 | s3 = srcPixelPtr[6]; |
269 | |
270 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
271 | s4 = srcPixelPtr[0]; |
272 | s5 = srcPixelPtr[2]; |
273 | s6 = srcPixelPtr[4]; |
274 | s7 = srcPixelPtr[6]; |
275 | |
276 | for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { |
277 | |
278 | X1 += dX; |
279 | Y1 += dY; |
280 | |
281 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
282 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
283 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
284 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
285 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); |
286 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
287 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
288 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); |
289 | |
290 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
291 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
292 | |
293 | xf0 = fptr[0]; |
294 | xf1 = fptr[1]; |
295 | xf2 = fptr[2]; |
296 | xf3 = fptr[3]; |
297 | |
298 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
299 | |
300 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
301 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
302 | |
303 | yf0 = fptr[0]; |
304 | yf1 = fptr[1]; |
305 | yf2 = fptr[2]; |
306 | yf3 = fptr[3]; |
307 | |
308 | SAT16(dPtr[0]); |
309 | |
310 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
311 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
312 | |
313 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
314 | s0 = srcPixelPtr[0]; |
315 | s1 = srcPixelPtr[2]; |
316 | s2 = srcPixelPtr[4]; |
317 | s3 = srcPixelPtr[6]; |
318 | |
319 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
320 | s4 = srcPixelPtr[0]; |
321 | s5 = srcPixelPtr[2]; |
322 | s6 = srcPixelPtr[4]; |
323 | s7 = srcPixelPtr[6]; |
324 | } |
325 | |
326 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
327 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
328 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
329 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
330 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); |
331 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
332 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
333 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); |
334 | |
335 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
336 | SAT16(dPtr[0]); |
337 | } |
338 | } |
339 | |
340 | return MLIB_SUCCESS; |
341 | } |
342 | |
343 | mlib_status FUN_NAME(3ch)(mlib_affine_param *param) |
344 | { |
345 | DECLAREVAR_BC(); |
346 | DTYPE *dstLineEnd; |
347 | const mlib_f32 *mlib_filters_table; |
348 | |
349 | if (filter == MLIB_BICUBIC) { |
350 | mlib_filters_table = mlib_filters_s16f_bc; |
351 | } |
352 | else { |
353 | mlib_filters_table = mlib_filters_s16f_bc2; |
354 | } |
355 | |
356 | for (j = yStart; j <= yFinish; j++) { |
357 | mlib_d64 xf0, xf1, xf2, xf3; |
358 | mlib_d64 yf0, yf1, yf2, yf3; |
359 | mlib_d64 c0, c1, c2, c3, val0; |
360 | mlib_s32 filterpos, k; |
361 | mlib_f32 *fptr; |
362 | mlib_s32 s0, s1, s2, s3; |
363 | mlib_s32 s4, s5, s6, s7; |
364 | |
365 | CLIP(3); |
366 | dstLineEnd = (DTYPE *) dstData + 3 * xRight; |
367 | |
368 | for (k = 0; k < 3; k++) { |
369 | mlib_s32 X1 = X; |
370 | mlib_s32 Y1 = Y; |
371 | DTYPE *dPtr = dstPixelPtr + k; |
372 | |
373 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
374 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
375 | |
376 | xf0 = fptr[0]; |
377 | xf1 = fptr[1]; |
378 | xf2 = fptr[2]; |
379 | xf3 = fptr[3]; |
380 | |
381 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
382 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
383 | |
384 | yf0 = fptr[0]; |
385 | yf1 = fptr[1]; |
386 | yf2 = fptr[2]; |
387 | yf3 = fptr[3]; |
388 | |
389 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
390 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
391 | |
392 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
393 | s0 = srcPixelPtr[0]; |
394 | s1 = srcPixelPtr[3]; |
395 | s2 = srcPixelPtr[6]; |
396 | s3 = srcPixelPtr[9]; |
397 | |
398 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
399 | s4 = srcPixelPtr[0]; |
400 | s5 = srcPixelPtr[3]; |
401 | s6 = srcPixelPtr[6]; |
402 | s7 = srcPixelPtr[9]; |
403 | |
404 | for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { |
405 | |
406 | X1 += dX; |
407 | Y1 += dY; |
408 | |
409 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
410 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
411 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
412 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
413 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); |
414 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
415 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
416 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); |
417 | |
418 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
419 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
420 | |
421 | xf0 = fptr[0]; |
422 | xf1 = fptr[1]; |
423 | xf2 = fptr[2]; |
424 | xf3 = fptr[3]; |
425 | |
426 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
427 | |
428 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
429 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
430 | |
431 | yf0 = fptr[0]; |
432 | yf1 = fptr[1]; |
433 | yf2 = fptr[2]; |
434 | yf3 = fptr[3]; |
435 | |
436 | SAT16(dPtr[0]); |
437 | |
438 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
439 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
440 | |
441 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
442 | s0 = srcPixelPtr[0]; |
443 | s1 = srcPixelPtr[3]; |
444 | s2 = srcPixelPtr[6]; |
445 | s3 = srcPixelPtr[9]; |
446 | |
447 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
448 | s4 = srcPixelPtr[0]; |
449 | s5 = srcPixelPtr[3]; |
450 | s6 = srcPixelPtr[6]; |
451 | s7 = srcPixelPtr[9]; |
452 | } |
453 | |
454 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
455 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
456 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
457 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
458 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); |
459 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
460 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
461 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); |
462 | |
463 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
464 | SAT16(dPtr[0]); |
465 | } |
466 | } |
467 | |
468 | return MLIB_SUCCESS; |
469 | } |
470 | |
471 | mlib_status FUN_NAME(4ch)(mlib_affine_param *param) |
472 | { |
473 | DECLAREVAR_BC(); |
474 | DTYPE *dstLineEnd; |
475 | const mlib_f32 *mlib_filters_table; |
476 | |
477 | if (filter == MLIB_BICUBIC) { |
478 | mlib_filters_table = mlib_filters_s16f_bc; |
479 | } |
480 | else { |
481 | mlib_filters_table = mlib_filters_s16f_bc2; |
482 | } |
483 | |
484 | for (j = yStart; j <= yFinish; j++) { |
485 | mlib_d64 xf0, xf1, xf2, xf3; |
486 | mlib_d64 yf0, yf1, yf2, yf3; |
487 | mlib_d64 c0, c1, c2, c3, val0; |
488 | mlib_s32 filterpos, k; |
489 | mlib_f32 *fptr; |
490 | mlib_s32 s0, s1, s2, s3; |
491 | mlib_s32 s4, s5, s6, s7; |
492 | |
493 | CLIP(4); |
494 | dstLineEnd = (DTYPE *) dstData + 4 * xRight; |
495 | |
496 | for (k = 0; k < 4; k++) { |
497 | mlib_s32 X1 = X; |
498 | mlib_s32 Y1 = Y; |
499 | DTYPE *dPtr = dstPixelPtr + k; |
500 | |
501 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
502 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
503 | |
504 | xf0 = fptr[0]; |
505 | xf1 = fptr[1]; |
506 | xf2 = fptr[2]; |
507 | xf3 = fptr[3]; |
508 | |
509 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
510 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
511 | |
512 | yf0 = fptr[0]; |
513 | yf1 = fptr[1]; |
514 | yf2 = fptr[2]; |
515 | yf3 = fptr[3]; |
516 | |
517 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
518 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
519 | |
520 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
521 | s0 = srcPixelPtr[0]; |
522 | s1 = srcPixelPtr[4]; |
523 | s2 = srcPixelPtr[8]; |
524 | s3 = srcPixelPtr[12]; |
525 | |
526 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
527 | s4 = srcPixelPtr[0]; |
528 | s5 = srcPixelPtr[4]; |
529 | s6 = srcPixelPtr[8]; |
530 | s7 = srcPixelPtr[12]; |
531 | |
532 | for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { |
533 | |
534 | X1 += dX; |
535 | Y1 += dY; |
536 | |
537 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
538 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
539 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
540 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
541 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); |
542 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
543 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
544 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); |
545 | |
546 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
547 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
548 | |
549 | xf0 = fptr[0]; |
550 | xf1 = fptr[1]; |
551 | xf2 = fptr[2]; |
552 | xf3 = fptr[3]; |
553 | |
554 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
555 | |
556 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
557 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
558 | |
559 | yf0 = fptr[0]; |
560 | yf1 = fptr[1]; |
561 | yf2 = fptr[2]; |
562 | yf3 = fptr[3]; |
563 | |
564 | SAT16(dPtr[0]); |
565 | |
566 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
567 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
568 | |
569 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
570 | s0 = srcPixelPtr[0]; |
571 | s1 = srcPixelPtr[4]; |
572 | s2 = srcPixelPtr[8]; |
573 | s3 = srcPixelPtr[12]; |
574 | |
575 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
576 | s4 = srcPixelPtr[0]; |
577 | s5 = srcPixelPtr[4]; |
578 | s6 = srcPixelPtr[8]; |
579 | s7 = srcPixelPtr[12]; |
580 | } |
581 | |
582 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
583 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
584 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
585 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
586 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); |
587 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
588 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
589 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); |
590 | |
591 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
592 | SAT16(dPtr[0]); |
593 | } |
594 | } |
595 | |
596 | return MLIB_SUCCESS; |
597 | } |
598 | |
599 | #else /* for x86, using integer multiplies is faster */ |
600 | |
/*
 * Fixed-point scaling used by the integer path.
 *
 * The horizontal pass shifts its sums right by SHIFT_X and adds ROUND_X
 * first; ROUND_X is 0, i.e. the half-unit rounding term shown in the
 * comment is deliberately left disabled.  The vertical pass shifts by
 * SHIFT_Y after adding the half-unit term ROUND_Y.  Together the two
 * shifts remove 15 + 15 bits.
 */
#define SHIFT_X  15
#define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */

#define SHIFT_Y  (15 + 15 - SHIFT_X)
#define ROUND_Y  (1 << (SHIFT_Y - 1))

/*
 * S32_TO_S16_SAT(DST) - store the integer accumulator `val0` (a variable
 * that must be in scope at the point of use) into DST, clamped to
 * [MLIB_S16_MIN, MLIB_S16_MAX].
 *
 * The macro is a single parenthesized expression with a parenthesized
 * argument, so it behaves like an ordinary assignment wherever it is used
 * (`S32_TO_S16_SAT(x);` keeps working unchanged).
 */
#define S32_TO_S16_SAT(DST)                                     \
  ((DST) = (mlib_s16) ((val0 >= MLIB_S16_MAX) ? MLIB_S16_MAX :  \
                       (val0 <= MLIB_S16_MIN) ? MLIB_S16_MIN :  \
                       val0))
614 | |
615 | mlib_status FUN_NAME(1ch)(mlib_affine_param *param) |
616 | { |
617 | DECLAREVAR_BC(); |
618 | DTYPE *dstLineEnd; |
619 | const mlib_s16 *mlib_filters_table; |
620 | |
621 | if (filter == MLIB_BICUBIC) { |
622 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; |
623 | } |
624 | else { |
625 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; |
626 | } |
627 | |
628 | for (j = yStart; j <= yFinish; j++) { |
629 | mlib_s32 xf0, xf1, xf2, xf3; |
630 | mlib_s32 yf0, yf1, yf2, yf3; |
631 | mlib_s32 c0, c1, c2, c3, val0; |
632 | mlib_s32 filterpos; |
633 | mlib_s16 *fptr; |
634 | mlib_s32 s0, s1, s2, s3; |
635 | mlib_s32 s4, s5, s6, s7; |
636 | |
637 | CLIP(1); |
638 | dstLineEnd = (DTYPE *) dstData + xRight; |
639 | |
640 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
641 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
642 | |
643 | xf0 = fptr[0]; |
644 | xf1 = fptr[1]; |
645 | xf2 = fptr[2]; |
646 | xf3 = fptr[3]; |
647 | |
648 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
649 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
650 | |
651 | yf0 = fptr[0]; |
652 | yf1 = fptr[1]; |
653 | yf2 = fptr[2]; |
654 | yf3 = fptr[3]; |
655 | |
656 | xSrc = (X >> MLIB_SHIFT) - 1; |
657 | ySrc = (Y >> MLIB_SHIFT) - 1; |
658 | |
659 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
660 | s0 = srcPixelPtr[0]; |
661 | s1 = srcPixelPtr[1]; |
662 | s2 = srcPixelPtr[2]; |
663 | s3 = srcPixelPtr[3]; |
664 | |
665 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
666 | s4 = srcPixelPtr[0]; |
667 | s5 = srcPixelPtr[1]; |
668 | s6 = srcPixelPtr[2]; |
669 | s7 = srcPixelPtr[3]; |
670 | |
671 | for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { |
672 | |
673 | X += dX; |
674 | Y += dY; |
675 | |
676 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
677 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
678 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
679 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
680 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
681 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
682 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
683 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
684 | |
685 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
686 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
687 | |
688 | xf0 = fptr[0]; |
689 | xf1 = fptr[1]; |
690 | xf2 = fptr[2]; |
691 | xf3 = fptr[3]; |
692 | |
693 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
694 | |
695 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
696 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
697 | |
698 | yf0 = fptr[0]; |
699 | yf1 = fptr[1]; |
700 | yf2 = fptr[2]; |
701 | yf3 = fptr[3]; |
702 | |
703 | S32_TO_S16_SAT(dstPixelPtr[0]); |
704 | |
705 | xSrc = (X >> MLIB_SHIFT) - 1; |
706 | ySrc = (Y >> MLIB_SHIFT) - 1; |
707 | |
708 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
709 | s0 = srcPixelPtr[0]; |
710 | s1 = srcPixelPtr[1]; |
711 | s2 = srcPixelPtr[2]; |
712 | s3 = srcPixelPtr[3]; |
713 | |
714 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
715 | s4 = srcPixelPtr[0]; |
716 | s5 = srcPixelPtr[1]; |
717 | s6 = srcPixelPtr[2]; |
718 | s7 = srcPixelPtr[3]; |
719 | } |
720 | |
721 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
722 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
723 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
724 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
725 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
726 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
727 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
728 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
729 | |
730 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
731 | S32_TO_S16_SAT(dstPixelPtr[0]); |
732 | } |
733 | |
734 | return MLIB_SUCCESS; |
735 | } |
736 | |
737 | mlib_status FUN_NAME(2ch)(mlib_affine_param *param) |
738 | { |
739 | DECLAREVAR_BC(); |
740 | DTYPE *dstLineEnd; |
741 | const mlib_s16 *mlib_filters_table; |
742 | |
743 | if (filter == MLIB_BICUBIC) { |
744 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; |
745 | } |
746 | else { |
747 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; |
748 | } |
749 | |
750 | for (j = yStart; j <= yFinish; j++) { |
751 | mlib_s32 xf0, xf1, xf2, xf3; |
752 | mlib_s32 yf0, yf1, yf2, yf3; |
753 | mlib_s32 c0, c1, c2, c3, val0; |
754 | mlib_s32 filterpos, k; |
755 | mlib_s16 *fptr; |
756 | mlib_s32 s0, s1, s2, s3; |
757 | mlib_s32 s4, s5, s6, s7; |
758 | |
759 | CLIP(2); |
760 | dstLineEnd = (DTYPE *) dstData + 2 * xRight; |
761 | |
762 | for (k = 0; k < 2; k++) { |
763 | mlib_s32 X1 = X; |
764 | mlib_s32 Y1 = Y; |
765 | DTYPE *dPtr = dstPixelPtr + k; |
766 | |
767 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
768 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
769 | |
770 | xf0 = fptr[0]; |
771 | xf1 = fptr[1]; |
772 | xf2 = fptr[2]; |
773 | xf3 = fptr[3]; |
774 | |
775 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
776 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
777 | |
778 | yf0 = fptr[0]; |
779 | yf1 = fptr[1]; |
780 | yf2 = fptr[2]; |
781 | yf3 = fptr[3]; |
782 | |
783 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
784 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
785 | |
786 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
787 | s0 = srcPixelPtr[0]; |
788 | s1 = srcPixelPtr[2]; |
789 | s2 = srcPixelPtr[4]; |
790 | s3 = srcPixelPtr[6]; |
791 | |
792 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
793 | s4 = srcPixelPtr[0]; |
794 | s5 = srcPixelPtr[2]; |
795 | s6 = srcPixelPtr[4]; |
796 | s7 = srcPixelPtr[6]; |
797 | |
798 | for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { |
799 | |
800 | X1 += dX; |
801 | Y1 += dY; |
802 | |
803 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
804 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
805 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
806 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
807 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
808 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
809 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
810 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
811 | |
812 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
813 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
814 | |
815 | xf0 = fptr[0]; |
816 | xf1 = fptr[1]; |
817 | xf2 = fptr[2]; |
818 | xf3 = fptr[3]; |
819 | |
820 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
821 | |
822 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
823 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
824 | |
825 | yf0 = fptr[0]; |
826 | yf1 = fptr[1]; |
827 | yf2 = fptr[2]; |
828 | yf3 = fptr[3]; |
829 | |
830 | S32_TO_S16_SAT(dPtr[0]); |
831 | |
832 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
833 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
834 | |
835 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
836 | s0 = srcPixelPtr[0]; |
837 | s1 = srcPixelPtr[2]; |
838 | s2 = srcPixelPtr[4]; |
839 | s3 = srcPixelPtr[6]; |
840 | |
841 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
842 | s4 = srcPixelPtr[0]; |
843 | s5 = srcPixelPtr[2]; |
844 | s6 = srcPixelPtr[4]; |
845 | s7 = srcPixelPtr[6]; |
846 | } |
847 | |
848 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
849 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
850 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
851 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
852 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
853 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
854 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
855 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
856 | |
857 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
858 | S32_TO_S16_SAT(dPtr[0]); |
859 | } |
860 | } |
861 | |
862 | return MLIB_SUCCESS; |
863 | } |
864 | |
865 | mlib_status FUN_NAME(3ch)(mlib_affine_param *param) |
866 | { |
867 | DECLAREVAR_BC(); |
868 | DTYPE *dstLineEnd; |
869 | const mlib_s16 *mlib_filters_table; |
870 | |
871 | if (filter == MLIB_BICUBIC) { |
872 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; |
873 | } |
874 | else { |
875 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; |
876 | } |
877 | |
878 | for (j = yStart; j <= yFinish; j++) { |
879 | mlib_s32 xf0, xf1, xf2, xf3; |
880 | mlib_s32 yf0, yf1, yf2, yf3; |
881 | mlib_s32 c0, c1, c2, c3, val0; |
882 | mlib_s32 filterpos, k; |
883 | mlib_s16 *fptr; |
884 | mlib_s32 s0, s1, s2, s3; |
885 | mlib_s32 s4, s5, s6, s7; |
886 | |
887 | CLIP(3); |
888 | dstLineEnd = (DTYPE *) dstData + 3 * xRight; |
889 | |
890 | for (k = 0; k < 3; k++) { |
891 | mlib_s32 X1 = X; |
892 | mlib_s32 Y1 = Y; |
893 | DTYPE *dPtr = dstPixelPtr + k; |
894 | |
895 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
896 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
897 | |
898 | xf0 = fptr[0]; |
899 | xf1 = fptr[1]; |
900 | xf2 = fptr[2]; |
901 | xf3 = fptr[3]; |
902 | |
903 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
904 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
905 | |
906 | yf0 = fptr[0]; |
907 | yf1 = fptr[1]; |
908 | yf2 = fptr[2]; |
909 | yf3 = fptr[3]; |
910 | |
911 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
912 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
913 | |
914 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
915 | s0 = srcPixelPtr[0]; |
916 | s1 = srcPixelPtr[3]; |
917 | s2 = srcPixelPtr[6]; |
918 | s3 = srcPixelPtr[9]; |
919 | |
920 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
921 | s4 = srcPixelPtr[0]; |
922 | s5 = srcPixelPtr[3]; |
923 | s6 = srcPixelPtr[6]; |
924 | s7 = srcPixelPtr[9]; |
925 | |
926 | for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { |
927 | |
928 | X1 += dX; |
929 | Y1 += dY; |
930 | |
931 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
932 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
933 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
934 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
935 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
936 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
937 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
938 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
939 | |
940 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
941 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
942 | |
943 | xf0 = fptr[0]; |
944 | xf1 = fptr[1]; |
945 | xf2 = fptr[2]; |
946 | xf3 = fptr[3]; |
947 | |
948 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
949 | |
950 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
951 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
952 | |
953 | yf0 = fptr[0]; |
954 | yf1 = fptr[1]; |
955 | yf2 = fptr[2]; |
956 | yf3 = fptr[3]; |
957 | |
958 | S32_TO_S16_SAT(dPtr[0]); |
959 | |
960 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
961 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
962 | |
963 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
964 | s0 = srcPixelPtr[0]; |
965 | s1 = srcPixelPtr[3]; |
966 | s2 = srcPixelPtr[6]; |
967 | s3 = srcPixelPtr[9]; |
968 | |
969 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
970 | s4 = srcPixelPtr[0]; |
971 | s5 = srcPixelPtr[3]; |
972 | s6 = srcPixelPtr[6]; |
973 | s7 = srcPixelPtr[9]; |
974 | } |
975 | |
976 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
977 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
978 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
979 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
980 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
981 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
982 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
983 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
984 | |
985 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
986 | S32_TO_S16_SAT(dPtr[0]); |
987 | } |
988 | } |
989 | |
990 | return MLIB_SUCCESS; |
991 | } |
992 | |
993 | mlib_status FUN_NAME(4ch)(mlib_affine_param *param) |
994 | { |
995 | DECLAREVAR_BC(); |
996 | DTYPE *dstLineEnd; |
997 | const mlib_s16 *mlib_filters_table; |
998 | |
999 | if (filter == MLIB_BICUBIC) { |
1000 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; |
1001 | } |
1002 | else { |
1003 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; |
1004 | } |
1005 | |
1006 | for (j = yStart; j <= yFinish; j++) { |
1007 | mlib_s32 xf0, xf1, xf2, xf3; |
1008 | mlib_s32 yf0, yf1, yf2, yf3; |
1009 | mlib_s32 c0, c1, c2, c3, val0; |
1010 | mlib_s32 filterpos, k; |
1011 | mlib_s16 *fptr; |
1012 | mlib_s32 s0, s1, s2, s3; |
1013 | mlib_s32 s4, s5, s6, s7; |
1014 | |
1015 | CLIP(4); |
1016 | dstLineEnd = (DTYPE *) dstData + 4 * xRight; |
1017 | |
1018 | for (k = 0; k < 4; k++) { |
1019 | mlib_s32 X1 = X; |
1020 | mlib_s32 Y1 = Y; |
1021 | DTYPE *dPtr = dstPixelPtr + k; |
1022 | |
1023 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
1024 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1025 | |
1026 | xf0 = fptr[0]; |
1027 | xf1 = fptr[1]; |
1028 | xf2 = fptr[2]; |
1029 | xf3 = fptr[3]; |
1030 | |
1031 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
1032 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1033 | |
1034 | yf0 = fptr[0]; |
1035 | yf1 = fptr[1]; |
1036 | yf2 = fptr[2]; |
1037 | yf3 = fptr[3]; |
1038 | |
1039 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
1040 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
1041 | |
1042 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
1043 | s0 = srcPixelPtr[0]; |
1044 | s1 = srcPixelPtr[4]; |
1045 | s2 = srcPixelPtr[8]; |
1046 | s3 = srcPixelPtr[12]; |
1047 | |
1048 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1049 | s4 = srcPixelPtr[0]; |
1050 | s5 = srcPixelPtr[4]; |
1051 | s6 = srcPixelPtr[8]; |
1052 | s7 = srcPixelPtr[12]; |
1053 | |
1054 | for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { |
1055 | |
1056 | X1 += dX; |
1057 | Y1 += dY; |
1058 | |
1059 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
1060 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
1061 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1062 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1063 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1064 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1065 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1066 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1067 | |
1068 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
1069 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1070 | |
1071 | xf0 = fptr[0]; |
1072 | xf1 = fptr[1]; |
1073 | xf2 = fptr[2]; |
1074 | xf3 = fptr[3]; |
1075 | |
1076 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
1077 | |
1078 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
1079 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1080 | |
1081 | yf0 = fptr[0]; |
1082 | yf1 = fptr[1]; |
1083 | yf2 = fptr[2]; |
1084 | yf3 = fptr[3]; |
1085 | |
1086 | S32_TO_S16_SAT(dPtr[0]); |
1087 | |
1088 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
1089 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
1090 | |
1091 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
1092 | s0 = srcPixelPtr[0]; |
1093 | s1 = srcPixelPtr[4]; |
1094 | s2 = srcPixelPtr[8]; |
1095 | s3 = srcPixelPtr[12]; |
1096 | |
1097 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1098 | s4 = srcPixelPtr[0]; |
1099 | s5 = srcPixelPtr[4]; |
1100 | s6 = srcPixelPtr[8]; |
1101 | s7 = srcPixelPtr[12]; |
1102 | } |
1103 | |
1104 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
1105 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
1106 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1107 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1108 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1109 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1110 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1111 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1112 | |
1113 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
1114 | S32_TO_S16_SAT(dPtr[0]); |
1115 | } |
1116 | } |
1117 | |
1118 | return MLIB_SUCCESS; |
1119 | } |
1120 | |
1121 | #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */ |
1122 | |
1123 | /***************************************************************/ |
1124 | |