1 | /* |
2 | * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | |
27 | /* |
28 | * FUNCTION |
29 | * Image affine transformation with Bicubic filtering |
30 | * SYNOPSIS |
31 | * mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges, |
32 | * mlib_s32 *rightEdges, |
33 | * mlib_s32 *xStarts, |
34 | * mlib_s32 *yStarts, |
35 | * mlib_s32 *sides, |
36 | * mlib_u8 *dstData, |
37 | * mlib_u8 **lineAddr, |
38 | * mlib_s32 dstYStride, |
39 | * mlib_s32 is_affine, |
40 | * mlib_s32 srcYStride, |
41 | * mlib_filter filter) |
42 | * |
43 | * ARGUMENTS |
44 | * leftEdges array[dstHeight] of xLeft coordinates |
45 | * rightEdges array[dstHeight] of xRight coordinates |
46 | * xStarts array[dstHeight] of xStart * 65536 coordinates |
47 | * yStarts array[dstHeight] of yStart * 65536 coordinates |
48 | * sides output array[4]. sides[0] is yStart, sides[1] is yFinish, |
49 | * sides[2] is dx * 65536, sides[3] is dy * 65536 |
50 | * dstData pointer to the first pixel on (yStart - 1) line |
51 | * lineAddr array[srcHeight] of pointers to the first pixel on |
52 | * the corresponding lines |
53 | * dstYStride stride of destination image |
54 | * is_affine indicator (Affine - GridWarp) |
55 | * srcYStride stride of source image |
56 | * filter type of resampling filter |
57 | * |
58 | * DESCRIPTION |
59 | * The functions step along the lines from xLeft to xRight and apply |
60 | * the bicubic filtering. |
61 | * |
62 | */ |
63 | |
64 | #include "mlib_ImageAffine.h" |
65 | |
/*
 * Per-file configuration for the unsigned 16-bit bicubic kernels.
 *
 * FILTER_BITS is not referenced directly in this file; presumably the
 * FILTER_SHIFT / FILTER_MASK macros used by the kernels are derived from
 * it elsewhere (TODO confirm against mlib_ImageAffine.h).
 */
#define FILTER_BITS 9

/* Element type of both source and destination pixels. */
#define DTYPE mlib_u16

/* Expands to mlib_ImageAffine_u16_<CHAN>_bc, e.g. FUN_NAME(1ch). */
#define FUN_NAME(CHAN) mlib_ImageAffine_u16_ ## CHAN ## _bc
71 | |
72 | /***************************************************************/ |
73 | #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */ |
74 | |
75 | /***************************************************************/ |
/* Replace any earlier FILTER_ELEM_BITS setting for this variant. */
#undef  FILTER_ELEM_BITS
#define FILTER_ELEM_BITS 4

/***************************************************************/
/*
 * SAT_U16(DST): store the accumulator `val0` into DST as an unsigned
 * 16-bit value.  `val0` is NOT a macro argument; it is read from the
 * scope in which the macro is expanded.
 *
 * Default variant: values at or above MLIB_U32_MAX give MLIB_U16_MAX,
 * values at or below MLIB_U32_MIN give MLIB_U16_MIN, anything else is
 * converted to mlib_u32 and shifted right by 16.
 *
 * MLIB_USE_FTOI_CLAMPING variant: branch-free form; presumably relies on
 * the floating-point to mlib_s32 conversion saturating on the target
 * (TODO confirm for the platforms that enable it).
 *
 * Both variants must be used as a full statement: SAT_U16(x);
 */
#ifdef MLIB_USE_FTOI_CLAMPING

#define SAT_U16(DST)                                                    \
  do {                                                                  \
    DST = ((mlib_s32)(val0 - (mlib_d64)0x7FFF8000) >> 16) ^ 0x8000;     \
  } while (0)

#else

#define SAT_U16(DST)                                                    \
  do {                                                                  \
    if (val0 >= MLIB_U32_MAX) {                                         \
      DST = MLIB_U16_MAX;                                               \
    }                                                                   \
    else if (val0 <= MLIB_U32_MIN) {                                    \
      DST = MLIB_U16_MIN;                                               \
    }                                                                   \
    else {                                                              \
      DST = ((mlib_u32)val0) >> 16;                                     \
    }                                                                   \
  } while (0)

#endif /* MLIB_USE_FTOI_CLAMPING */
96 | |
97 | /***************************************************************/ |
98 | mlib_status FUN_NAME(1ch)(mlib_affine_param *param) |
99 | { |
100 | DECLAREVAR_BC(); |
101 | DTYPE *dstLineEnd; |
102 | const mlib_f32 *mlib_filters_table; |
103 | |
104 | if (filter == MLIB_BICUBIC) { |
105 | mlib_filters_table = mlib_filters_s16f_bc; |
106 | } |
107 | else { |
108 | mlib_filters_table = mlib_filters_s16f_bc2; |
109 | } |
110 | |
111 | for (j = yStart; j <= yFinish; j++) { |
112 | mlib_d64 xf0, xf1, xf2, xf3; |
113 | mlib_d64 yf0, yf1, yf2, yf3; |
114 | mlib_d64 c0, c1, c2, c3, val0; |
115 | mlib_s32 filterpos; |
116 | mlib_f32 *fptr; |
117 | mlib_s32 s0, s1, s2, s3; |
118 | mlib_s32 s4, s5, s6, s7; |
119 | |
120 | CLIP(1); |
121 | dstLineEnd = (DTYPE *) dstData + xRight; |
122 | |
123 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
124 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
125 | |
126 | xf0 = fptr[0]; |
127 | xf1 = fptr[1]; |
128 | xf2 = fptr[2]; |
129 | xf3 = fptr[3]; |
130 | |
131 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
132 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
133 | |
134 | yf0 = fptr[0]; |
135 | yf1 = fptr[1]; |
136 | yf2 = fptr[2]; |
137 | yf3 = fptr[3]; |
138 | |
139 | xSrc = (X >> MLIB_SHIFT) - 1; |
140 | ySrc = (Y >> MLIB_SHIFT) - 1; |
141 | |
142 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
143 | s0 = srcPixelPtr[0]; |
144 | s1 = srcPixelPtr[1]; |
145 | s2 = srcPixelPtr[2]; |
146 | s3 = srcPixelPtr[3]; |
147 | |
148 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
149 | s4 = srcPixelPtr[0]; |
150 | s5 = srcPixelPtr[1]; |
151 | s6 = srcPixelPtr[2]; |
152 | s7 = srcPixelPtr[3]; |
153 | |
154 | for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { |
155 | |
156 | X += dX; |
157 | Y += dY; |
158 | |
159 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
160 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
161 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
162 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
163 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); |
164 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
165 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
166 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); |
167 | |
168 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
169 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
170 | |
171 | xf0 = fptr[0]; |
172 | xf1 = fptr[1]; |
173 | xf2 = fptr[2]; |
174 | xf3 = fptr[3]; |
175 | |
176 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
177 | |
178 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
179 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
180 | |
181 | yf0 = fptr[0]; |
182 | yf1 = fptr[1]; |
183 | yf2 = fptr[2]; |
184 | yf3 = fptr[3]; |
185 | |
186 | SAT_U16(dstPixelPtr[0]); |
187 | |
188 | xSrc = (X >> MLIB_SHIFT) - 1; |
189 | ySrc = (Y >> MLIB_SHIFT) - 1; |
190 | |
191 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
192 | s0 = srcPixelPtr[0]; |
193 | s1 = srcPixelPtr[1]; |
194 | s2 = srcPixelPtr[2]; |
195 | s3 = srcPixelPtr[3]; |
196 | |
197 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
198 | s4 = srcPixelPtr[0]; |
199 | s5 = srcPixelPtr[1]; |
200 | s6 = srcPixelPtr[2]; |
201 | s7 = srcPixelPtr[3]; |
202 | } |
203 | |
204 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
205 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
206 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
207 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
208 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); |
209 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
210 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
211 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3); |
212 | |
213 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
214 | SAT_U16(dstPixelPtr[0]); |
215 | } |
216 | |
217 | return MLIB_SUCCESS; |
218 | } |
219 | |
220 | /***************************************************************/ |
221 | mlib_status FUN_NAME(2ch)(mlib_affine_param *param) |
222 | { |
223 | DECLAREVAR_BC(); |
224 | DTYPE *dstLineEnd; |
225 | const mlib_f32 *mlib_filters_table; |
226 | |
227 | if (filter == MLIB_BICUBIC) { |
228 | mlib_filters_table = mlib_filters_s16f_bc; |
229 | } |
230 | else { |
231 | mlib_filters_table = mlib_filters_s16f_bc2; |
232 | } |
233 | |
234 | for (j = yStart; j <= yFinish; j++) { |
235 | mlib_d64 xf0, xf1, xf2, xf3; |
236 | mlib_d64 yf0, yf1, yf2, yf3; |
237 | mlib_d64 c0, c1, c2, c3, val0; |
238 | mlib_s32 filterpos, k; |
239 | mlib_f32 *fptr; |
240 | mlib_s32 s0, s1, s2, s3; |
241 | mlib_s32 s4, s5, s6, s7; |
242 | |
243 | CLIP(2); |
244 | dstLineEnd = (DTYPE *) dstData + 2 * xRight; |
245 | |
246 | for (k = 0; k < 2; k++) { |
247 | mlib_s32 X1 = X; |
248 | mlib_s32 Y1 = Y; |
249 | DTYPE *dPtr = dstPixelPtr + k; |
250 | |
251 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
252 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
253 | |
254 | xf0 = fptr[0]; |
255 | xf1 = fptr[1]; |
256 | xf2 = fptr[2]; |
257 | xf3 = fptr[3]; |
258 | |
259 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
260 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
261 | |
262 | yf0 = fptr[0]; |
263 | yf1 = fptr[1]; |
264 | yf2 = fptr[2]; |
265 | yf3 = fptr[3]; |
266 | |
267 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
268 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
269 | |
270 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
271 | s0 = srcPixelPtr[0]; |
272 | s1 = srcPixelPtr[2]; |
273 | s2 = srcPixelPtr[4]; |
274 | s3 = srcPixelPtr[6]; |
275 | |
276 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
277 | s4 = srcPixelPtr[0]; |
278 | s5 = srcPixelPtr[2]; |
279 | s6 = srcPixelPtr[4]; |
280 | s7 = srcPixelPtr[6]; |
281 | |
282 | for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { |
283 | |
284 | X1 += dX; |
285 | Y1 += dY; |
286 | |
287 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
288 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
289 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
290 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
291 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); |
292 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
293 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
294 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); |
295 | |
296 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
297 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
298 | |
299 | xf0 = fptr[0]; |
300 | xf1 = fptr[1]; |
301 | xf2 = fptr[2]; |
302 | xf3 = fptr[3]; |
303 | |
304 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
305 | |
306 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
307 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
308 | |
309 | yf0 = fptr[0]; |
310 | yf1 = fptr[1]; |
311 | yf2 = fptr[2]; |
312 | yf3 = fptr[3]; |
313 | |
314 | SAT_U16(dPtr[0]); |
315 | |
316 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
317 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
318 | |
319 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
320 | s0 = srcPixelPtr[0]; |
321 | s1 = srcPixelPtr[2]; |
322 | s2 = srcPixelPtr[4]; |
323 | s3 = srcPixelPtr[6]; |
324 | |
325 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
326 | s4 = srcPixelPtr[0]; |
327 | s5 = srcPixelPtr[2]; |
328 | s6 = srcPixelPtr[4]; |
329 | s7 = srcPixelPtr[6]; |
330 | } |
331 | |
332 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
333 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
334 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
335 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
336 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); |
337 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
338 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
339 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3); |
340 | |
341 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
342 | SAT_U16(dPtr[0]); |
343 | } |
344 | } |
345 | |
346 | return MLIB_SUCCESS; |
347 | } |
348 | |
349 | /***************************************************************/ |
350 | mlib_status FUN_NAME(3ch)(mlib_affine_param *param) |
351 | { |
352 | DECLAREVAR_BC(); |
353 | DTYPE *dstLineEnd; |
354 | const mlib_f32 *mlib_filters_table; |
355 | |
356 | if (filter == MLIB_BICUBIC) { |
357 | mlib_filters_table = mlib_filters_s16f_bc; |
358 | } |
359 | else { |
360 | mlib_filters_table = mlib_filters_s16f_bc2; |
361 | } |
362 | |
363 | for (j = yStart; j <= yFinish; j++) { |
364 | mlib_d64 xf0, xf1, xf2, xf3; |
365 | mlib_d64 yf0, yf1, yf2, yf3; |
366 | mlib_d64 c0, c1, c2, c3, val0; |
367 | mlib_s32 filterpos, k; |
368 | mlib_f32 *fptr; |
369 | mlib_s32 s0, s1, s2, s3; |
370 | mlib_s32 s4, s5, s6, s7; |
371 | |
372 | CLIP(3); |
373 | dstLineEnd = (DTYPE *) dstData + 3 * xRight; |
374 | |
375 | for (k = 0; k < 3; k++) { |
376 | mlib_s32 X1 = X; |
377 | mlib_s32 Y1 = Y; |
378 | DTYPE *dPtr = dstPixelPtr + k; |
379 | |
380 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
381 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
382 | |
383 | xf0 = fptr[0]; |
384 | xf1 = fptr[1]; |
385 | xf2 = fptr[2]; |
386 | xf3 = fptr[3]; |
387 | |
388 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
389 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
390 | |
391 | yf0 = fptr[0]; |
392 | yf1 = fptr[1]; |
393 | yf2 = fptr[2]; |
394 | yf3 = fptr[3]; |
395 | |
396 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
397 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
398 | |
399 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
400 | s0 = srcPixelPtr[0]; |
401 | s1 = srcPixelPtr[3]; |
402 | s2 = srcPixelPtr[6]; |
403 | s3 = srcPixelPtr[9]; |
404 | |
405 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
406 | s4 = srcPixelPtr[0]; |
407 | s5 = srcPixelPtr[3]; |
408 | s6 = srcPixelPtr[6]; |
409 | s7 = srcPixelPtr[9]; |
410 | |
411 | for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { |
412 | |
413 | X1 += dX; |
414 | Y1 += dY; |
415 | |
416 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
417 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
418 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
419 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
420 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); |
421 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
422 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
423 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); |
424 | |
425 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
426 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
427 | |
428 | xf0 = fptr[0]; |
429 | xf1 = fptr[1]; |
430 | xf2 = fptr[2]; |
431 | xf3 = fptr[3]; |
432 | |
433 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
434 | |
435 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
436 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
437 | |
438 | yf0 = fptr[0]; |
439 | yf1 = fptr[1]; |
440 | yf2 = fptr[2]; |
441 | yf3 = fptr[3]; |
442 | |
443 | SAT_U16(dPtr[0]); |
444 | |
445 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
446 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
447 | |
448 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
449 | s0 = srcPixelPtr[0]; |
450 | s1 = srcPixelPtr[3]; |
451 | s2 = srcPixelPtr[6]; |
452 | s3 = srcPixelPtr[9]; |
453 | |
454 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
455 | s4 = srcPixelPtr[0]; |
456 | s5 = srcPixelPtr[3]; |
457 | s6 = srcPixelPtr[6]; |
458 | s7 = srcPixelPtr[9]; |
459 | } |
460 | |
461 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
462 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
463 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
464 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
465 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); |
466 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
467 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
468 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3); |
469 | |
470 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
471 | SAT_U16(dPtr[0]); |
472 | } |
473 | } |
474 | |
475 | return MLIB_SUCCESS; |
476 | } |
477 | |
478 | /***************************************************************/ |
479 | mlib_status FUN_NAME(4ch)(mlib_affine_param *param) |
480 | { |
481 | DECLAREVAR_BC(); |
482 | DTYPE *dstLineEnd; |
483 | const mlib_f32 *mlib_filters_table; |
484 | |
485 | if (filter == MLIB_BICUBIC) { |
486 | mlib_filters_table = mlib_filters_s16f_bc; |
487 | } |
488 | else { |
489 | mlib_filters_table = mlib_filters_s16f_bc2; |
490 | } |
491 | |
492 | for (j = yStart; j <= yFinish; j++) { |
493 | mlib_d64 xf0, xf1, xf2, xf3; |
494 | mlib_d64 yf0, yf1, yf2, yf3; |
495 | mlib_d64 c0, c1, c2, c3, val0; |
496 | mlib_s32 filterpos, k; |
497 | mlib_f32 *fptr; |
498 | mlib_s32 s0, s1, s2, s3; |
499 | mlib_s32 s4, s5, s6, s7; |
500 | |
501 | CLIP(4); |
502 | dstLineEnd = (DTYPE *) dstData + 4 * xRight; |
503 | |
504 | for (k = 0; k < 4; k++) { |
505 | mlib_s32 X1 = X; |
506 | mlib_s32 Y1 = Y; |
507 | DTYPE *dPtr = dstPixelPtr + k; |
508 | |
509 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
510 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
511 | |
512 | xf0 = fptr[0]; |
513 | xf1 = fptr[1]; |
514 | xf2 = fptr[2]; |
515 | xf3 = fptr[3]; |
516 | |
517 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
518 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
519 | |
520 | yf0 = fptr[0]; |
521 | yf1 = fptr[1]; |
522 | yf2 = fptr[2]; |
523 | yf3 = fptr[3]; |
524 | |
525 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
526 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
527 | |
528 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
529 | s0 = srcPixelPtr[0]; |
530 | s1 = srcPixelPtr[4]; |
531 | s2 = srcPixelPtr[8]; |
532 | s3 = srcPixelPtr[12]; |
533 | |
534 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
535 | s4 = srcPixelPtr[0]; |
536 | s5 = srcPixelPtr[4]; |
537 | s6 = srcPixelPtr[8]; |
538 | s7 = srcPixelPtr[12]; |
539 | |
540 | for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { |
541 | |
542 | X1 += dX; |
543 | Y1 += dY; |
544 | |
545 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
546 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
547 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
548 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
549 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); |
550 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
551 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
552 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); |
553 | |
554 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
555 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
556 | |
557 | xf0 = fptr[0]; |
558 | xf1 = fptr[1]; |
559 | xf2 = fptr[2]; |
560 | xf3 = fptr[3]; |
561 | |
562 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
563 | |
564 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
565 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
566 | |
567 | yf0 = fptr[0]; |
568 | yf1 = fptr[1]; |
569 | yf2 = fptr[2]; |
570 | yf3 = fptr[3]; |
571 | |
572 | SAT_U16(dPtr[0]); |
573 | |
574 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
575 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
576 | |
577 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
578 | s0 = srcPixelPtr[0]; |
579 | s1 = srcPixelPtr[4]; |
580 | s2 = srcPixelPtr[8]; |
581 | s3 = srcPixelPtr[12]; |
582 | |
583 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
584 | s4 = srcPixelPtr[0]; |
585 | s5 = srcPixelPtr[4]; |
586 | s6 = srcPixelPtr[8]; |
587 | s7 = srcPixelPtr[12]; |
588 | } |
589 | |
590 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3); |
591 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3); |
592 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
593 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
594 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); |
595 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
596 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
597 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3); |
598 | |
599 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
600 | SAT_U16(dPtr[0]); |
601 | } |
602 | } |
603 | |
604 | return MLIB_SUCCESS; |
605 | } |
606 | |
607 | #else /* for x86, using integer multiplies is faster */ |
608 | |
/*
 * Fixed-point scaling for the integer kernels.
 *
 * The horizontal pass shifts its sum right by SHIFT_X with no rounding
 * term (ROUND_X is 0; the half-unit value (1 << (SHIFT_X - 1)) is
 * deliberately left disabled).  The vertical pass adds ROUND_Y, half a
 * unit at SHIFT_Y, before shifting right by SHIFT_Y.
 */
#define SHIFT_X 15
#define ROUND_X 0 /* (1 << (SHIFT_X - 1)) */

#define SHIFT_Y 14
#define ROUND_Y (1 << (SHIFT_Y - 1))

/*
 * S32_TO_U16_SAT(DST): store the mlib_s32 accumulator `val0` into DST,
 * clamped to [MLIB_U16_MIN, MLIB_U16_MAX].  `val0` is NOT a macro
 * argument; it is read from the scope in which the macro is expanded.
 * Must be used as a full statement: S32_TO_U16_SAT(x);
 */
#define S32_TO_U16_SAT(DST)                                             \
  do {                                                                  \
    if (val0 >= MLIB_U16_MAX) {                                         \
      DST = MLIB_U16_MAX;                                               \
    }                                                                   \
    else if (val0 <= MLIB_U16_MIN) {                                    \
      DST = MLIB_U16_MIN;                                               \
    }                                                                   \
    else {                                                              \
      DST = (mlib_u16)val0;                                             \
    }                                                                   \
  } while (0)
622 | |
623 | /***************************************************************/ |
624 | mlib_status FUN_NAME(1ch)(mlib_affine_param *param) |
625 | { |
626 | DECLAREVAR_BC(); |
627 | DTYPE *dstLineEnd; |
628 | const mlib_s16 *mlib_filters_table; |
629 | |
630 | if (filter == MLIB_BICUBIC) { |
631 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; |
632 | } |
633 | else { |
634 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; |
635 | } |
636 | |
637 | for (j = yStart; j <= yFinish; j++) { |
638 | mlib_s32 xf0, xf1, xf2, xf3; |
639 | mlib_s32 yf0, yf1, yf2, yf3; |
640 | mlib_s32 c0, c1, c2, c3, val0; |
641 | mlib_s32 filterpos; |
642 | mlib_s16 *fptr; |
643 | mlib_s32 s0, s1, s2, s3; |
644 | mlib_s32 s4, s5, s6, s7; |
645 | |
646 | CLIP(1); |
647 | dstLineEnd = (DTYPE *) dstData + xRight; |
648 | |
649 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
650 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
651 | |
652 | xf0 = fptr[0] >> 1; |
653 | xf1 = fptr[1] >> 1; |
654 | xf2 = fptr[2] >> 1; |
655 | xf3 = fptr[3] >> 1; |
656 | |
657 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
658 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
659 | |
660 | yf0 = fptr[0]; |
661 | yf1 = fptr[1]; |
662 | yf2 = fptr[2]; |
663 | yf3 = fptr[3]; |
664 | |
665 | xSrc = (X >> MLIB_SHIFT) - 1; |
666 | ySrc = (Y >> MLIB_SHIFT) - 1; |
667 | |
668 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
669 | s0 = srcPixelPtr[0]; |
670 | s1 = srcPixelPtr[1]; |
671 | s2 = srcPixelPtr[2]; |
672 | s3 = srcPixelPtr[3]; |
673 | |
674 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
675 | s4 = srcPixelPtr[0]; |
676 | s5 = srcPixelPtr[1]; |
677 | s6 = srcPixelPtr[2]; |
678 | s7 = srcPixelPtr[3]; |
679 | |
680 | for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { |
681 | |
682 | X += dX; |
683 | Y += dY; |
684 | |
685 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
686 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
687 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
688 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
689 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
690 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
691 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
692 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
693 | |
694 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
695 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
696 | |
697 | xf0 = fptr[0] >> 1; |
698 | xf1 = fptr[1] >> 1; |
699 | xf2 = fptr[2] >> 1; |
700 | xf3 = fptr[3] >> 1; |
701 | |
702 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
703 | |
704 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
705 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
706 | |
707 | yf0 = fptr[0]; |
708 | yf1 = fptr[1]; |
709 | yf2 = fptr[2]; |
710 | yf3 = fptr[3]; |
711 | |
712 | S32_TO_U16_SAT(dstPixelPtr[0]); |
713 | |
714 | xSrc = (X >> MLIB_SHIFT) - 1; |
715 | ySrc = (Y >> MLIB_SHIFT) - 1; |
716 | |
717 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
718 | s0 = srcPixelPtr[0]; |
719 | s1 = srcPixelPtr[1]; |
720 | s2 = srcPixelPtr[2]; |
721 | s3 = srcPixelPtr[3]; |
722 | |
723 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
724 | s4 = srcPixelPtr[0]; |
725 | s5 = srcPixelPtr[1]; |
726 | s6 = srcPixelPtr[2]; |
727 | s7 = srcPixelPtr[3]; |
728 | } |
729 | |
730 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
731 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
732 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
733 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
734 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
735 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
736 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
737 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
738 | |
739 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
740 | S32_TO_U16_SAT(dstPixelPtr[0]); |
741 | } |
742 | |
743 | return MLIB_SUCCESS; |
744 | } |
745 | |
746 | /***************************************************************/ |
747 | mlib_status FUN_NAME(2ch)(mlib_affine_param *param) |
748 | { |
749 | DECLAREVAR_BC(); |
750 | DTYPE *dstLineEnd; |
751 | const mlib_s16 *mlib_filters_table; |
752 | |
753 | if (filter == MLIB_BICUBIC) { |
754 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; |
755 | } |
756 | else { |
757 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; |
758 | } |
759 | |
760 | for (j = yStart; j <= yFinish; j++) { |
761 | mlib_s32 xf0, xf1, xf2, xf3; |
762 | mlib_s32 yf0, yf1, yf2, yf3; |
763 | mlib_s32 c0, c1, c2, c3, val0; |
764 | mlib_s32 filterpos, k; |
765 | mlib_s16 *fptr; |
766 | mlib_s32 s0, s1, s2, s3; |
767 | mlib_s32 s4, s5, s6, s7; |
768 | |
769 | CLIP(2); |
770 | dstLineEnd = (DTYPE *) dstData + 2 * xRight; |
771 | |
772 | for (k = 0; k < 2; k++) { |
773 | mlib_s32 X1 = X; |
774 | mlib_s32 Y1 = Y; |
775 | DTYPE *dPtr = dstPixelPtr + k; |
776 | |
777 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
778 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
779 | |
780 | xf0 = fptr[0] >> 1; |
781 | xf1 = fptr[1] >> 1; |
782 | xf2 = fptr[2] >> 1; |
783 | xf3 = fptr[3] >> 1; |
784 | |
785 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
786 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
787 | |
788 | yf0 = fptr[0]; |
789 | yf1 = fptr[1]; |
790 | yf2 = fptr[2]; |
791 | yf3 = fptr[3]; |
792 | |
793 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
794 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
795 | |
796 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
797 | s0 = srcPixelPtr[0]; |
798 | s1 = srcPixelPtr[2]; |
799 | s2 = srcPixelPtr[4]; |
800 | s3 = srcPixelPtr[6]; |
801 | |
802 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
803 | s4 = srcPixelPtr[0]; |
804 | s5 = srcPixelPtr[2]; |
805 | s6 = srcPixelPtr[4]; |
806 | s7 = srcPixelPtr[6]; |
807 | |
808 | for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { |
809 | |
810 | X1 += dX; |
811 | Y1 += dY; |
812 | |
813 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
814 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
815 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
816 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
817 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
818 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
819 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
820 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
821 | |
822 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
823 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
824 | |
825 | xf0 = fptr[0] >> 1; |
826 | xf1 = fptr[1] >> 1; |
827 | xf2 = fptr[2] >> 1; |
828 | xf3 = fptr[3] >> 1; |
829 | |
830 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
831 | |
832 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
833 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
834 | |
835 | yf0 = fptr[0]; |
836 | yf1 = fptr[1]; |
837 | yf2 = fptr[2]; |
838 | yf3 = fptr[3]; |
839 | |
840 | S32_TO_U16_SAT(dPtr[0]); |
841 | |
842 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
843 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
844 | |
845 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
846 | s0 = srcPixelPtr[0]; |
847 | s1 = srcPixelPtr[2]; |
848 | s2 = srcPixelPtr[4]; |
849 | s3 = srcPixelPtr[6]; |
850 | |
851 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
852 | s4 = srcPixelPtr[0]; |
853 | s5 = srcPixelPtr[2]; |
854 | s6 = srcPixelPtr[4]; |
855 | s7 = srcPixelPtr[6]; |
856 | } |
857 | |
858 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
859 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
860 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
861 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
862 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
863 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
864 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
865 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
866 | |
867 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
868 | S32_TO_U16_SAT(dPtr[0]); |
869 | } |
870 | } |
871 | |
872 | return MLIB_SUCCESS; |
873 | } |
874 | |
875 | /***************************************************************/ |
876 | mlib_status FUN_NAME(3ch)(mlib_affine_param *param) |
877 | { |
878 | DECLAREVAR_BC(); |
879 | DTYPE *dstLineEnd; |
880 | const mlib_s16 *mlib_filters_table; |
881 | |
882 | if (filter == MLIB_BICUBIC) { |
883 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; |
884 | } |
885 | else { |
886 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; |
887 | } |
888 | |
889 | for (j = yStart; j <= yFinish; j++) { |
890 | mlib_s32 xf0, xf1, xf2, xf3; |
891 | mlib_s32 yf0, yf1, yf2, yf3; |
892 | mlib_s32 c0, c1, c2, c3, val0; |
893 | mlib_s32 filterpos, k; |
894 | mlib_s16 *fptr; |
895 | mlib_s32 s0, s1, s2, s3; |
896 | mlib_s32 s4, s5, s6, s7; |
897 | |
898 | CLIP(3); |
899 | dstLineEnd = (DTYPE *) dstData + 3 * xRight; |
900 | |
901 | for (k = 0; k < 3; k++) { |
902 | mlib_s32 X1 = X; |
903 | mlib_s32 Y1 = Y; |
904 | DTYPE *dPtr = dstPixelPtr + k; |
905 | |
906 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
907 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
908 | |
909 | xf0 = fptr[0] >> 1; |
910 | xf1 = fptr[1] >> 1; |
911 | xf2 = fptr[2] >> 1; |
912 | xf3 = fptr[3] >> 1; |
913 | |
914 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
915 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
916 | |
917 | yf0 = fptr[0]; |
918 | yf1 = fptr[1]; |
919 | yf2 = fptr[2]; |
920 | yf3 = fptr[3]; |
921 | |
922 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
923 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
924 | |
925 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
926 | s0 = srcPixelPtr[0]; |
927 | s1 = srcPixelPtr[3]; |
928 | s2 = srcPixelPtr[6]; |
929 | s3 = srcPixelPtr[9]; |
930 | |
931 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
932 | s4 = srcPixelPtr[0]; |
933 | s5 = srcPixelPtr[3]; |
934 | s6 = srcPixelPtr[6]; |
935 | s7 = srcPixelPtr[9]; |
936 | |
937 | for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { |
938 | |
939 | X1 += dX; |
940 | Y1 += dY; |
941 | |
942 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
943 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
944 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
945 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
946 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
947 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
948 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
949 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
950 | |
951 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
952 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
953 | |
954 | xf0 = fptr[0] >> 1; |
955 | xf1 = fptr[1] >> 1; |
956 | xf2 = fptr[2] >> 1; |
957 | xf3 = fptr[3] >> 1; |
958 | |
959 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
960 | |
961 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
962 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
963 | |
964 | yf0 = fptr[0]; |
965 | yf1 = fptr[1]; |
966 | yf2 = fptr[2]; |
967 | yf3 = fptr[3]; |
968 | |
969 | S32_TO_U16_SAT(dPtr[0]); |
970 | |
971 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
972 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
973 | |
974 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
975 | s0 = srcPixelPtr[0]; |
976 | s1 = srcPixelPtr[3]; |
977 | s2 = srcPixelPtr[6]; |
978 | s3 = srcPixelPtr[9]; |
979 | |
980 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
981 | s4 = srcPixelPtr[0]; |
982 | s5 = srcPixelPtr[3]; |
983 | s6 = srcPixelPtr[6]; |
984 | s7 = srcPixelPtr[9]; |
985 | } |
986 | |
987 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
988 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
989 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
990 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
991 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
992 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
993 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
994 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
995 | |
996 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
997 | S32_TO_U16_SAT(dPtr[0]); |
998 | } |
999 | } |
1000 | |
1001 | return MLIB_SUCCESS; |
1002 | } |
1003 | |
1004 | /***************************************************************/ |
1005 | mlib_status FUN_NAME(4ch)(mlib_affine_param *param) |
1006 | { |
1007 | DECLAREVAR_BC(); |
1008 | DTYPE *dstLineEnd; |
1009 | const mlib_s16 *mlib_filters_table; |
1010 | |
1011 | if (filter == MLIB_BICUBIC) { |
1012 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc; |
1013 | } |
1014 | else { |
1015 | mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2; |
1016 | } |
1017 | |
1018 | for (j = yStart; j <= yFinish; j++) { |
1019 | mlib_s32 xf0, xf1, xf2, xf3; |
1020 | mlib_s32 yf0, yf1, yf2, yf3; |
1021 | mlib_s32 c0, c1, c2, c3, val0; |
1022 | mlib_s32 filterpos, k; |
1023 | mlib_s16 *fptr; |
1024 | mlib_s32 s0, s1, s2, s3; |
1025 | mlib_s32 s4, s5, s6, s7; |
1026 | |
1027 | CLIP(4); |
1028 | dstLineEnd = (DTYPE *) dstData + 4 * xRight; |
1029 | |
1030 | for (k = 0; k < 4; k++) { |
1031 | mlib_s32 X1 = X; |
1032 | mlib_s32 Y1 = Y; |
1033 | DTYPE *dPtr = dstPixelPtr + k; |
1034 | |
1035 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
1036 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1037 | |
1038 | xf0 = fptr[0] >> 1; |
1039 | xf1 = fptr[1] >> 1; |
1040 | xf2 = fptr[2] >> 1; |
1041 | xf3 = fptr[3] >> 1; |
1042 | |
1043 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
1044 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1045 | |
1046 | yf0 = fptr[0]; |
1047 | yf1 = fptr[1]; |
1048 | yf2 = fptr[2]; |
1049 | yf3 = fptr[3]; |
1050 | |
1051 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
1052 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
1053 | |
1054 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
1055 | s0 = srcPixelPtr[0]; |
1056 | s1 = srcPixelPtr[4]; |
1057 | s2 = srcPixelPtr[8]; |
1058 | s3 = srcPixelPtr[12]; |
1059 | |
1060 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1061 | s4 = srcPixelPtr[0]; |
1062 | s5 = srcPixelPtr[4]; |
1063 | s6 = srcPixelPtr[8]; |
1064 | s7 = srcPixelPtr[12]; |
1065 | |
1066 | for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { |
1067 | |
1068 | X1 += dX; |
1069 | Y1 += dY; |
1070 | |
1071 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
1072 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
1073 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1074 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1075 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1076 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1077 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1078 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1079 | |
1080 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
1081 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1082 | |
1083 | xf0 = fptr[0] >> 1; |
1084 | xf1 = fptr[1] >> 1; |
1085 | xf2 = fptr[2] >> 1; |
1086 | xf3 = fptr[3] >> 1; |
1087 | |
1088 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
1089 | |
1090 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
1091 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1092 | |
1093 | yf0 = fptr[0]; |
1094 | yf1 = fptr[1]; |
1095 | yf2 = fptr[2]; |
1096 | yf3 = fptr[3]; |
1097 | |
1098 | S32_TO_U16_SAT(dPtr[0]); |
1099 | |
1100 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
1101 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
1102 | |
1103 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
1104 | s0 = srcPixelPtr[0]; |
1105 | s1 = srcPixelPtr[4]; |
1106 | s2 = srcPixelPtr[8]; |
1107 | s3 = srcPixelPtr[12]; |
1108 | |
1109 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1110 | s4 = srcPixelPtr[0]; |
1111 | s5 = srcPixelPtr[4]; |
1112 | s6 = srcPixelPtr[8]; |
1113 | s7 = srcPixelPtr[12]; |
1114 | } |
1115 | |
1116 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
1117 | c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X; |
1118 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1119 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1120 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1121 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1122 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1123 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1124 | |
1125 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
1126 | S32_TO_U16_SAT(dPtr[0]); |
1127 | } |
1128 | } |
1129 | |
1130 | return MLIB_SUCCESS; |
1131 | } |
1132 | |
1133 | #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */ |
1134 | |
1135 | /***************************************************************/ |
1136 | |