1 | /* |
2 | * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | |
27 | /* |
28 | * FUNCTION |
29 | * Image affine transformation with Bicubic filtering |
30 | * SYNOPSIS |
31 | * mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges, |
32 | * mlib_s32 *rightEdges, |
33 | * mlib_s32 *xStarts, |
34 | * mlib_s32 *yStarts, |
35 | * mlib_s32 *sides, |
36 | * mlib_u8 *dstData, |
37 | * mlib_u8 **lineAddr, |
38 | * mlib_s32 dstYStride, |
39 | * mlib_s32 is_affine, |
40 | * mlib_s32 srcYStride, |
41 | * mlib_filter filter) |
42 | * |
43 | * |
44 | * ARGUMENTS |
45 | * leftEdges array[dstHeight] of xLeft coordinates |
46 | * rightEdges array[dstHeight] of xRight coordinates |
47 | * xStarts array[dstHeight] of xStart * 65536 coordinates |
48 | * yStarts array[dstHeight] of yStart * 65536 coordinates |
49 | * sides output array[4]. sides[0] is yStart, sides[1] is yFinish, |
50 | * sides[2] is dx * 65536, sides[3] is dy * 65536 |
51 | * dstData pointer to the first pixel on (yStart - 1) line |
52 | * lineAddr array[srcHeight] of pointers to the first pixel on |
53 | * the corresponding lines |
54 | * dstYStride stride of destination image |
55 | * is_affine indicator (Affine - GridWarp) |
56 | * srcYStride stride of source image |
57 | * filter type of resampling filter |
58 | * |
59 | * DESCRIPTION |
60 | * The functions step along the lines from xLeft to xRight and apply |
61 | * the bicubic filtering. |
62 | * |
63 | */ |
64 | |
65 | #include "mlib_ImageAffine.h" |
66 | |
/* Element type of the source and destination pixels handled in this file. */
#define DTYPE           mlib_u8

/* Builds a routine name from a channel suffix, e.g. FUN_NAME(1ch) expands
   to mlib_ImageAffine_u8_1ch_bc. */
#define FUN_NAME(CHAN)  mlib_ImageAffine_u8_##CHAN##_bc

/* NOTE(review): presumably consumed by FILTER_SHIFT / FILTER_MASK from
   mlib_ImageAffine.h - confirm against the header. */
#define FILTER_BITS     8
72 | |
73 | /***************************************************************/ |
74 | #ifdef __sparc /* for SPARC, using floating-point multiplies is faster */ |
75 | |
/* NOTE(review): overrides the header's FILTER_ELEM_BITS for the mlib_f32
   coefficient tables used in this branch - presumably log2 of the byte
   size of one 4-coefficient entry; confirm against mlib_ImageAffine.h. */
#undef  FILTER_ELEM_BITS
#define FILTER_ELEM_BITS  4

/*
 * SAT8(DST) - convert the filtered sum to an 8-bit sample and store it.
 *
 * Uses two names from the expansion site: val0 (the filtered sum, modified
 * by the non-clamping variant) and sat (the bias subtracted before the
 * conversion).  After the subtraction the value is converted to mlib_s32,
 * shifted right by 24 and XOR-ed with 0x80 to produce the stored byte.
 *
 * Both variants are wrapped in do { } while (0) so that the macro is a
 * single statement: it can be used under an unbraced if/else without
 * leaving part of its expansion outside the condition.
 */
#ifdef MLIB_USE_FTOI_CLAMPING

/* Relies on the float-to-int conversion itself clamping out-of-range values. */
#define SAT8(DST)                                               \
  do {                                                          \
    DST = ((mlib_s32) (val0 - sat) >> 24) ^ 0x80;               \
  } while (0)

#else

/* Clamps explicitly before converting. */
#define SAT8(DST)                                               \
  do {                                                          \
    val0 -= sat;                                                \
    if (val0 >= MLIB_S32_MAX)                                   \
      DST = MLIB_U8_MAX;                                        \
    else if (val0 <= MLIB_S32_MIN)                              \
      DST = MLIB_U8_MIN;                                        \
    else                                                        \
      DST = ((mlib_s32) val0 >> 24) ^ 0x80;                     \
  } while (0)

#endif /* MLIB_USE_FTOI_CLAMPING */
96 | |
97 | /***************************************************************/ |
98 | mlib_status FUN_NAME(1ch)(mlib_affine_param *param) |
99 | { |
100 | DECLAREVAR_BC(); |
101 | DTYPE *dstLineEnd; |
102 | mlib_d64 sat = (mlib_d64) 0x7F800000; |
103 | const mlib_f32 *mlib_filters_table; |
104 | |
105 | if (filter == MLIB_BICUBIC) { |
106 | mlib_filters_table = mlib_filters_u8f_bc; |
107 | } |
108 | else { |
109 | mlib_filters_table = mlib_filters_u8f_bc2; |
110 | } |
111 | |
112 | for (j = yStart; j <= yFinish; j++) { |
113 | mlib_d64 xf0, xf1, xf2, xf3; |
114 | mlib_d64 yf0, yf1, yf2, yf3; |
115 | mlib_d64 c0, c1, c2, c3, val0; |
116 | mlib_s32 filterpos; |
117 | mlib_f32 *fptr; |
118 | mlib_u8 s0, s1, s2, s3; |
119 | |
120 | CLIP(1); |
121 | dstLineEnd = (DTYPE *) dstData + xRight; |
122 | |
123 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
124 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
125 | |
126 | xf0 = fptr[0]; |
127 | xf1 = fptr[1]; |
128 | xf2 = fptr[2]; |
129 | xf3 = fptr[3]; |
130 | |
131 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
132 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
133 | |
134 | yf0 = fptr[0]; |
135 | yf1 = fptr[1]; |
136 | yf2 = fptr[2]; |
137 | yf3 = fptr[3]; |
138 | |
139 | xSrc = (X >> MLIB_SHIFT) - 1; |
140 | ySrc = (Y >> MLIB_SHIFT) - 1; |
141 | |
142 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
143 | s0 = srcPixelPtr[0]; |
144 | s1 = srcPixelPtr[1]; |
145 | s2 = srcPixelPtr[2]; |
146 | s3 = srcPixelPtr[3]; |
147 | |
148 | #ifdef __SUNPRO_C |
149 | #pragma pipeloop(0) |
150 | #endif /* __SUNPRO_C */ |
151 | for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { |
152 | X += dX; |
153 | Y += dY; |
154 | |
155 | c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + |
156 | mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); |
157 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
158 | c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + |
159 | mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); |
160 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
161 | c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + |
162 | mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); |
163 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
164 | c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + |
165 | mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); |
166 | |
167 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
168 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
169 | |
170 | xf0 = fptr[0]; |
171 | xf1 = fptr[1]; |
172 | xf2 = fptr[2]; |
173 | xf3 = fptr[3]; |
174 | |
175 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
176 | |
177 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
178 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
179 | |
180 | yf0 = fptr[0]; |
181 | yf1 = fptr[1]; |
182 | yf2 = fptr[2]; |
183 | yf3 = fptr[3]; |
184 | |
185 | SAT8(dstPixelPtr[0]); |
186 | |
187 | xSrc = (X >> MLIB_SHIFT) - 1; |
188 | ySrc = (Y >> MLIB_SHIFT) - 1; |
189 | |
190 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
191 | s0 = srcPixelPtr[0]; |
192 | s1 = srcPixelPtr[1]; |
193 | s2 = srcPixelPtr[2]; |
194 | s3 = srcPixelPtr[3]; |
195 | } |
196 | |
197 | c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + |
198 | mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); |
199 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
200 | c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + |
201 | mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); |
202 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
203 | c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + |
204 | mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); |
205 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
206 | c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[1]] * xf1 + |
207 | mlib_U82D64[srcPixelPtr[2]] * xf2 + mlib_U82D64[srcPixelPtr[3]] * xf3); |
208 | |
209 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
210 | |
211 | SAT8(dstPixelPtr[0]); |
212 | } |
213 | |
214 | return MLIB_SUCCESS; |
215 | } |
216 | |
217 | /***************************************************************/ |
218 | mlib_status FUN_NAME(2ch)(mlib_affine_param *param) |
219 | { |
220 | DECLAREVAR_BC(); |
221 | DTYPE *dstLineEnd; |
222 | mlib_d64 sat = (mlib_d64) 0x7F800000; |
223 | const mlib_f32 *mlib_filters_table; |
224 | |
225 | if (filter == MLIB_BICUBIC) { |
226 | mlib_filters_table = mlib_filters_u8f_bc; |
227 | } |
228 | else { |
229 | mlib_filters_table = mlib_filters_u8f_bc2; |
230 | } |
231 | |
232 | for (j = yStart; j <= yFinish; j++) { |
233 | mlib_d64 xf0, xf1, xf2, xf3; |
234 | mlib_d64 yf0, yf1, yf2, yf3; |
235 | mlib_d64 c0, c1, c2, c3, val0; |
236 | mlib_s32 filterpos, k; |
237 | mlib_f32 *fptr; |
238 | mlib_u8 s0, s1, s2, s3; |
239 | |
240 | CLIP(2); |
241 | dstLineEnd = (DTYPE *) dstData + 2 * xRight; |
242 | |
243 | for (k = 0; k < 2; k++) { |
244 | mlib_s32 X1 = X; |
245 | mlib_s32 Y1 = Y; |
246 | DTYPE *dPtr = dstPixelPtr + k; |
247 | |
248 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
249 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
250 | |
251 | xf0 = fptr[0]; |
252 | xf1 = fptr[1]; |
253 | xf2 = fptr[2]; |
254 | xf3 = fptr[3]; |
255 | |
256 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
257 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
258 | |
259 | yf0 = fptr[0]; |
260 | yf1 = fptr[1]; |
261 | yf2 = fptr[2]; |
262 | yf3 = fptr[3]; |
263 | |
264 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
265 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
266 | |
267 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
268 | s0 = srcPixelPtr[0]; |
269 | s1 = srcPixelPtr[2]; |
270 | s2 = srcPixelPtr[4]; |
271 | s3 = srcPixelPtr[6]; |
272 | |
273 | #ifdef __SUNPRO_C |
274 | #pragma pipeloop(0) |
275 | #endif /* __SUNPRO_C */ |
276 | for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { |
277 | X1 += dX; |
278 | Y1 += dY; |
279 | |
280 | c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + |
281 | mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); |
282 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
283 | c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + |
284 | mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); |
285 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
286 | c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + |
287 | mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); |
288 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
289 | c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + |
290 | mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); |
291 | |
292 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
293 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
294 | |
295 | xf0 = fptr[0]; |
296 | xf1 = fptr[1]; |
297 | xf2 = fptr[2]; |
298 | xf3 = fptr[3]; |
299 | |
300 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
301 | |
302 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
303 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
304 | |
305 | yf0 = fptr[0]; |
306 | yf1 = fptr[1]; |
307 | yf2 = fptr[2]; |
308 | yf3 = fptr[3]; |
309 | |
310 | SAT8(dPtr[0]); |
311 | |
312 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
313 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
314 | |
315 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
316 | s0 = srcPixelPtr[0]; |
317 | s1 = srcPixelPtr[2]; |
318 | s2 = srcPixelPtr[4]; |
319 | s3 = srcPixelPtr[6]; |
320 | } |
321 | |
322 | c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + |
323 | mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); |
324 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
325 | c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + |
326 | mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); |
327 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
328 | c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + |
329 | mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); |
330 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
331 | c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[2]] * xf1 + |
332 | mlib_U82D64[srcPixelPtr[4]] * xf2 + mlib_U82D64[srcPixelPtr[6]] * xf3); |
333 | |
334 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
335 | |
336 | SAT8(dPtr[0]); |
337 | } |
338 | } |
339 | |
340 | return MLIB_SUCCESS; |
341 | } |
342 | |
343 | /***************************************************************/ |
344 | mlib_status FUN_NAME(3ch)(mlib_affine_param *param) |
345 | { |
346 | DECLAREVAR_BC(); |
347 | DTYPE *dstLineEnd; |
348 | mlib_d64 sat = (mlib_d64) 0x7F800000; |
349 | const mlib_f32 *mlib_filters_table; |
350 | |
351 | if (filter == MLIB_BICUBIC) { |
352 | mlib_filters_table = mlib_filters_u8f_bc; |
353 | } |
354 | else { |
355 | mlib_filters_table = mlib_filters_u8f_bc2; |
356 | } |
357 | |
358 | for (j = yStart; j <= yFinish; j++) { |
359 | mlib_d64 xf0, xf1, xf2, xf3; |
360 | mlib_d64 yf0, yf1, yf2, yf3; |
361 | mlib_d64 c0, c1, c2, c3, val0; |
362 | mlib_s32 filterpos, k; |
363 | mlib_f32 *fptr; |
364 | mlib_u8 s0, s1, s2, s3; |
365 | |
366 | CLIP(3); |
367 | dstLineEnd = (DTYPE *) dstData + 3 * xRight; |
368 | |
369 | for (k = 0; k < 3; k++) { |
370 | mlib_s32 X1 = X; |
371 | mlib_s32 Y1 = Y; |
372 | DTYPE *dPtr = dstPixelPtr + k; |
373 | |
374 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
375 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
376 | |
377 | xf0 = fptr[0]; |
378 | xf1 = fptr[1]; |
379 | xf2 = fptr[2]; |
380 | xf3 = fptr[3]; |
381 | |
382 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
383 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
384 | |
385 | yf0 = fptr[0]; |
386 | yf1 = fptr[1]; |
387 | yf2 = fptr[2]; |
388 | yf3 = fptr[3]; |
389 | |
390 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
391 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
392 | |
393 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
394 | s0 = srcPixelPtr[0]; |
395 | s1 = srcPixelPtr[3]; |
396 | s2 = srcPixelPtr[6]; |
397 | s3 = srcPixelPtr[9]; |
398 | |
399 | #ifdef __SUNPRO_C |
400 | #pragma pipeloop(0) |
401 | #endif /* __SUNPRO_C */ |
402 | for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { |
403 | X1 += dX; |
404 | Y1 += dY; |
405 | |
406 | c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + |
407 | mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); |
408 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
409 | c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + |
410 | mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); |
411 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
412 | c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + |
413 | mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); |
414 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
415 | c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + |
416 | mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); |
417 | |
418 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
419 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
420 | |
421 | xf0 = fptr[0]; |
422 | xf1 = fptr[1]; |
423 | xf2 = fptr[2]; |
424 | xf3 = fptr[3]; |
425 | |
426 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
427 | |
428 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
429 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
430 | |
431 | yf0 = fptr[0]; |
432 | yf1 = fptr[1]; |
433 | yf2 = fptr[2]; |
434 | yf3 = fptr[3]; |
435 | |
436 | SAT8(dPtr[0]); |
437 | |
438 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
439 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
440 | |
441 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
442 | s0 = srcPixelPtr[0]; |
443 | s1 = srcPixelPtr[3]; |
444 | s2 = srcPixelPtr[6]; |
445 | s3 = srcPixelPtr[9]; |
446 | } |
447 | |
448 | c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + |
449 | mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); |
450 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
451 | c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + |
452 | mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); |
453 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
454 | c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + |
455 | mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); |
456 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
457 | c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[3]] * xf1 + |
458 | mlib_U82D64[srcPixelPtr[6]] * xf2 + mlib_U82D64[srcPixelPtr[9]] * xf3); |
459 | |
460 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
461 | |
462 | SAT8(dPtr[0]); |
463 | } |
464 | } |
465 | |
466 | return MLIB_SUCCESS; |
467 | } |
468 | |
469 | /***************************************************************/ |
470 | mlib_status FUN_NAME(4ch)(mlib_affine_param *param) |
471 | { |
472 | DECLAREVAR_BC(); |
473 | DTYPE *dstLineEnd; |
474 | mlib_d64 sat = (mlib_d64) 0x7F800000; |
475 | const mlib_f32 *mlib_filters_table; |
476 | |
477 | if (filter == MLIB_BICUBIC) { |
478 | mlib_filters_table = mlib_filters_u8f_bc; |
479 | } |
480 | else { |
481 | mlib_filters_table = mlib_filters_u8f_bc2; |
482 | } |
483 | |
484 | for (j = yStart; j <= yFinish; j++) { |
485 | mlib_d64 xf0, xf1, xf2, xf3; |
486 | mlib_d64 yf0, yf1, yf2, yf3; |
487 | mlib_d64 c0, c1, c2, c3, val0; |
488 | mlib_s32 filterpos, k; |
489 | mlib_f32 *fptr; |
490 | mlib_u8 s0, s1, s2, s3; |
491 | |
492 | CLIP(4); |
493 | dstLineEnd = (DTYPE *) dstData + 4 * xRight; |
494 | |
495 | for (k = 0; k < 4; k++) { |
496 | mlib_s32 X1 = X; |
497 | mlib_s32 Y1 = Y; |
498 | DTYPE *dPtr = dstPixelPtr + k; |
499 | |
500 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
501 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
502 | |
503 | xf0 = fptr[0]; |
504 | xf1 = fptr[1]; |
505 | xf2 = fptr[2]; |
506 | xf3 = fptr[3]; |
507 | |
508 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
509 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
510 | |
511 | yf0 = fptr[0]; |
512 | yf1 = fptr[1]; |
513 | yf2 = fptr[2]; |
514 | yf3 = fptr[3]; |
515 | |
516 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
517 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
518 | |
519 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
520 | s0 = srcPixelPtr[0]; |
521 | s1 = srcPixelPtr[4]; |
522 | s2 = srcPixelPtr[8]; |
523 | s3 = srcPixelPtr[12]; |
524 | |
525 | #ifdef __SUNPRO_C |
526 | #pragma pipeloop(0) |
527 | #endif /* __SUNPRO_C */ |
528 | for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { |
529 | X1 += dX; |
530 | Y1 += dY; |
531 | |
532 | c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + |
533 | mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); |
534 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
535 | c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + |
536 | mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); |
537 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
538 | c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + |
539 | mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); |
540 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
541 | c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + |
542 | mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); |
543 | |
544 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
545 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
546 | |
547 | xf0 = fptr[0]; |
548 | xf1 = fptr[1]; |
549 | xf2 = fptr[2]; |
550 | xf3 = fptr[3]; |
551 | |
552 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
553 | |
554 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
555 | fptr = (mlib_f32 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
556 | |
557 | yf0 = fptr[0]; |
558 | yf1 = fptr[1]; |
559 | yf2 = fptr[2]; |
560 | yf3 = fptr[3]; |
561 | |
562 | SAT8(dPtr[0]); |
563 | |
564 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
565 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
566 | |
567 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
568 | s0 = srcPixelPtr[0]; |
569 | s1 = srcPixelPtr[4]; |
570 | s2 = srcPixelPtr[8]; |
571 | s3 = srcPixelPtr[12]; |
572 | } |
573 | |
574 | c0 = (mlib_U82D64[s0] * xf0 + mlib_U82D64[s1] * xf1 + |
575 | mlib_U82D64[s2] * xf2 + mlib_U82D64[s3] * xf3); |
576 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
577 | c1 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + |
578 | mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); |
579 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
580 | c2 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + |
581 | mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); |
582 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
583 | c3 = (mlib_U82D64[srcPixelPtr[0]] * xf0 + mlib_U82D64[srcPixelPtr[4]] * xf1 + |
584 | mlib_U82D64[srcPixelPtr[8]] * xf2 + mlib_U82D64[srcPixelPtr[12]] * xf3); |
585 | |
586 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3); |
587 | |
588 | SAT8(dPtr[0]); |
589 | } |
590 | } |
591 | |
592 | return MLIB_SUCCESS; |
593 | } |
594 | |
595 | #else /* for x86, using integer multiplies is faster */ |
596 | |
/*
 * Fixed-point scaling used by the integer routines in this branch.
 *
 * SHIFT_X / ROUND_X - right shift (and the rounding term added before it)
 *                     applied to each horizontal 4-tap sum.  Rounding is
 *                     disabled (0); the half-unit value is kept in the
 *                     trailing comment.
 * SHIFT_Y / ROUND_Y - right shift and rounding term applied to the vertical
 *                     sum; SHIFT_X + SHIFT_Y always equals 14 + 14.
 *
 * NOTE(review): the two 14s presumably are the fractional bits of the
 * horizontal and vertical coefficients - confirm against the filter tables.
 */
#define SHIFT_X  12
#define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */

#define SHIFT_Y  (14 + 14 - SHIFT_X)
#define ROUND_Y  (1 << (SHIFT_Y - 1))
602 | |
603 | /***************************************************************/ |
/*
 * S32_TO_U8_SAT(DST) - store val0 (taken from the expansion site) into DST,
 * saturated to the range MLIB_U8_MIN..MLIB_U8_MAX.
 *
 * Test for the presence of any "1" bit in bits 8 to 31 of val0.  If one is
 * present, val0 is either negative or greater than 255.  When 8-bit
 * over/underflows are uncommon this is a win, since a single test rather
 * than two decides whether clamping is needed; when they are common it
 * costs one extra test.
 *
 * The body is wrapped in do { } while (0) so the macro is one statement and
 * can be followed by ';' inside an unbraced if/else.
 */
#define S32_TO_U8_SAT(DST)                                      \
  do {                                                          \
    if (val0 & 0xffffff00) {                                    \
      if (val0 < MLIB_U8_MIN)                                   \
        DST = MLIB_U8_MIN;                                      \
      else                                                      \
        DST = MLIB_U8_MAX;                                      \
    } else {                                                    \
      DST = (mlib_u8) val0;                                     \
    }                                                           \
  } while (0)
622 | |
623 | /***************************************************************/ |
624 | mlib_status FUN_NAME(1ch)(mlib_affine_param *param) |
625 | { |
626 | DECLAREVAR_BC(); |
627 | DTYPE *dstLineEnd; |
628 | const mlib_s16 *mlib_filters_table; |
629 | |
630 | if (filter == MLIB_BICUBIC) { |
631 | mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc; |
632 | } |
633 | else { |
634 | mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2; |
635 | } |
636 | |
637 | for (j = yStart; j <= yFinish; j++) { |
638 | mlib_s32 xf0, xf1, xf2, xf3; |
639 | mlib_s32 yf0, yf1, yf2, yf3; |
640 | mlib_s32 c0, c1, c2, c3, val0; |
641 | mlib_s32 filterpos; |
642 | mlib_s16 *fptr; |
643 | mlib_u8 s0, s1, s2, s3; |
644 | |
645 | CLIP(1); |
646 | dstLineEnd = (DTYPE *) dstData + xRight; |
647 | |
648 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
649 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
650 | |
651 | xf0 = fptr[0]; |
652 | xf1 = fptr[1]; |
653 | xf2 = fptr[2]; |
654 | xf3 = fptr[3]; |
655 | |
656 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
657 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
658 | |
659 | yf0 = fptr[0]; |
660 | yf1 = fptr[1]; |
661 | yf2 = fptr[2]; |
662 | yf3 = fptr[3]; |
663 | |
664 | xSrc = (X >> MLIB_SHIFT) - 1; |
665 | ySrc = (Y >> MLIB_SHIFT) - 1; |
666 | |
667 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
668 | s0 = srcPixelPtr[0]; |
669 | s1 = srcPixelPtr[1]; |
670 | s2 = srcPixelPtr[2]; |
671 | s3 = srcPixelPtr[3]; |
672 | |
673 | #ifdef __SUNPRO_C |
674 | #pragma pipeloop(0) |
675 | #endif /* __SUNPRO_C */ |
676 | for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) { |
677 | X += dX; |
678 | Y += dY; |
679 | |
680 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
681 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
682 | c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
683 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
684 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
685 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
686 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
687 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
688 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
689 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
690 | |
691 | filterpos = (X >> FILTER_SHIFT) & FILTER_MASK; |
692 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
693 | |
694 | xf0 = fptr[0]; |
695 | xf1 = fptr[1]; |
696 | xf2 = fptr[2]; |
697 | xf3 = fptr[3]; |
698 | |
699 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
700 | |
701 | filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK; |
702 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
703 | |
704 | yf0 = fptr[0]; |
705 | yf1 = fptr[1]; |
706 | yf2 = fptr[2]; |
707 | yf3 = fptr[3]; |
708 | |
709 | S32_TO_U8_SAT(dstPixelPtr[0]); |
710 | |
711 | xSrc = (X >> MLIB_SHIFT) - 1; |
712 | ySrc = (Y >> MLIB_SHIFT) - 1; |
713 | |
714 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc; |
715 | s0 = srcPixelPtr[0]; |
716 | s1 = srcPixelPtr[1]; |
717 | s2 = srcPixelPtr[2]; |
718 | s3 = srcPixelPtr[3]; |
719 | } |
720 | |
721 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
722 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
723 | c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
724 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
725 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
726 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
727 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
728 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
729 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 + |
730 | srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X; |
731 | |
732 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
733 | |
734 | S32_TO_U8_SAT(dstPixelPtr[0]); |
735 | } |
736 | |
737 | return MLIB_SUCCESS; |
738 | } |
739 | |
740 | /***************************************************************/ |
741 | mlib_status FUN_NAME(2ch)(mlib_affine_param *param) |
742 | { |
743 | DECLAREVAR_BC(); |
744 | DTYPE *dstLineEnd; |
745 | const mlib_s16 *mlib_filters_table; |
746 | |
747 | if (filter == MLIB_BICUBIC) { |
748 | mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc; |
749 | } |
750 | else { |
751 | mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2; |
752 | } |
753 | |
754 | for (j = yStart; j <= yFinish; j++) { |
755 | mlib_s32 xf0, xf1, xf2, xf3; |
756 | mlib_s32 yf0, yf1, yf2, yf3; |
757 | mlib_s32 c0, c1, c2, c3, val0; |
758 | mlib_s32 filterpos, k; |
759 | mlib_s16 *fptr; |
760 | mlib_u8 s0, s1, s2, s3; |
761 | |
762 | CLIP(2); |
763 | dstLineEnd = (DTYPE *) dstData + 2 * xRight; |
764 | |
765 | for (k = 0; k < 2; k++) { |
766 | mlib_s32 X1 = X; |
767 | mlib_s32 Y1 = Y; |
768 | DTYPE *dPtr = dstPixelPtr + k; |
769 | |
770 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
771 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
772 | |
773 | xf0 = fptr[0]; |
774 | xf1 = fptr[1]; |
775 | xf2 = fptr[2]; |
776 | xf3 = fptr[3]; |
777 | |
778 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
779 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
780 | |
781 | yf0 = fptr[0]; |
782 | yf1 = fptr[1]; |
783 | yf2 = fptr[2]; |
784 | yf3 = fptr[3]; |
785 | |
786 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
787 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
788 | |
789 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
790 | s0 = srcPixelPtr[0]; |
791 | s1 = srcPixelPtr[2]; |
792 | s2 = srcPixelPtr[4]; |
793 | s3 = srcPixelPtr[6]; |
794 | |
795 | #ifdef __SUNPRO_C |
796 | #pragma pipeloop(0) |
797 | #endif /* __SUNPRO_C */ |
798 | for (; dPtr <= (dstLineEnd - 1); dPtr += 2) { |
799 | X1 += dX; |
800 | Y1 += dY; |
801 | |
802 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
803 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
804 | c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
805 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
806 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
807 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
808 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
809 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
810 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
811 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
812 | |
813 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
814 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
815 | |
816 | xf0 = fptr[0]; |
817 | xf1 = fptr[1]; |
818 | xf2 = fptr[2]; |
819 | xf3 = fptr[3]; |
820 | |
821 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
822 | |
823 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
824 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
825 | |
826 | yf0 = fptr[0]; |
827 | yf1 = fptr[1]; |
828 | yf2 = fptr[2]; |
829 | yf3 = fptr[3]; |
830 | |
831 | S32_TO_U8_SAT(dPtr[0]); |
832 | |
833 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
834 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
835 | |
836 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k; |
837 | s0 = srcPixelPtr[0]; |
838 | s1 = srcPixelPtr[2]; |
839 | s2 = srcPixelPtr[4]; |
840 | s3 = srcPixelPtr[6]; |
841 | } |
842 | |
843 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
844 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
845 | c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
846 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
847 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
848 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
849 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
850 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
851 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 + |
852 | srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X; |
853 | |
854 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
855 | |
856 | S32_TO_U8_SAT(dPtr[0]); |
857 | } |
858 | } |
859 | |
860 | return MLIB_SUCCESS; |
861 | } |
862 | |
863 | /***************************************************************/ |
864 | mlib_status FUN_NAME(3ch)(mlib_affine_param *param) |
865 | { |
866 | DECLAREVAR_BC(); |
867 | DTYPE *dstLineEnd; |
868 | const mlib_s16 *mlib_filters_table; |
869 | |
870 | if (filter == MLIB_BICUBIC) { |
871 | mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc; |
872 | } |
873 | else { |
874 | mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2; |
875 | } |
876 | |
877 | for (j = yStart; j <= yFinish; j++) { |
878 | mlib_s32 xf0, xf1, xf2, xf3; |
879 | mlib_s32 yf0, yf1, yf2, yf3; |
880 | mlib_s32 c0, c1, c2, c3, val0; |
881 | mlib_s32 filterpos, k; |
882 | mlib_s16 *fptr; |
883 | mlib_u8 s0, s1, s2, s3; |
884 | |
885 | CLIP(3); |
886 | dstLineEnd = (DTYPE *) dstData + 3 * xRight; |
887 | |
888 | for (k = 0; k < 3; k++) { |
889 | mlib_s32 X1 = X; |
890 | mlib_s32 Y1 = Y; |
891 | DTYPE *dPtr = dstPixelPtr + k; |
892 | |
893 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
894 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
895 | |
896 | xf0 = fptr[0]; |
897 | xf1 = fptr[1]; |
898 | xf2 = fptr[2]; |
899 | xf3 = fptr[3]; |
900 | |
901 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
902 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
903 | |
904 | yf0 = fptr[0]; |
905 | yf1 = fptr[1]; |
906 | yf2 = fptr[2]; |
907 | yf3 = fptr[3]; |
908 | |
909 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
910 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
911 | |
912 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
913 | s0 = srcPixelPtr[0]; |
914 | s1 = srcPixelPtr[3]; |
915 | s2 = srcPixelPtr[6]; |
916 | s3 = srcPixelPtr[9]; |
917 | |
918 | #ifdef __SUNPRO_C |
919 | #pragma pipeloop(0) |
920 | #endif /* __SUNPRO_C */ |
921 | for (; dPtr <= (dstLineEnd - 1); dPtr += 3) { |
922 | X1 += dX; |
923 | Y1 += dY; |
924 | |
925 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
926 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
927 | c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
928 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
929 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
930 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
931 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
932 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
933 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
934 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
935 | |
936 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
937 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
938 | |
939 | xf0 = fptr[0]; |
940 | xf1 = fptr[1]; |
941 | xf2 = fptr[2]; |
942 | xf3 = fptr[3]; |
943 | |
944 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
945 | |
946 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
947 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
948 | |
949 | yf0 = fptr[0]; |
950 | yf1 = fptr[1]; |
951 | yf2 = fptr[2]; |
952 | yf3 = fptr[3]; |
953 | |
954 | S32_TO_U8_SAT(dPtr[0]); |
955 | |
956 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
957 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
958 | |
959 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k; |
960 | s0 = srcPixelPtr[0]; |
961 | s1 = srcPixelPtr[3]; |
962 | s2 = srcPixelPtr[6]; |
963 | s3 = srcPixelPtr[9]; |
964 | } |
965 | |
966 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
967 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
968 | c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
969 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
970 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
971 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
972 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
973 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
974 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 + |
975 | srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X; |
976 | |
977 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
978 | |
979 | S32_TO_U8_SAT(dPtr[0]); |
980 | } |
981 | } |
982 | |
983 | return MLIB_SUCCESS; |
984 | } |
985 | |
986 | /***************************************************************/ |
987 | mlib_status FUN_NAME(4ch)(mlib_affine_param *param) |
988 | { |
989 | DECLAREVAR_BC(); |
990 | DTYPE *dstLineEnd; |
991 | const mlib_s16 *mlib_filters_table; |
992 | |
993 | if (filter == MLIB_BICUBIC) { |
994 | mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc; |
995 | } |
996 | else { |
997 | mlib_filters_table = (mlib_s16 *) mlib_filters_u8_bc2; |
998 | } |
999 | |
1000 | for (j = yStart; j <= yFinish; j++) { |
1001 | mlib_s32 xf0, xf1, xf2, xf3; |
1002 | mlib_s32 yf0, yf1, yf2, yf3; |
1003 | mlib_s32 c0, c1, c2, c3, val0; |
1004 | mlib_s32 filterpos, k; |
1005 | mlib_s16 *fptr; |
1006 | mlib_u8 s0, s1, s2, s3; |
1007 | |
1008 | CLIP(4); |
1009 | dstLineEnd = (DTYPE *) dstData + 4 * xRight; |
1010 | |
1011 | for (k = 0; k < 4; k++) { |
1012 | mlib_s32 X1 = X; |
1013 | mlib_s32 Y1 = Y; |
1014 | DTYPE *dPtr = dstPixelPtr + k; |
1015 | |
1016 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
1017 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1018 | |
1019 | xf0 = fptr[0]; |
1020 | xf1 = fptr[1]; |
1021 | xf2 = fptr[2]; |
1022 | xf3 = fptr[3]; |
1023 | |
1024 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
1025 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1026 | |
1027 | yf0 = fptr[0]; |
1028 | yf1 = fptr[1]; |
1029 | yf2 = fptr[2]; |
1030 | yf3 = fptr[3]; |
1031 | |
1032 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
1033 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
1034 | |
1035 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
1036 | s0 = srcPixelPtr[0]; |
1037 | s1 = srcPixelPtr[4]; |
1038 | s2 = srcPixelPtr[8]; |
1039 | s3 = srcPixelPtr[12]; |
1040 | |
1041 | #ifdef __SUNPRO_C |
1042 | #pragma pipeloop(0) |
1043 | #endif /* __SUNPRO_C */ |
1044 | for (; dPtr <= (dstLineEnd - 1); dPtr += 4) { |
1045 | X1 += dX; |
1046 | Y1 += dY; |
1047 | |
1048 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
1049 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1050 | c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1051 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1052 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1053 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1054 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1055 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1056 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1057 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1058 | |
1059 | filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK; |
1060 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1061 | |
1062 | xf0 = fptr[0]; |
1063 | xf1 = fptr[1]; |
1064 | xf2 = fptr[2]; |
1065 | xf3 = fptr[3]; |
1066 | |
1067 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
1068 | |
1069 | filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK; |
1070 | fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos); |
1071 | |
1072 | yf0 = fptr[0]; |
1073 | yf1 = fptr[1]; |
1074 | yf2 = fptr[2]; |
1075 | yf3 = fptr[3]; |
1076 | |
1077 | S32_TO_U8_SAT(dPtr[0]); |
1078 | |
1079 | xSrc = (X1 >> MLIB_SHIFT) - 1; |
1080 | ySrc = (Y1 >> MLIB_SHIFT) - 1; |
1081 | |
1082 | srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k; |
1083 | s0 = srcPixelPtr[0]; |
1084 | s1 = srcPixelPtr[4]; |
1085 | s2 = srcPixelPtr[8]; |
1086 | s3 = srcPixelPtr[12]; |
1087 | } |
1088 | |
1089 | c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X; |
1090 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1091 | c1 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1092 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1093 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1094 | c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1095 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1096 | srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride); |
1097 | c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 + |
1098 | srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X; |
1099 | |
1100 | val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y; |
1101 | |
1102 | S32_TO_U8_SAT(dPtr[0]); |
1103 | } |
1104 | } |
1105 | |
1106 | return MLIB_SUCCESS; |
1107 | } |
1108 | |
1109 | #endif /* __sparc ( for SPARC, using floating-point multiplies is faster ) */ |
1110 | |
1111 | /***************************************************************/ |
1112 | |