1 | /* |
2 | * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | |
27 | /* |
28 | * FUNCTION |
29 | * mlib_ImageConvMxN_Fp - image convolution with edge condition |
30 | * |
31 | * SYNOPSIS |
32 | * mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst, |
33 | * const mlib_image *src, |
34 | * const mlib_d64 *kernel, |
35 | * mlib_s32 m, |
36 | * mlib_s32 n, |
37 | * mlib_s32 dm, |
38 | * mlib_s32 dn, |
39 | * mlib_s32 cmask, |
40 | * mlib_edge edge) |
41 | * |
42 | * ARGUMENTS |
43 | * dst Pointer to destination image. |
44 | * src Pointer to source image. |
45 | * m Kernel width (m must be not less than 1). |
46 | * n Kernel height (n must be not less than 1). |
47 | * dm, dn Position of key element in convolution kernel. |
48 | * kernel Pointer to convolution kernel. |
49 | * cmask Channel mask to indicate the channels to be convolved. |
50 | * Each bit of the mask represents a channel in the image. The |
51 | * channels corresponding to 1 bits are those to be processed. |
52 | * edge Type of edge condition. |
53 | * |
54 | * DESCRIPTION |
55 | * 2-D convolution, MxN kernel. |
56 | * |
57 | * The center of the source image is mapped to the center of the |
58 | * destination image. |
59 | * The unselected channels are not overwritten. If both src and dst have |
60 | * just one channel, cmask is ignored. |
61 | * |
62 | * The edge condition can be one of the following: |
63 | * MLIB_EDGE_DST_NO_WRITE (default) |
64 | * MLIB_EDGE_DST_FILL_ZERO |
65 | * MLIB_EDGE_DST_COPY_SRC |
66 | * MLIB_EDGE_SRC_EXTEND |
67 | * |
68 | * RESTRICTION |
69 | * The src and the dst must be the same type and have same number |
70 | * of channels (1, 2, 3, or 4). |
71 | * m >= 1, n >= 1, |
72 | * 0 <= dm < m, 0 <= dn < n. |
73 | */ |
74 | |
75 | #include "mlib_image.h" |
76 | #include "mlib_ImageCheck.h" |
77 | #include "mlib_SysMath.h" |
78 | #include "mlib_ImageConv.h" |
79 | |
80 | /***************************************************************/ |
81 | static void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst, |
82 | const mlib_f32 *src, |
83 | const mlib_d64 *kernel, |
84 | mlib_s32 n, |
85 | mlib_s32 m, |
86 | mlib_s32 nch, |
87 | mlib_s32 dnch); |
88 | |
89 | static void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst, |
90 | const mlib_f32 *src, |
91 | mlib_s32 n, |
92 | mlib_s32 nch, |
93 | mlib_s32 dx_l, |
94 | mlib_s32 dx_r); |
95 | |
96 | static void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst, |
97 | const mlib_d64 *src, |
98 | const mlib_d64 *kernel, |
99 | mlib_s32 n, |
100 | mlib_s32 m, |
101 | mlib_s32 nch, |
102 | mlib_s32 dnch); |
103 | |
104 | static void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst, |
105 | const mlib_d64 *src, |
106 | mlib_s32 n, |
107 | mlib_s32 nch, |
108 | mlib_s32 dx_l, |
109 | mlib_s32 dx_r); |
110 | |
111 | /***************************************************************/ |
112 | #if 0 |
113 | static void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst, |
114 | mlib_f32 *vdst, |
115 | const mlib_f32 *src, |
116 | const mlib_d64 *hfilter, |
117 | const mlib_d64 *vfilter, |
118 | mlib_s32 n, |
119 | mlib_s32 m, |
120 | mlib_s32 nch, |
121 | mlib_s32 dnch); |
122 | |
123 | static void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst, |
124 | mlib_d64 *vdst, |
125 | const mlib_d64 *src, |
126 | const mlib_d64 *hfilter, |
127 | const mlib_d64 *vfilter, |
128 | mlib_s32 n, |
129 | mlib_s32 m, |
130 | mlib_s32 nch, |
131 | mlib_s32 dnch); |
132 | #endif /* 0 */ |
133 | |
134 | /***************************************************************/ |
135 | mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst, |
136 | const mlib_image *src, |
137 | const mlib_d64 *kernel, |
138 | mlib_s32 m, |
139 | mlib_s32 n, |
140 | mlib_s32 dm, |
141 | mlib_s32 dn, |
142 | mlib_s32 cmask, |
143 | mlib_edge edge) |
144 | { |
145 | mlib_type type; |
146 | |
147 | MLIB_IMAGE_CHECK(dst); |
148 | type = mlib_ImageGetType(dst); |
149 | |
150 | if (type != MLIB_FLOAT && type != MLIB_DOUBLE) |
151 | return MLIB_FAILURE; |
152 | |
153 | return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge); |
154 | } |
155 | |
156 | /***************************************************************/ |
157 | void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst, |
158 | const mlib_f32 *src, |
159 | const mlib_d64 *kernel, |
160 | mlib_s32 n, |
161 | mlib_s32 m, |
162 | mlib_s32 nch, |
163 | mlib_s32 dnch) |
164 | { |
165 | mlib_f32 *hdst1 = dst + dnch; |
166 | mlib_s32 i, j; |
167 | |
168 | for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) { |
169 | const mlib_f32 *src2 = src + 2 * nch; |
170 | mlib_f32 hval0 = (mlib_f32) kernel[0]; |
171 | mlib_f32 hval1 = (mlib_f32) kernel[1]; |
172 | mlib_f32 hval2 = (mlib_f32) kernel[2]; |
173 | mlib_f32 val0 = src[0]; |
174 | mlib_f32 val1 = src[nch]; |
175 | mlib_f32 hdvl = dst[0]; |
176 | |
177 | #ifdef __SUNPRO_C |
178 | #pragma pipeloop(0) |
179 | #endif /* __SUNPRO_C */ |
180 | for (i = 0; i < n; i++) { |
181 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
182 | mlib_f32 val2 = src2[i * nch]; |
183 | |
184 | hdvl = hdst1[i * dnch]; |
185 | hdvl0 += val1 * hval1; |
186 | hdvl0 += val2 * hval2; |
187 | val0 = val1; |
188 | val1 = val2; |
189 | |
190 | dst[i * dnch] = hdvl0; |
191 | } |
192 | } |
193 | |
194 | if (j < m - 1) { |
195 | const mlib_f32 *src2 = src + 2 * nch; |
196 | mlib_f32 hval0 = (mlib_f32) kernel[0]; |
197 | mlib_f32 hval1 = (mlib_f32) kernel[1]; |
198 | mlib_f32 val0 = src[0]; |
199 | mlib_f32 val1 = src[nch]; |
200 | mlib_f32 hdvl = dst[0]; |
201 | #ifdef __SUNPRO_C |
202 | #pragma pipeloop(0) |
203 | #endif /* __SUNPRO_C */ |
204 | for (i = 0; i < n; i++) { |
205 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
206 | mlib_f32 val2 = src2[i * nch]; |
207 | |
208 | hdvl = hdst1[i * dnch]; |
209 | hdvl0 += val1 * hval1; |
210 | val0 = val1; |
211 | val1 = val2; |
212 | |
213 | dst[i * dnch] = hdvl0; |
214 | } |
215 | |
216 | } |
217 | else if (j < m) { |
218 | const mlib_f32 *src2 = src + 2 * nch; |
219 | mlib_f32 hval0 = (mlib_f32) kernel[0]; |
220 | mlib_f32 val0 = src[0]; |
221 | mlib_f32 val1 = src[nch]; |
222 | mlib_f32 hdvl = dst[0]; |
223 | |
224 | #ifdef __SUNPRO_C |
225 | #pragma pipeloop(0) |
226 | #endif /* __SUNPRO_C */ |
227 | for (i = 0; i < n; i++) { |
228 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
229 | mlib_f32 val2 = src2[i * nch]; |
230 | |
231 | hdvl = hdst1[i * dnch]; |
232 | val0 = val1; |
233 | val1 = val2; |
234 | |
235 | dst[i * dnch] = hdvl0; |
236 | } |
237 | } |
238 | } |
239 | |
240 | /***************************************************************/ |
241 | void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst, |
242 | const mlib_f32 *src, |
243 | mlib_s32 n, |
244 | mlib_s32 nch, |
245 | mlib_s32 dx_l, |
246 | mlib_s32 dx_r) |
247 | { |
248 | mlib_s32 i; |
249 | mlib_f32 val = src[0]; |
250 | |
251 | for (i = 0; i < dx_l; i++) |
252 | dst[i] = val; |
253 | #ifdef __SUNPRO_C |
254 | #pragma pipeloop(0) |
255 | #endif /* __SUNPRO_C */ |
256 | for (; i < n - dx_r; i++) |
257 | dst[i] = src[nch * (i - dx_l)]; |
258 | val = dst[n - dx_r - 1]; |
259 | for (; i < n; i++) |
260 | dst[i] = val; |
261 | } |
262 | |
263 | /***************************************************************/ |
264 | mlib_status mlib_convMxNext_f32(mlib_image *dst, |
265 | const mlib_image *src, |
266 | const mlib_d64 *kernel, |
267 | mlib_s32 m, |
268 | mlib_s32 n, |
269 | mlib_s32 dx_l, |
270 | mlib_s32 dx_r, |
271 | mlib_s32 dy_t, |
272 | mlib_s32 dy_b, |
273 | mlib_s32 cmask) |
274 | { |
275 | mlib_d64 dspace[1024], *dsa = dspace; |
276 | mlib_s32 wid_e = mlib_ImageGetWidth(src); |
277 | mlib_f32 *fsa; |
278 | mlib_f32 *da = mlib_ImageGetData(dst); |
279 | mlib_f32 *sa = mlib_ImageGetData(src); |
280 | mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2; |
281 | mlib_s32 slb = mlib_ImageGetStride(src) >> 2; |
282 | mlib_s32 dw = mlib_ImageGetWidth(dst); |
283 | mlib_s32 dh = mlib_ImageGetHeight(dst); |
284 | mlib_s32 nch = mlib_ImageGetChannels(dst); |
285 | mlib_s32 i, j, j1, k; |
286 | |
287 | if (3 * wid_e + m > 1024) { |
288 | dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64)); |
289 | |
290 | if (dsa == NULL) |
291 | return MLIB_FAILURE; |
292 | } |
293 | |
294 | fsa = (mlib_f32 *) dsa; |
295 | |
296 | for (j = 0; j < dh; j++, da += dlb) { |
297 | for (k = 0; k < nch; k++) |
298 | if (cmask & (1 << (nch - 1 - k))) { |
299 | const mlib_f32 *sa1 = sa + k; |
300 | mlib_f32 *da1 = da + k; |
301 | const mlib_d64 *kernel1 = kernel; |
302 | |
303 | for (i = 0; i < dw; i++) |
304 | da1[i * nch] = 0.f; |
305 | for (j1 = 0; j1 < n; j1++, kernel1 += m) { |
306 | mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r); |
307 | mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch); |
308 | |
309 | if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2)) |
310 | sa1 += slb; |
311 | } |
312 | } |
313 | |
314 | if ((j >= dy_t) && (j < dh + n - dy_b - 2)) |
315 | sa += slb; |
316 | } |
317 | |
318 | if (dsa != dspace) |
319 | mlib_free(dsa); |
320 | return MLIB_SUCCESS; |
321 | } |
322 | |
323 | /***************************************************************/ |
324 | #if 0 |
325 | |
326 | void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst, |
327 | mlib_f32 *vdst, |
328 | const mlib_f32 *src, |
329 | const mlib_d64 *hfilter, |
330 | const mlib_d64 *vfilter, |
331 | mlib_s32 n, |
332 | mlib_s32 m, |
333 | mlib_s32 nch, |
334 | mlib_s32 dnch) |
335 | { |
336 | mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch; |
337 | mlib_s32 i, j; |
338 | |
339 | for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) { |
340 | mlib_f32 *src2 = src + 2 * nch; |
341 | mlib_f32 hval0 = (mlib_f32) hfilter[0]; |
342 | mlib_f32 vval0 = (mlib_f32) vfilter[0]; |
343 | mlib_f32 hval1 = (mlib_f32) hfilter[1]; |
344 | mlib_f32 vval1 = (mlib_f32) vfilter[1]; |
345 | mlib_f32 hval2 = (mlib_f32) hfilter[2]; |
346 | mlib_f32 vval2 = (mlib_f32) vfilter[2]; |
347 | mlib_f32 val0 = src[0]; |
348 | mlib_f32 val1 = src[nch]; |
349 | mlib_f32 hdvl = hdst[0]; |
350 | mlib_f32 vdvl = vdst[0]; |
351 | |
352 | #ifdef __SUNPRO_C |
353 | #pragma pipeloop(0) |
354 | #endif /* __SUNPRO_C */ |
355 | for (i = 0; i < n; i++) { |
356 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
357 | mlib_f32 vdvl0 = val0 * vval0 + vdvl; |
358 | mlib_f32 val2 = src2[i * nch]; |
359 | |
360 | hdvl = hdst1[i * dnch]; |
361 | vdvl = vdst1[i * dnch]; |
362 | hdvl0 += val1 * hval1; |
363 | vdvl0 += val1 * vval1; |
364 | hdvl0 += val2 * hval2; |
365 | vdvl0 += val2 * vval2; |
366 | val0 = val1; |
367 | val1 = val2; |
368 | |
369 | hdst[i * dnch] = hdvl0; |
370 | vdst[i * dnch] = vdvl0; |
371 | } |
372 | } |
373 | |
374 | if (j < m - 1) { |
375 | mlib_f32 *src2 = src + 2 * nch; |
376 | mlib_f32 hval0 = (mlib_f32) hfilter[0]; |
377 | mlib_f32 vval0 = (mlib_f32) vfilter[0]; |
378 | mlib_f32 hval1 = (mlib_f32) hfilter[1]; |
379 | mlib_f32 vval1 = (mlib_f32) vfilter[1]; |
380 | mlib_f32 val0 = src[0]; |
381 | mlib_f32 val1 = src[nch]; |
382 | mlib_f32 hdvl = hdst[0]; |
383 | mlib_f32 vdvl = vdst[0]; |
384 | |
385 | #ifdef __SUNPRO_C |
386 | #pragma pipeloop(0) |
387 | #endif /* __SUNPRO_C */ |
388 | for (i = 0; i < n; i++) { |
389 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
390 | mlib_f32 vdvl0 = val0 * vval0 + vdvl; |
391 | mlib_f32 val2 = src2[i * nch]; |
392 | |
393 | hdvl = hdst1[i * dnch]; |
394 | vdvl = vdst1[i * dnch]; |
395 | hdvl0 += val1 * hval1; |
396 | vdvl0 += val1 * vval1; |
397 | val0 = val1; |
398 | val1 = val2; |
399 | |
400 | hdst[i * dnch] = hdvl0; |
401 | vdst[i * dnch] = vdvl0; |
402 | } |
403 | |
404 | } |
405 | else if (j < m) { |
406 | mlib_f32 *src2 = src + 2 * nch; |
407 | mlib_f32 hval0 = (mlib_f32) hfilter[0]; |
408 | mlib_f32 vval0 = (mlib_f32) vfilter[0]; |
409 | mlib_f32 val0 = src[0]; |
410 | mlib_f32 val1 = src[nch]; |
411 | mlib_f32 hdvl = hdst[0]; |
412 | mlib_f32 vdvl = vdst[0]; |
413 | |
414 | #ifdef __SUNPRO_C |
415 | #pragma pipeloop(0) |
416 | #endif /* __SUNPRO_C */ |
417 | for (i = 0; i < n; i++) { |
418 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
419 | mlib_f32 vdvl0 = val0 * vval0 + vdvl; |
420 | mlib_f32 val2 = src2[i * nch]; |
421 | |
422 | hdvl = hdst1[i * dnch]; |
423 | vdvl = vdst1[i * dnch]; |
424 | val0 = val1; |
425 | val1 = val2; |
426 | |
427 | hdst[i * dnch] = hdvl0; |
428 | vdst[i * dnch] = vdvl0; |
429 | } |
430 | } |
431 | } |
432 | |
433 | /***************************************************************/ |
434 | void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst, |
435 | mlib_d64 *vdst, |
436 | const mlib_d64 *src, |
437 | const mlib_d64 *hfilter, |
438 | const mlib_d64 *vfilter, |
439 | mlib_s32 n, |
440 | mlib_s32 m, |
441 | mlib_s32 nch, |
442 | mlib_s32 dnch) |
443 | { |
444 | mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch; |
445 | mlib_s32 i, j; |
446 | |
447 | for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) { |
448 | mlib_d64 *src2 = src + 2 * nch; |
449 | mlib_d64 hval0 = hfilter[0]; |
450 | mlib_d64 vval0 = vfilter[0]; |
451 | mlib_d64 hval1 = hfilter[1]; |
452 | mlib_d64 vval1 = vfilter[1]; |
453 | mlib_d64 hval2 = hfilter[2]; |
454 | mlib_d64 vval2 = vfilter[2]; |
455 | mlib_d64 val0 = src[0]; |
456 | mlib_d64 val1 = src[nch]; |
457 | mlib_d64 hdvl = hdst[0]; |
458 | mlib_d64 vdvl = vdst[0]; |
459 | |
460 | #ifdef __SUNPRO_C |
461 | #pragma pipeloop(0) |
462 | #endif /* __SUNPRO_C */ |
463 | for (i = 0; i < n; i++) { |
464 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
465 | mlib_d64 vdvl0 = val0 * vval0 + vdvl; |
466 | mlib_d64 val2 = src2[i * nch]; |
467 | |
468 | hdvl = hdst1[i * dnch]; |
469 | vdvl = vdst1[i * dnch]; |
470 | hdvl0 += val1 * hval1; |
471 | vdvl0 += val1 * vval1; |
472 | hdvl0 += val2 * hval2; |
473 | vdvl0 += val2 * vval2; |
474 | val0 = val1; |
475 | val1 = val2; |
476 | |
477 | hdst[i * dnch] = hdvl0; |
478 | vdst[i * dnch] = vdvl0; |
479 | } |
480 | } |
481 | |
482 | if (j < m - 1) { |
483 | mlib_d64 *src2 = src + 2 * nch; |
484 | mlib_d64 hval0 = hfilter[0]; |
485 | mlib_d64 vval0 = vfilter[0]; |
486 | mlib_d64 hval1 = hfilter[1]; |
487 | mlib_d64 vval1 = vfilter[1]; |
488 | mlib_d64 val0 = src[0]; |
489 | mlib_d64 val1 = src[nch]; |
490 | mlib_d64 hdvl = hdst[0]; |
491 | mlib_d64 vdvl = vdst[0]; |
492 | |
493 | #ifdef __SUNPRO_C |
494 | #pragma pipeloop(0) |
495 | #endif /* __SUNPRO_C */ |
496 | for (i = 0; i < n; i++) { |
497 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
498 | mlib_d64 vdvl0 = val0 * vval0 + vdvl; |
499 | mlib_d64 val2 = src2[i * nch]; |
500 | |
501 | hdvl = hdst1[i * dnch]; |
502 | vdvl = vdst1[i * dnch]; |
503 | hdvl0 += val1 * hval1; |
504 | vdvl0 += val1 * vval1; |
505 | val0 = val1; |
506 | val1 = val2; |
507 | |
508 | hdst[i * dnch] = hdvl0; |
509 | vdst[i * dnch] = vdvl0; |
510 | } |
511 | |
512 | } |
513 | else if (j < m) { |
514 | mlib_d64 *src2 = src + 2 * nch; |
515 | mlib_d64 hval0 = hfilter[0]; |
516 | mlib_d64 vval0 = vfilter[0]; |
517 | mlib_d64 val0 = src[0]; |
518 | mlib_d64 val1 = src[nch]; |
519 | mlib_d64 hdvl = hdst[0]; |
520 | mlib_d64 vdvl = vdst[0]; |
521 | |
522 | #ifdef __SUNPRO_C |
523 | #pragma pipeloop(0) |
524 | #endif /* __SUNPRO_C */ |
525 | for (i = 0; i < n; i++) { |
526 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
527 | mlib_d64 vdvl0 = val0 * vval0 + vdvl; |
528 | mlib_d64 val2 = src2[i * nch]; |
529 | |
530 | hdvl = hdst1[i * dnch]; |
531 | vdvl = vdst1[i * dnch]; |
532 | val0 = val1; |
533 | val1 = val2; |
534 | |
535 | hdst[i * dnch] = hdvl0; |
536 | vdst[i * dnch] = vdvl0; |
537 | } |
538 | } |
539 | } |
540 | |
541 | #endif /* 0 */ |
542 | |
543 | /***************************************************************/ |
544 | void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst, |
545 | const mlib_d64 *src, |
546 | const mlib_d64 *kernel, |
547 | mlib_s32 n, |
548 | mlib_s32 m, |
549 | mlib_s32 nch, |
550 | mlib_s32 dnch) |
551 | { |
552 | mlib_d64 *hdst1 = dst + dnch; |
553 | mlib_s32 i, j; |
554 | |
555 | for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) { |
556 | const mlib_d64 *src2 = src + 2 * nch; |
557 | mlib_d64 hval0 = kernel[0]; |
558 | mlib_d64 hval1 = kernel[1]; |
559 | mlib_d64 hval2 = kernel[2]; |
560 | mlib_d64 val0 = src[0]; |
561 | mlib_d64 val1 = src[nch]; |
562 | mlib_d64 hdvl = dst[0]; |
563 | |
564 | #ifdef __SUNPRO_C |
565 | #pragma pipeloop(0) |
566 | #endif /* __SUNPRO_C */ |
567 | for (i = 0; i < n; i++) { |
568 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
569 | mlib_d64 val2 = src2[i * nch]; |
570 | |
571 | hdvl = hdst1[i * dnch]; |
572 | hdvl0 += val1 * hval1; |
573 | hdvl0 += val2 * hval2; |
574 | val0 = val1; |
575 | val1 = val2; |
576 | |
577 | dst[i * dnch] = hdvl0; |
578 | } |
579 | } |
580 | |
581 | if (j < m - 1) { |
582 | const mlib_d64 *src2 = src + 2 * nch; |
583 | mlib_d64 hval0 = kernel[0]; |
584 | mlib_d64 hval1 = kernel[1]; |
585 | mlib_d64 val0 = src[0]; |
586 | mlib_d64 val1 = src[nch]; |
587 | mlib_d64 hdvl = dst[0]; |
588 | |
589 | #ifdef __SUNPRO_C |
590 | #pragma pipeloop(0) |
591 | #endif /* __SUNPRO_C */ |
592 | for (i = 0; i < n; i++) { |
593 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
594 | mlib_d64 val2 = src2[i * nch]; |
595 | |
596 | hdvl = hdst1[i * dnch]; |
597 | hdvl0 += val1 * hval1; |
598 | val0 = val1; |
599 | val1 = val2; |
600 | |
601 | dst[i * dnch] = hdvl0; |
602 | } |
603 | |
604 | } |
605 | else if (j < m) { |
606 | const mlib_d64 *src2 = src + 2 * nch; |
607 | mlib_d64 hval0 = kernel[0]; |
608 | mlib_d64 val0 = src[0]; |
609 | mlib_d64 val1 = src[nch]; |
610 | mlib_d64 hdvl = dst[0]; |
611 | |
612 | #ifdef __SUNPRO_C |
613 | #pragma pipeloop(0) |
614 | #endif /* __SUNPRO_C */ |
615 | for (i = 0; i < n; i++) { |
616 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
617 | mlib_d64 val2 = src2[i * nch]; |
618 | |
619 | hdvl = hdst1[i * dnch]; |
620 | val0 = val1; |
621 | val1 = val2; |
622 | |
623 | dst[i * dnch] = hdvl0; |
624 | } |
625 | } |
626 | } |
627 | |
628 | /***************************************************************/ |
629 | void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst, |
630 | const mlib_d64 *src, |
631 | mlib_s32 n, |
632 | mlib_s32 nch, |
633 | mlib_s32 dx_l, |
634 | mlib_s32 dx_r) |
635 | { |
636 | mlib_s32 i; |
637 | mlib_d64 val = src[0]; |
638 | |
639 | for (i = 0; i < dx_l; i++) |
640 | dst[i] = val; |
641 | #ifdef __SUNPRO_C |
642 | #pragma pipeloop(0) |
643 | #endif /* __SUNPRO_C */ |
644 | for (; i < n - dx_r; i++) |
645 | dst[i] = src[nch * (i - dx_l)]; |
646 | val = dst[n - dx_r - 1]; |
647 | for (; i < n; i++) |
648 | dst[i] = val; |
649 | } |
650 | |
651 | /***************************************************************/ |
652 | mlib_status mlib_convMxNext_d64(mlib_image *dst, |
653 | const mlib_image *src, |
654 | const mlib_d64 *kernel, |
655 | mlib_s32 m, |
656 | mlib_s32 n, |
657 | mlib_s32 dx_l, |
658 | mlib_s32 dx_r, |
659 | mlib_s32 dy_t, |
660 | mlib_s32 dy_b, |
661 | mlib_s32 cmask) |
662 | { |
663 | mlib_d64 dspace[1024], *dsa = dspace; |
664 | mlib_s32 wid_e = mlib_ImageGetWidth(src); |
665 | mlib_d64 *da = mlib_ImageGetData(dst); |
666 | mlib_d64 *sa = mlib_ImageGetData(src); |
667 | mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3; |
668 | mlib_s32 slb = mlib_ImageGetStride(src) >> 3; |
669 | mlib_s32 dw = mlib_ImageGetWidth(dst); |
670 | mlib_s32 dh = mlib_ImageGetHeight(dst); |
671 | mlib_s32 nch = mlib_ImageGetChannels(dst); |
672 | mlib_s32 i, j, j1, k; |
673 | |
674 | if (3 * wid_e + m > 1024) { |
675 | dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64)); |
676 | |
677 | if (dsa == NULL) |
678 | return MLIB_FAILURE; |
679 | } |
680 | |
681 | for (j = 0; j < dh; j++, da += dlb) { |
682 | for (k = 0; k < nch; k++) |
683 | if (cmask & (1 << (nch - 1 - k))) { |
684 | mlib_d64 *sa1 = sa + k; |
685 | mlib_d64 *da1 = da + k; |
686 | const mlib_d64 *kernel1 = kernel; |
687 | |
688 | for (i = 0; i < dw; i++) |
689 | da1[i * nch] = 0.; |
690 | for (j1 = 0; j1 < n; j1++, kernel1 += m) { |
691 | mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r); |
692 | mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch); |
693 | |
694 | if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2)) |
695 | sa1 += slb; |
696 | } |
697 | } |
698 | |
699 | if ((j >= dy_t) && (j < dh + n - dy_b - 2)) |
700 | sa += slb; |
701 | } |
702 | |
703 | if (dsa != dspace) |
704 | mlib_free(dsa); |
705 | return MLIB_SUCCESS; |
706 | } |
707 | |
708 | /***************************************************************/ |
709 | |