1/*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27/*
28 * FUNCTION
29 * mlib_ImageConvMxN_Fp - image convolution with edge condition
30 *
31 * SYNOPSIS
32 * mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst,
33 * const mlib_image *src,
34 * const mlib_d64 *kernel,
35 * mlib_s32 m,
36 * mlib_s32 n,
37 * mlib_s32 dm,
38 * mlib_s32 dn,
39 * mlib_s32 cmask,
40 * mlib_edge edge)
41 *
42 * ARGUMENTS
43 * dst Pointer to destination image.
44 * src Pointer to source image.
 *      m         Kernel width (m must be at least 1).
 *      n         Kernel height (n must be at least 1).
47 * dm, dn Position of key element in convolution kernel.
48 * kernel Pointer to convolution kernel.
 *      cmask     Channel mask selecting the channels to be convolved.
 *                Each bit represents one channel of the image; the
 *                channels whose bits are set to 1 are the ones processed.
52 * edge Type of edge condition.
53 *
54 * DESCRIPTION
55 * 2-D convolution, MxN kernel.
56 *
57 * The center of the source image is mapped to the center of the
58 * destination image.
59 * The unselected channels are not overwritten. If both src and dst have
60 * just one channel, cmask is ignored.
61 *
62 * The edge condition can be one of the following:
63 * MLIB_EDGE_DST_NO_WRITE (default)
64 * MLIB_EDGE_DST_FILL_ZERO
65 * MLIB_EDGE_DST_COPY_SRC
66 * MLIB_EDGE_SRC_EXTEND
67 *
68 * RESTRICTION
69 * The src and the dst must be the same type and have same number
70 * of channels (1, 2, 3, or 4).
71 * m >= 1, n >= 1,
72 * 0 <= dm < m, 0 <= dn < n.
73 */
74
75#include "mlib_image.h"
76#include "mlib_ImageCheck.h"
77#include "mlib_SysMath.h"
78#include "mlib_ImageConv.h"
79
80/***************************************************************/
81static void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst,
82 const mlib_f32 *src,
83 const mlib_d64 *kernel,
84 mlib_s32 n,
85 mlib_s32 m,
86 mlib_s32 nch,
87 mlib_s32 dnch);
88
89static void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst,
90 const mlib_f32 *src,
91 mlib_s32 n,
92 mlib_s32 nch,
93 mlib_s32 dx_l,
94 mlib_s32 dx_r);
95
96static void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst,
97 const mlib_d64 *src,
98 const mlib_d64 *kernel,
99 mlib_s32 n,
100 mlib_s32 m,
101 mlib_s32 nch,
102 mlib_s32 dnch);
103
104static void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst,
105 const mlib_d64 *src,
106 mlib_s32 n,
107 mlib_s32 nch,
108 mlib_s32 dx_l,
109 mlib_s32 dx_r);
110
111/***************************************************************/
112#if 0
113static void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst,
114 mlib_f32 *vdst,
115 const mlib_f32 *src,
116 const mlib_d64 *hfilter,
117 const mlib_d64 *vfilter,
118 mlib_s32 n,
119 mlib_s32 m,
120 mlib_s32 nch,
121 mlib_s32 dnch);
122
123static void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst,
124 mlib_d64 *vdst,
125 const mlib_d64 *src,
126 const mlib_d64 *hfilter,
127 const mlib_d64 *vfilter,
128 mlib_s32 n,
129 mlib_s32 m,
130 mlib_s32 nch,
131 mlib_s32 dnch);
132#endif /* 0 */
133
134/***************************************************************/
135mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst,
136 const mlib_image *src,
137 const mlib_d64 *kernel,
138 mlib_s32 m,
139 mlib_s32 n,
140 mlib_s32 dm,
141 mlib_s32 dn,
142 mlib_s32 cmask,
143 mlib_edge edge)
144{
145 mlib_type type;
146
147 MLIB_IMAGE_CHECK(dst);
148 type = mlib_ImageGetType(dst);
149
150 if (type != MLIB_FLOAT && type != MLIB_DOUBLE)
151 return MLIB_FAILURE;
152
153 return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge);
154}
155
156/***************************************************************/
157void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst,
158 const mlib_f32 *src,
159 const mlib_d64 *kernel,
160 mlib_s32 n,
161 mlib_s32 m,
162 mlib_s32 nch,
163 mlib_s32 dnch)
164{
165 mlib_f32 *hdst1 = dst + dnch;
166 mlib_s32 i, j;
167
168 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
169 const mlib_f32 *src2 = src + 2 * nch;
170 mlib_f32 hval0 = (mlib_f32) kernel[0];
171 mlib_f32 hval1 = (mlib_f32) kernel[1];
172 mlib_f32 hval2 = (mlib_f32) kernel[2];
173 mlib_f32 val0 = src[0];
174 mlib_f32 val1 = src[nch];
175 mlib_f32 hdvl = dst[0];
176
177#ifdef __SUNPRO_C
178#pragma pipeloop(0)
179#endif /* __SUNPRO_C */
180 for (i = 0; i < n; i++) {
181 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
182 mlib_f32 val2 = src2[i * nch];
183
184 hdvl = hdst1[i * dnch];
185 hdvl0 += val1 * hval1;
186 hdvl0 += val2 * hval2;
187 val0 = val1;
188 val1 = val2;
189
190 dst[i * dnch] = hdvl0;
191 }
192 }
193
194 if (j < m - 1) {
195 const mlib_f32 *src2 = src + 2 * nch;
196 mlib_f32 hval0 = (mlib_f32) kernel[0];
197 mlib_f32 hval1 = (mlib_f32) kernel[1];
198 mlib_f32 val0 = src[0];
199 mlib_f32 val1 = src[nch];
200 mlib_f32 hdvl = dst[0];
201#ifdef __SUNPRO_C
202#pragma pipeloop(0)
203#endif /* __SUNPRO_C */
204 for (i = 0; i < n; i++) {
205 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
206 mlib_f32 val2 = src2[i * nch];
207
208 hdvl = hdst1[i * dnch];
209 hdvl0 += val1 * hval1;
210 val0 = val1;
211 val1 = val2;
212
213 dst[i * dnch] = hdvl0;
214 }
215
216 }
217 else if (j < m) {
218 const mlib_f32 *src2 = src + 2 * nch;
219 mlib_f32 hval0 = (mlib_f32) kernel[0];
220 mlib_f32 val0 = src[0];
221 mlib_f32 val1 = src[nch];
222 mlib_f32 hdvl = dst[0];
223
224#ifdef __SUNPRO_C
225#pragma pipeloop(0)
226#endif /* __SUNPRO_C */
227 for (i = 0; i < n; i++) {
228 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
229 mlib_f32 val2 = src2[i * nch];
230
231 hdvl = hdst1[i * dnch];
232 val0 = val1;
233 val1 = val2;
234
235 dst[i * dnch] = hdvl0;
236 }
237 }
238}
239
240/***************************************************************/
241void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst,
242 const mlib_f32 *src,
243 mlib_s32 n,
244 mlib_s32 nch,
245 mlib_s32 dx_l,
246 mlib_s32 dx_r)
247{
248 mlib_s32 i;
249 mlib_f32 val = src[0];
250
251 for (i = 0; i < dx_l; i++)
252 dst[i] = val;
253#ifdef __SUNPRO_C
254#pragma pipeloop(0)
255#endif /* __SUNPRO_C */
256 for (; i < n - dx_r; i++)
257 dst[i] = src[nch * (i - dx_l)];
258 val = dst[n - dx_r - 1];
259 for (; i < n; i++)
260 dst[i] = val;
261}
262
263/***************************************************************/
264mlib_status mlib_convMxNext_f32(mlib_image *dst,
265 const mlib_image *src,
266 const mlib_d64 *kernel,
267 mlib_s32 m,
268 mlib_s32 n,
269 mlib_s32 dx_l,
270 mlib_s32 dx_r,
271 mlib_s32 dy_t,
272 mlib_s32 dy_b,
273 mlib_s32 cmask)
274{
275 mlib_d64 dspace[1024], *dsa = dspace;
276 mlib_s32 wid_e = mlib_ImageGetWidth(src);
277 mlib_f32 *fsa;
278 mlib_f32 *da = mlib_ImageGetData(dst);
279 mlib_f32 *sa = mlib_ImageGetData(src);
280 mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2;
281 mlib_s32 slb = mlib_ImageGetStride(src) >> 2;
282 mlib_s32 dw = mlib_ImageGetWidth(dst);
283 mlib_s32 dh = mlib_ImageGetHeight(dst);
284 mlib_s32 nch = mlib_ImageGetChannels(dst);
285 mlib_s32 i, j, j1, k;
286
287 if (3 * wid_e + m > 1024) {
288 dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));
289
290 if (dsa == NULL)
291 return MLIB_FAILURE;
292 }
293
294 fsa = (mlib_f32 *) dsa;
295
296 for (j = 0; j < dh; j++, da += dlb) {
297 for (k = 0; k < nch; k++)
298 if (cmask & (1 << (nch - 1 - k))) {
299 const mlib_f32 *sa1 = sa + k;
300 mlib_f32 *da1 = da + k;
301 const mlib_d64 *kernel1 = kernel;
302
303 for (i = 0; i < dw; i++)
304 da1[i * nch] = 0.f;
305 for (j1 = 0; j1 < n; j1++, kernel1 += m) {
306 mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r);
307 mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch);
308
309 if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))
310 sa1 += slb;
311 }
312 }
313
314 if ((j >= dy_t) && (j < dh + n - dy_b - 2))
315 sa += slb;
316 }
317
318 if (dsa != dspace)
319 mlib_free(dsa);
320 return MLIB_SUCCESS;
321}
322
323/***************************************************************/
324#if 0
325
326void mlib_ImageConvMxNMulAdd2_F32(mlib_f32 *hdst,
327 mlib_f32 *vdst,
328 const mlib_f32 *src,
329 const mlib_d64 *hfilter,
330 const mlib_d64 *vfilter,
331 mlib_s32 n,
332 mlib_s32 m,
333 mlib_s32 nch,
334 mlib_s32 dnch)
335{
336 mlib_f32 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
337 mlib_s32 i, j;
338
339 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
340 mlib_f32 *src2 = src + 2 * nch;
341 mlib_f32 hval0 = (mlib_f32) hfilter[0];
342 mlib_f32 vval0 = (mlib_f32) vfilter[0];
343 mlib_f32 hval1 = (mlib_f32) hfilter[1];
344 mlib_f32 vval1 = (mlib_f32) vfilter[1];
345 mlib_f32 hval2 = (mlib_f32) hfilter[2];
346 mlib_f32 vval2 = (mlib_f32) vfilter[2];
347 mlib_f32 val0 = src[0];
348 mlib_f32 val1 = src[nch];
349 mlib_f32 hdvl = hdst[0];
350 mlib_f32 vdvl = vdst[0];
351
352#ifdef __SUNPRO_C
353#pragma pipeloop(0)
354#endif /* __SUNPRO_C */
355 for (i = 0; i < n; i++) {
356 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
357 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
358 mlib_f32 val2 = src2[i * nch];
359
360 hdvl = hdst1[i * dnch];
361 vdvl = vdst1[i * dnch];
362 hdvl0 += val1 * hval1;
363 vdvl0 += val1 * vval1;
364 hdvl0 += val2 * hval2;
365 vdvl0 += val2 * vval2;
366 val0 = val1;
367 val1 = val2;
368
369 hdst[i * dnch] = hdvl0;
370 vdst[i * dnch] = vdvl0;
371 }
372 }
373
374 if (j < m - 1) {
375 mlib_f32 *src2 = src + 2 * nch;
376 mlib_f32 hval0 = (mlib_f32) hfilter[0];
377 mlib_f32 vval0 = (mlib_f32) vfilter[0];
378 mlib_f32 hval1 = (mlib_f32) hfilter[1];
379 mlib_f32 vval1 = (mlib_f32) vfilter[1];
380 mlib_f32 val0 = src[0];
381 mlib_f32 val1 = src[nch];
382 mlib_f32 hdvl = hdst[0];
383 mlib_f32 vdvl = vdst[0];
384
385#ifdef __SUNPRO_C
386#pragma pipeloop(0)
387#endif /* __SUNPRO_C */
388 for (i = 0; i < n; i++) {
389 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
390 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
391 mlib_f32 val2 = src2[i * nch];
392
393 hdvl = hdst1[i * dnch];
394 vdvl = vdst1[i * dnch];
395 hdvl0 += val1 * hval1;
396 vdvl0 += val1 * vval1;
397 val0 = val1;
398 val1 = val2;
399
400 hdst[i * dnch] = hdvl0;
401 vdst[i * dnch] = vdvl0;
402 }
403
404 }
405 else if (j < m) {
406 mlib_f32 *src2 = src + 2 * nch;
407 mlib_f32 hval0 = (mlib_f32) hfilter[0];
408 mlib_f32 vval0 = (mlib_f32) vfilter[0];
409 mlib_f32 val0 = src[0];
410 mlib_f32 val1 = src[nch];
411 mlib_f32 hdvl = hdst[0];
412 mlib_f32 vdvl = vdst[0];
413
414#ifdef __SUNPRO_C
415#pragma pipeloop(0)
416#endif /* __SUNPRO_C */
417 for (i = 0; i < n; i++) {
418 mlib_f32 hdvl0 = val0 * hval0 + hdvl;
419 mlib_f32 vdvl0 = val0 * vval0 + vdvl;
420 mlib_f32 val2 = src2[i * nch];
421
422 hdvl = hdst1[i * dnch];
423 vdvl = vdst1[i * dnch];
424 val0 = val1;
425 val1 = val2;
426
427 hdst[i * dnch] = hdvl0;
428 vdst[i * dnch] = vdvl0;
429 }
430 }
431}
432
433/***************************************************************/
434void mlib_ImageConvMxNMulAdd2_D64(mlib_d64 *hdst,
435 mlib_d64 *vdst,
436 const mlib_d64 *src,
437 const mlib_d64 *hfilter,
438 const mlib_d64 *vfilter,
439 mlib_s32 n,
440 mlib_s32 m,
441 mlib_s32 nch,
442 mlib_s32 dnch)
443{
444 mlib_d64 *hdst1 = hdst + dnch, *vdst1 = vdst + dnch;
445 mlib_s32 i, j;
446
447 for (j = 0; j < m - 2; j += 3, src += 3 * nch, hfilter += 3, vfilter += 3) {
448 mlib_d64 *src2 = src + 2 * nch;
449 mlib_d64 hval0 = hfilter[0];
450 mlib_d64 vval0 = vfilter[0];
451 mlib_d64 hval1 = hfilter[1];
452 mlib_d64 vval1 = vfilter[1];
453 mlib_d64 hval2 = hfilter[2];
454 mlib_d64 vval2 = vfilter[2];
455 mlib_d64 val0 = src[0];
456 mlib_d64 val1 = src[nch];
457 mlib_d64 hdvl = hdst[0];
458 mlib_d64 vdvl = vdst[0];
459
460#ifdef __SUNPRO_C
461#pragma pipeloop(0)
462#endif /* __SUNPRO_C */
463 for (i = 0; i < n; i++) {
464 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
465 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
466 mlib_d64 val2 = src2[i * nch];
467
468 hdvl = hdst1[i * dnch];
469 vdvl = vdst1[i * dnch];
470 hdvl0 += val1 * hval1;
471 vdvl0 += val1 * vval1;
472 hdvl0 += val2 * hval2;
473 vdvl0 += val2 * vval2;
474 val0 = val1;
475 val1 = val2;
476
477 hdst[i * dnch] = hdvl0;
478 vdst[i * dnch] = vdvl0;
479 }
480 }
481
482 if (j < m - 1) {
483 mlib_d64 *src2 = src + 2 * nch;
484 mlib_d64 hval0 = hfilter[0];
485 mlib_d64 vval0 = vfilter[0];
486 mlib_d64 hval1 = hfilter[1];
487 mlib_d64 vval1 = vfilter[1];
488 mlib_d64 val0 = src[0];
489 mlib_d64 val1 = src[nch];
490 mlib_d64 hdvl = hdst[0];
491 mlib_d64 vdvl = vdst[0];
492
493#ifdef __SUNPRO_C
494#pragma pipeloop(0)
495#endif /* __SUNPRO_C */
496 for (i = 0; i < n; i++) {
497 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
498 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
499 mlib_d64 val2 = src2[i * nch];
500
501 hdvl = hdst1[i * dnch];
502 vdvl = vdst1[i * dnch];
503 hdvl0 += val1 * hval1;
504 vdvl0 += val1 * vval1;
505 val0 = val1;
506 val1 = val2;
507
508 hdst[i * dnch] = hdvl0;
509 vdst[i * dnch] = vdvl0;
510 }
511
512 }
513 else if (j < m) {
514 mlib_d64 *src2 = src + 2 * nch;
515 mlib_d64 hval0 = hfilter[0];
516 mlib_d64 vval0 = vfilter[0];
517 mlib_d64 val0 = src[0];
518 mlib_d64 val1 = src[nch];
519 mlib_d64 hdvl = hdst[0];
520 mlib_d64 vdvl = vdst[0];
521
522#ifdef __SUNPRO_C
523#pragma pipeloop(0)
524#endif /* __SUNPRO_C */
525 for (i = 0; i < n; i++) {
526 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
527 mlib_d64 vdvl0 = val0 * vval0 + vdvl;
528 mlib_d64 val2 = src2[i * nch];
529
530 hdvl = hdst1[i * dnch];
531 vdvl = vdst1[i * dnch];
532 val0 = val1;
533 val1 = val2;
534
535 hdst[i * dnch] = hdvl0;
536 vdst[i * dnch] = vdvl0;
537 }
538 }
539}
540
541#endif /* 0 */
542
543/***************************************************************/
544void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst,
545 const mlib_d64 *src,
546 const mlib_d64 *kernel,
547 mlib_s32 n,
548 mlib_s32 m,
549 mlib_s32 nch,
550 mlib_s32 dnch)
551{
552 mlib_d64 *hdst1 = dst + dnch;
553 mlib_s32 i, j;
554
555 for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) {
556 const mlib_d64 *src2 = src + 2 * nch;
557 mlib_d64 hval0 = kernel[0];
558 mlib_d64 hval1 = kernel[1];
559 mlib_d64 hval2 = kernel[2];
560 mlib_d64 val0 = src[0];
561 mlib_d64 val1 = src[nch];
562 mlib_d64 hdvl = dst[0];
563
564#ifdef __SUNPRO_C
565#pragma pipeloop(0)
566#endif /* __SUNPRO_C */
567 for (i = 0; i < n; i++) {
568 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
569 mlib_d64 val2 = src2[i * nch];
570
571 hdvl = hdst1[i * dnch];
572 hdvl0 += val1 * hval1;
573 hdvl0 += val2 * hval2;
574 val0 = val1;
575 val1 = val2;
576
577 dst[i * dnch] = hdvl0;
578 }
579 }
580
581 if (j < m - 1) {
582 const mlib_d64 *src2 = src + 2 * nch;
583 mlib_d64 hval0 = kernel[0];
584 mlib_d64 hval1 = kernel[1];
585 mlib_d64 val0 = src[0];
586 mlib_d64 val1 = src[nch];
587 mlib_d64 hdvl = dst[0];
588
589#ifdef __SUNPRO_C
590#pragma pipeloop(0)
591#endif /* __SUNPRO_C */
592 for (i = 0; i < n; i++) {
593 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
594 mlib_d64 val2 = src2[i * nch];
595
596 hdvl = hdst1[i * dnch];
597 hdvl0 += val1 * hval1;
598 val0 = val1;
599 val1 = val2;
600
601 dst[i * dnch] = hdvl0;
602 }
603
604 }
605 else if (j < m) {
606 const mlib_d64 *src2 = src + 2 * nch;
607 mlib_d64 hval0 = kernel[0];
608 mlib_d64 val0 = src[0];
609 mlib_d64 val1 = src[nch];
610 mlib_d64 hdvl = dst[0];
611
612#ifdef __SUNPRO_C
613#pragma pipeloop(0)
614#endif /* __SUNPRO_C */
615 for (i = 0; i < n; i++) {
616 mlib_d64 hdvl0 = val0 * hval0 + hdvl;
617 mlib_d64 val2 = src2[i * nch];
618
619 hdvl = hdst1[i * dnch];
620 val0 = val1;
621 val1 = val2;
622
623 dst[i * dnch] = hdvl0;
624 }
625 }
626}
627
628/***************************************************************/
629void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst,
630 const mlib_d64 *src,
631 mlib_s32 n,
632 mlib_s32 nch,
633 mlib_s32 dx_l,
634 mlib_s32 dx_r)
635{
636 mlib_s32 i;
637 mlib_d64 val = src[0];
638
639 for (i = 0; i < dx_l; i++)
640 dst[i] = val;
641#ifdef __SUNPRO_C
642#pragma pipeloop(0)
643#endif /* __SUNPRO_C */
644 for (; i < n - dx_r; i++)
645 dst[i] = src[nch * (i - dx_l)];
646 val = dst[n - dx_r - 1];
647 for (; i < n; i++)
648 dst[i] = val;
649}
650
651/***************************************************************/
652mlib_status mlib_convMxNext_d64(mlib_image *dst,
653 const mlib_image *src,
654 const mlib_d64 *kernel,
655 mlib_s32 m,
656 mlib_s32 n,
657 mlib_s32 dx_l,
658 mlib_s32 dx_r,
659 mlib_s32 dy_t,
660 mlib_s32 dy_b,
661 mlib_s32 cmask)
662{
663 mlib_d64 dspace[1024], *dsa = dspace;
664 mlib_s32 wid_e = mlib_ImageGetWidth(src);
665 mlib_d64 *da = mlib_ImageGetData(dst);
666 mlib_d64 *sa = mlib_ImageGetData(src);
667 mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3;
668 mlib_s32 slb = mlib_ImageGetStride(src) >> 3;
669 mlib_s32 dw = mlib_ImageGetWidth(dst);
670 mlib_s32 dh = mlib_ImageGetHeight(dst);
671 mlib_s32 nch = mlib_ImageGetChannels(dst);
672 mlib_s32 i, j, j1, k;
673
674 if (3 * wid_e + m > 1024) {
675 dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64));
676
677 if (dsa == NULL)
678 return MLIB_FAILURE;
679 }
680
681 for (j = 0; j < dh; j++, da += dlb) {
682 for (k = 0; k < nch; k++)
683 if (cmask & (1 << (nch - 1 - k))) {
684 mlib_d64 *sa1 = sa + k;
685 mlib_d64 *da1 = da + k;
686 const mlib_d64 *kernel1 = kernel;
687
688 for (i = 0; i < dw; i++)
689 da1[i * nch] = 0.;
690 for (j1 = 0; j1 < n; j1++, kernel1 += m) {
691 mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r);
692 mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch);
693
694 if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2))
695 sa1 += slb;
696 }
697 }
698
699 if ((j >= dy_t) && (j < dh + n - dy_b - 2))
700 sa += slb;
701 }
702
703 if (dsa != dspace)
704 mlib_free(dsa);
705 return MLIB_SUCCESS;
706}
707
708/***************************************************************/
709