1 | /* |
2 | * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | |
27 | /* |
28 | * FUNCTIONS |
29 | * mlib_ImageCopy - Direct copy from one image to another. |
30 | * |
31 | * SYNOPSIS |
32 | * mlib_status mlib_ImageCopy(mlib_image *dst, |
33 | * const mlib_image *src); |
34 | * |
35 | * ARGUMENT |
36 | * dst pointer to output or destination image |
37 | * src pointer to input or source image |
38 | * |
39 | * RESTRICTION |
40 | * src and dst must have the same size, type and number of channels. |
41 | * They can have 1, 2, 3 or 4 channels of MLIB_BIT, MLIB_BYTE, MLIB_SHORT, |
42 | * MLIB_USHORT, MLIB_INT, MLIB_FLOAT or MLIB_DOUBLE data type. |
43 | * |
44 | * DESCRIPTION |
45 | * Direct copy from one image to another |
46 | */ |
47 | |
48 | #include <stdlib.h> |
49 | #include "mlib_image.h" |
50 | #include "mlib_ImageCheck.h" |
51 | #include "mlib_ImageCopy.h" |
52 | |
53 | /***************************************************************/ |
54 | #ifdef _MSC_VER |
55 | #pragma optimize("", off) /* Fix bug 4195132 */ |
56 | #endif /* _MSC_VER */ |
57 | |
58 | /***************************************************************/ |
/*
 * TYPE_64BIT is the type this file uses to move data in 8-byte units.
 *
 * Do not perform the copying through the mlib_d64 data type on x86:
 * when i386 is defined a struct of two 32-bit integers is used instead;
 * every other build uses mlib_d64 directly.
 * NOTE(review): presumably this keeps raw bit patterns from passing
 * through floating-point load/store on x86 - confirm.
 */
#ifdef i386

typedef struct {
  mlib_s32 int0, int1;
} two_int;

#define TYPE_64BIT two_int

#else /* i386 */

#define TYPE_64BIT mlib_d64
#endif /* i386 */
72 | |
73 | /***************************************************************/ |
/*
 * Forward declarations of the file-local copy routines defined below:
 * one per element size (u8, s16, s32, d64) taking whole images, plus
 * mlib_c_ImageCopy_a1, which copies `size` TYPE_64BIT units between
 * two raw buffers.
 */
static void mlib_c_ImageCopy_u8(const mlib_image *src,
                                mlib_image       *dst);
static void mlib_c_ImageCopy_s16(const mlib_image *src,
                                 mlib_image       *dst);
static void mlib_c_ImageCopy_s32(const mlib_image *src,
                                 mlib_image       *dst);
static void mlib_c_ImageCopy_d64(const mlib_image *src,
                                 mlib_image       *dst);
static void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp,
                                TYPE_64BIT       *dp,
                                mlib_s32         size);
85 | |
86 | /***************************************************************/ |
87 | mlib_status mlib_ImageCopy(mlib_image *dst, |
88 | const mlib_image *src) |
89 | { |
90 | mlib_s32 s_offset, d_offset; |
91 | mlib_s32 size, s_stride, d_stride; |
92 | mlib_s32 width; /* width in bytes of src and dst */ |
93 | mlib_s32 height; /* height in lines of src and dst */ |
94 | mlib_u8 *sa, *da; |
95 | mlib_s32 j; |
96 | |
97 | MLIB_IMAGE_CHECK(src); |
98 | MLIB_IMAGE_CHECK(dst); |
99 | MLIB_IMAGE_TYPE_EQUAL(src, dst); |
100 | MLIB_IMAGE_CHAN_EQUAL(src, dst); |
101 | MLIB_IMAGE_SIZE_EQUAL(src, dst); |
102 | |
103 | switch (mlib_ImageGetType(dst)) { |
104 | case MLIB_BIT: |
105 | width = mlib_ImageGetWidth(dst) * mlib_ImageGetChannels(dst); /* size in bits */ |
106 | height = mlib_ImageGetHeight(src); |
107 | sa = (mlib_u8 *) mlib_ImageGetData(src); |
108 | da = (mlib_u8 *) mlib_ImageGetData(dst); |
109 | |
110 | if (!mlib_ImageIsNotOneDvector(src) && !mlib_ImageIsNotOneDvector(dst)) { |
111 | size = height * (width >> 3); |
112 | if (!mlib_ImageIsNotAligned8(src) && !mlib_ImageIsNotAligned8(dst) && ((size & 7) == 0)) { |
113 | |
114 | mlib_c_ImageCopy_a1((TYPE_64BIT *) sa, (TYPE_64BIT *) da, size >> 3); |
115 | } |
116 | else { |
117 | |
118 | mlib_ImageCopy_na(sa, da, size); |
119 | } |
120 | } |
121 | else { |
122 | s_stride = mlib_ImageGetStride(src); |
123 | d_stride = mlib_ImageGetStride(dst); |
124 | s_offset = mlib_ImageGetBitOffset(src); /* in bits */ |
125 | d_offset = mlib_ImageGetBitOffset(dst); /* in bits */ |
126 | if (s_offset == d_offset) { |
127 | for (j = 0; j < height; j++) { |
128 | mlib_ImageCopy_bit_al(sa, da, width, s_offset); |
129 | sa += s_stride; |
130 | da += d_stride; |
131 | } |
132 | } |
133 | else { |
134 | for (j = 0; j < height; j++) { |
135 | mlib_ImageCopy_bit_na(sa, da, width, s_offset, d_offset); |
136 | sa += s_stride; |
137 | da += d_stride; |
138 | } |
139 | } |
140 | } |
141 | |
142 | break; |
143 | case MLIB_BYTE: |
144 | mlib_c_ImageCopy_u8(src, dst); |
145 | break; |
146 | case MLIB_SHORT: |
147 | case MLIB_USHORT: |
148 | mlib_c_ImageCopy_s16(src, dst); |
149 | break; |
150 | case MLIB_INT: |
151 | case MLIB_FLOAT: |
152 | mlib_c_ImageCopy_s32(src, dst); |
153 | break; |
154 | case MLIB_DOUBLE: |
155 | mlib_c_ImageCopy_d64(src, dst); |
156 | break; |
157 | default: |
158 | return MLIB_FAILURE; /* MLIB_BIT is not supported here */ |
159 | } |
160 | |
161 | return MLIB_SUCCESS; |
162 | } |
163 | |
164 | /***************************************************************/ |
/*
 * PREPAREVARS(type) - declare and initialise the locals shared by the
 * typed copy routines. It expects image pointers named `src` and `dst`
 * to be in scope and introduces:
 *   psrc, pdst              image data viewed as `type *`
 *   src_height              height of src
 *   src_width               width of src multiplied by the channel count
 *   src_stride, dst_stride  image strides divided by sizeof(type)
 *   chan                    channel count of dst
 *   i, j                    loop counters for the caller
 *
 * When the row width equals both strides, the rows are collapsed into a
 * single row of src_width * src_height elements and src_height is set
 * to 1.
 *
 * The expansion ends with an `if` statement, so no further declarations
 * can follow it in the same block under C89 rules.
 */
#define PREPAREVARS(type)                                        \
  type *psrc = (type *) mlib_ImageGetData(src);                  \
  type *pdst = (type *) mlib_ImageGetData(dst);                  \
  mlib_s32 src_height = mlib_ImageGetHeight(src);                \
  mlib_s32 src_width  = mlib_ImageGetWidth(src);                 \
  mlib_s32 src_stride = mlib_ImageGetStride(src) / sizeof(type); \
  mlib_s32 dst_stride = mlib_ImageGetStride(dst) / sizeof(type); \
  mlib_s32 chan = mlib_ImageGetChannels(dst);                    \
  mlib_s32 i, j;                                                 \
                                                                 \
  src_width *= chan;                                             \
  if (src_width == src_stride && src_width == dst_stride) {      \
    src_width *= src_height;                                     \
    src_height = 1;                                              \
  }
180 | |
181 | /***************************************************************/ |
/*
 * STRIP(pd, ps, w, h, data_type) - element-wise copy of h rows of w
 * elements from ps to pd.
 *
 * Uses the caller's `i`, `j`, `src_stride` and `dst_stride` (row strides
 * in elements). When w is odd the first element of a row is copied on
 * its own; the remaining elements are copied in pairs.
 *
 * Every macro argument is parenthesized so that expression arguments
 * expand correctly, and the body is wrapped in do { } while (0) so the
 * macro behaves as a single statement.
 */
#define STRIP(pd, ps, w, h, data_type) do {                      \
    data_type s0, s1;                                            \
    for (i = 0; i < (h); i++) {                                  \
      j = (w) & 1;                                               \
      if (j != 0)                                                \
        (pd)[i * dst_stride] = (ps)[i * src_stride];             \
      for (; j < (w); j += 2) {                                  \
        s0 = (ps)[i * src_stride + j];                           \
        s1 = (ps)[i * src_stride + j + 1];                       \
        (pd)[i * dst_stride + j] = s0;                           \
        (pd)[i * dst_stride + j + 1] = s1;                       \
      }                                                          \
    }                                                            \
  } while (0)
195 | |
196 | /***************************************************************/ |
197 | /* |
198 | * Both bit offsets of source and distination are the same |
199 | */ |
200 | |
201 | void mlib_ImageCopy_bit_al(const mlib_u8 *sa, |
202 | mlib_u8 *da, |
203 | mlib_s32 size, |
204 | mlib_s32 offset) |
205 | { |
206 | mlib_s32 b_size, i, j; |
207 | TYPE_64BIT *sp, *dp; |
208 | mlib_u8 mask0 = 0xFF; |
209 | mlib_u8 src, mask; |
210 | |
211 | if (size <= 0) return; |
212 | |
213 | if (size <= (8 - offset)) { |
214 | mask = mask0 << (8 - size); |
215 | mask >>= offset; |
216 | src = da[0]; |
217 | da[0] = (src & (~mask)) | (sa[0] & mask); |
218 | return; |
219 | } |
220 | |
221 | mask = mask0 >> offset; |
222 | src = da[0]; |
223 | da[0] = (src & (~mask)) | (sa[0] & mask); |
224 | da++; |
225 | sa++; |
226 | size = size - 8 + offset; |
227 | b_size = size >> 3; /* size in bytes */ |
228 | |
229 | for (j = 0; (j < b_size) && (((mlib_addr) da & 7) != 0); j++) |
230 | *da++ = *sa++; |
231 | |
232 | if ((((mlib_addr) sa ^ (mlib_addr) da) & 7) == 0) { |
233 | sp = (TYPE_64BIT *) sa; |
234 | dp = (TYPE_64BIT *) da; |
235 | #ifdef __SUNPRO_C |
236 | #pragma pipeloop(0) |
237 | #endif /* __SUNPRO_C */ |
238 | for (i = 0; j <= (b_size - 8); j += 8, i++) { |
239 | dp[i] = sp[i]; |
240 | } |
241 | |
242 | sa += i << 3; |
243 | da += i << 3; |
244 | } |
245 | else { |
246 | #ifdef _NO_LONGLONG |
247 | if ((((mlib_addr) sa ^ (mlib_addr) da) & 3) == 0) { |
248 | mlib_u32 *pws, *pwd; |
249 | |
250 | pws = (mlib_u32 *) sa; |
251 | pwd = (mlib_u32 *) da; |
252 | #ifdef __SUNPRO_C |
253 | #pragma pipeloop(0) |
254 | #endif /* __SUNPRO_C */ |
255 | for (i = 0; j <= (b_size - 4); j += 4, i++) { |
256 | pwd[i] = pws[i]; |
257 | } |
258 | |
259 | sa += i << 2; |
260 | da += i << 2; |
261 | } |
262 | else { |
263 | mlib_u32 *pws, *pwd, src0, src1; |
264 | mlib_s32 lshift = (mlib_addr) sa & 3, rshift; |
265 | |
266 | pwd = (mlib_u32 *) da; |
267 | pws = (mlib_u32 *) (sa - lshift); |
268 | lshift <<= 3; |
269 | rshift = 32 - lshift; |
270 | |
271 | src1 = pws[0]; |
272 | #ifdef __SUNPRO_C |
273 | #pragma pipeloop(0) |
274 | #endif /* __SUNPRO_C */ |
275 | for (i = 0; j <= (b_size - 4); j += 4, i++) { |
276 | src0 = src1; |
277 | src1 = pws[i + 1]; |
278 | #ifdef _LITTLE_ENDIAN |
279 | pwd[i] = (src0 >> lshift) | (src1 << rshift); |
280 | #else |
281 | pwd[i] = (src0 << lshift) | (src1 >> rshift); |
282 | #endif /* _LITTLE_ENDIAN */ |
283 | } |
284 | |
285 | sa += i << 2; |
286 | da += i << 2; |
287 | } |
288 | |
289 | #else |
290 | mlib_u64 *pws, *pwd, src0, src1; |
291 | mlib_s32 lshift = (mlib_s32) ((mlib_addr) sa & 7), rshift; |
292 | |
293 | pwd = (mlib_u64 *) da; |
294 | pws = (mlib_u64 *) (sa - lshift); |
295 | lshift <<= 3; |
296 | rshift = 64 - lshift; |
297 | |
298 | src1 = pws[0]; |
299 | #ifdef __SUNPRO_C |
300 | #pragma pipeloop(0) |
301 | #endif /* __SUNPRO_C */ |
302 | for (i = 0; j <= (b_size - 8); j += 8, i++) { |
303 | src0 = src1; |
304 | src1 = pws[i + 1]; |
305 | pwd[i] = (src0 << lshift) | (src1 >> rshift); |
306 | } |
307 | |
308 | sa += i << 3; |
309 | da += i << 3; |
310 | #endif /* _NO_LONGLONG */ |
311 | } |
312 | |
313 | for (; j < b_size; j++) |
314 | *da++ = *sa++; |
315 | |
316 | j = size & 7; |
317 | |
318 | if (j > 0) { |
319 | mask = mask0 << (8 - j); |
320 | src = da[0]; |
321 | da[0] = (src & (~mask)) | (sa[0] & mask); |
322 | } |
323 | } |
324 | |
325 | /***************************************************************/ |
326 | void mlib_c_ImageCopy_u8(const mlib_image *src, |
327 | mlib_image *dst) |
328 | { |
329 | PREPAREVARS(mlib_u8); |
330 | if (src_width < 16) { |
331 | STRIP(pdst, psrc, src_width, src_height, mlib_u8); |
332 | return; |
333 | } |
334 | |
335 | for (i = 0; i < src_height; i++) { |
336 | mlib_u8 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride; |
337 | |
338 | if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) { |
339 | for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) psrc_row) & 7); j++) { |
340 | pdst_row[j] = psrc_row[j]; |
341 | } |
342 | |
343 | #ifdef __SUNPRO_C |
344 | #pragma pipeloop(0) |
345 | #endif /* __SUNPRO_C */ |
346 | for (; j <= (src_width - 8); j += 8) { |
347 | TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j)); |
348 | |
349 | *((TYPE_64BIT *) (pdst_row + j)) = dsrc0; |
350 | } |
351 | } |
352 | else { |
353 | |
354 | #ifdef _NO_LONGLONG |
355 | |
356 | for (j = 0; j < (mlib_s32) ((4 - (mlib_addr) pdst_row) & 3); j++) { |
357 | pdst_row[j] = psrc_row[j]; |
358 | } |
359 | |
360 | if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) { |
361 | #ifdef __SUNPRO_C |
362 | #pragma pipeloop(0) |
363 | #endif /* __SUNPRO_C */ |
364 | for (; j <= (src_width - 4); j += 4) { |
365 | *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j)); |
366 | } |
367 | } |
368 | else { |
369 | mlib_u32 *ps, shl, shr, src0, src1; |
370 | |
371 | ps = (mlib_u32 *) (psrc_row + j); |
372 | shl = (mlib_addr) ps & 3; |
373 | ps = (mlib_u32 *) ((mlib_addr) ps - shl); |
374 | shl <<= 3; |
375 | shr = 32 - shl; |
376 | |
377 | src1 = ps[0]; |
378 | #ifdef __SUNPRO_C |
379 | #pragma pipeloop(0) |
380 | #endif /* __SUNPRO_C */ |
381 | for (; j <= (src_width - 4); j += 4) { |
382 | src0 = src1; |
383 | src1 = ps[1]; |
384 | #ifdef _LITTLE_ENDIAN |
385 | *((mlib_s32 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr); |
386 | #else |
387 | *((mlib_s32 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr); |
388 | #endif /* _LITTLE_ENDIAN */ |
389 | ps++; |
390 | } |
391 | } |
392 | |
393 | #else |
394 | |
395 | for (j = 0; j < (mlib_s32) ((8 - (mlib_addr) pdst_row) & 7); j++) { |
396 | pdst_row[j] = psrc_row[j]; |
397 | } |
398 | |
399 | { |
400 | mlib_s32 shl, shr; |
401 | mlib_u64 *ps, src0, src1; |
402 | |
403 | ps = (mlib_u64 *) (psrc_row + j); |
404 | /* shl and shr are in range [0, 64] */ |
405 | shl = (mlib_s32) ((mlib_addr) ps & 7); |
406 | ps = (mlib_u64 *) ((mlib_addr) ps - shl); |
407 | shl <<= 3; |
408 | shr = 64 - shl; |
409 | |
410 | src1 = ps[0]; |
411 | #ifdef __SUNPRO_C |
412 | #pragma pipeloop(0) |
413 | #endif /* __SUNPRO_C */ |
414 | for (; j <= (src_width - 8); j += 8) { |
415 | src0 = src1; |
416 | src1 = ps[1]; |
417 | #ifdef _LITTLE_ENDIAN |
418 | *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr); |
419 | #else |
420 | *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr); |
421 | #endif /* _LITTLE_ENDIAN */ |
422 | ps++; |
423 | } |
424 | } |
425 | #endif /* _NO_LONGLONG */ |
426 | } |
427 | |
428 | for (; j < src_width; j++) |
429 | pdst_row[j] = psrc_row[j]; |
430 | } |
431 | } |
432 | |
433 | /***************************************************************/ |
434 | void mlib_c_ImageCopy_s16(const mlib_image *src, |
435 | mlib_image *dst) |
436 | { |
437 | PREPAREVARS(mlib_u16); |
438 | if (src_width < 8) { |
439 | STRIP(pdst, psrc, src_width, src_height, mlib_u16); |
440 | return; |
441 | } |
442 | |
443 | for (i = 0; i < src_height; i++) { |
444 | mlib_u16 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride; |
445 | |
446 | if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) { |
447 | for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) psrc_row) & 7) >> 1); j++) { |
448 | pdst_row[j] = psrc_row[j]; |
449 | } |
450 | |
451 | #ifdef __SUNPRO_C |
452 | #pragma pipeloop(0) |
453 | #endif /* __SUNPRO_C */ |
454 | for (; j <= (src_width - 4); j += 4) { |
455 | TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j)); |
456 | |
457 | *((TYPE_64BIT *) (pdst_row + j)) = dsrc0; |
458 | } |
459 | } |
460 | else { |
461 | |
462 | #ifdef _NO_LONGLONG |
463 | |
464 | if (j = (((mlib_addr) pdst_row & 2) != 0)) { |
465 | pdst_row[0] = psrc_row[0]; |
466 | } |
467 | |
468 | if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 3)) { |
469 | #ifdef __SUNPRO_C |
470 | #pragma pipeloop(0) |
471 | #endif /* __SUNPRO_C */ |
472 | for (; j <= (src_width - 2); j += 2) { |
473 | *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j)); |
474 | } |
475 | } |
476 | else { |
477 | mlib_u32 *ps, src0, src1; |
478 | |
479 | ps = (mlib_u32 *) (psrc_row + j - 1); |
480 | src1 = ps[0]; |
481 | #ifdef __SUNPRO_C |
482 | #pragma pipeloop(0) |
483 | #endif /* __SUNPRO_C */ |
484 | for (; j <= (src_width - 2); j += 2) { |
485 | src0 = src1; |
486 | src1 = ps[1]; |
487 | #ifdef _LITTLE_ENDIAN |
488 | *((mlib_s32 *) (pdst_row + j)) = (src0 >> 16) | (src1 << 16); |
489 | #else |
490 | *((mlib_s32 *) (pdst_row + j)) = (src0 << 16) | (src1 >> 16); |
491 | #endif /* _LITTLE_ENDIAN */ |
492 | ps++; |
493 | } |
494 | } |
495 | |
496 | #else |
497 | |
498 | for (j = 0; j < (mlib_s32) (((8 - (mlib_addr) pdst_row) & 7) >> 1); j++) { |
499 | pdst_row[j] = psrc_row[j]; |
500 | } |
501 | |
502 | { |
503 | mlib_s32 shl, shr; |
504 | mlib_u64 *ps, src0, src1; |
505 | |
506 | ps = (mlib_u64 *) (psrc_row + j); |
507 | shl = (mlib_s32) ((mlib_addr) ps & 7); |
508 | ps = (mlib_u64 *) ((mlib_addr) ps - shl); |
509 | shl <<= 3; |
510 | shr = 64 - shl; |
511 | |
512 | src1 = ps[0]; |
513 | #ifdef __SUNPRO_C |
514 | #pragma pipeloop(0) |
515 | #endif /* __SUNPRO_C */ |
516 | for (; j <= (src_width - 4); j += 4) { |
517 | src0 = src1; |
518 | src1 = ps[1]; |
519 | #ifdef _LITTLE_ENDIAN |
520 | *((mlib_s64 *) (pdst_row + j)) = (src0 >> shl) | (src1 << shr); |
521 | #else |
522 | *((mlib_s64 *) (pdst_row + j)) = (src0 << shl) | (src1 >> shr); |
523 | #endif /* _LITTLE_ENDIAN */ |
524 | ps++; |
525 | } |
526 | } |
527 | #endif /* _NO_LONGLONG */ |
528 | } |
529 | |
530 | for (; j < src_width; j++) |
531 | pdst_row[j] = psrc_row[j]; |
532 | } |
533 | } |
534 | |
535 | /***************************************************************/ |
536 | void mlib_c_ImageCopy_s32(const mlib_image *src, |
537 | mlib_image *dst) |
538 | { |
539 | PREPAREVARS(mlib_u32); |
540 | if (src_width < 4) { |
541 | STRIP(pdst, psrc, src_width, src_height, mlib_u32); |
542 | return; |
543 | } |
544 | |
545 | for (i = 0; i < src_height; i++) { |
546 | mlib_u32 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride; |
547 | |
548 | if (!(((mlib_addr) psrc_row ^ (mlib_addr) pdst_row) & 7)) { |
549 | j = (mlib_s32) ((mlib_addr) psrc_row & 4) >> 2; |
550 | if (j != 0) { |
551 | pdst_row[0] = psrc_row[0]; |
552 | } |
553 | |
554 | #ifdef __SUNPRO_C |
555 | #pragma pipeloop(0) |
556 | #endif /* __SUNPRO_C */ |
557 | for (; j <= (src_width - 2); j += 2) { |
558 | TYPE_64BIT dsrc0 = *((TYPE_64BIT *) (psrc_row + j)); |
559 | |
560 | *((TYPE_64BIT *) (pdst_row + j)) = dsrc0; |
561 | } |
562 | } |
563 | else { |
564 | |
565 | #ifdef _NO_LONGLONG |
566 | |
567 | #ifdef __SUNPRO_C |
568 | #pragma pipeloop(0) |
569 | #endif /* __SUNPRO_C */ |
570 | for (j = 0; j <= (src_width - 1); j++) { |
571 | *((mlib_s32 *) (pdst_row + j)) = *((mlib_s32 *) (psrc_row + j)); |
572 | } |
573 | |
574 | #else |
575 | |
576 | { |
577 | mlib_u64 *ps, src0, src1; |
578 | |
579 | j = (mlib_s32) ((mlib_addr) pdst_row & 4) >> 2; |
580 | if (j != 0) { |
581 | pdst_row[0] = psrc_row[0]; |
582 | } |
583 | ps = (mlib_u64 *) (psrc_row + j - 1); |
584 | src1 = ps[0]; |
585 | #ifdef __SUNPRO_C |
586 | #pragma pipeloop(0) |
587 | #endif /* __SUNPRO_C */ |
588 | for (; j <= (src_width - 2); j += 2) { |
589 | src0 = src1; |
590 | src1 = ps[1]; |
591 | #ifdef _LITTLE_ENDIAN |
592 | *((mlib_s64 *) (pdst_row + j)) = (src0 >> 32) | (src1 << 32); |
593 | #else |
594 | *((mlib_s64 *) (pdst_row + j)) = (src0 << 32) | (src1 >> 32); |
595 | #endif /* _LITTLE_ENDIAN */ |
596 | ps++; |
597 | } |
598 | } |
599 | #endif /* _NO_LONGLONG */ |
600 | } |
601 | |
602 | for (; j < src_width; j++) |
603 | pdst_row[j] = psrc_row[j]; |
604 | } |
605 | } |
606 | |
607 | /***************************************************************/ |
608 | void mlib_c_ImageCopy_d64(const mlib_image *src, |
609 | mlib_image *dst) |
610 | { |
611 | PREPAREVARS(mlib_d64); |
612 | for (i = 0; i < src_height; i++) { |
613 | mlib_d64 *psrc_row = psrc + i * src_stride, *pdst_row = pdst + i * dst_stride; |
614 | |
615 | #ifdef __SUNPRO_C |
616 | #pragma pipeloop(0) |
617 | #endif /* __SUNPRO_C */ |
618 | for (j = 0; j < src_width; j++) |
619 | *((mlib_d64 *) (pdst_row + j)) = *((mlib_d64 *) (psrc_row + j)); |
620 | } |
621 | } |
622 | |
623 | /***************************************************************/ |
624 | /* |
625 | * Both source and destination image data are 1 - d vectors and |
626 | * 8 - byte aligned. And size is in 8 - bytes. |
627 | */ |
628 | |
629 | void mlib_c_ImageCopy_a1(const TYPE_64BIT *sp, |
630 | TYPE_64BIT *dp, |
631 | mlib_s32 size) |
632 | { |
633 | mlib_s32 i; |
634 | |
635 | #ifdef __SUNPRO_C |
636 | #pragma pipeloop(0) |
637 | #endif /* __SUNPRO_C */ |
638 | for (i = 0; i < size; i++) { |
639 | *dp++ = *sp++; |
640 | } |
641 | } |
642 | |
643 | /***************************************************************/ |
/*
 * Machine word used by mlib_ImageCopy_na for its word-wise copy:
 *   TYPE   the unsigned word type
 *   BSIZE  width of TYPE in bits
 *   SIZE   width of TYPE in bytes
 * A 64-bit word is used unless _NO_LONGLONG is defined, in which case
 * a 32-bit word is used.
 */
#ifndef _NO_LONGLONG
#define TYPE   mlib_u64
#define BSIZE  64
#define SIZE   8
#else
#define TYPE   mlib_u32
#define BSIZE  32
#define SIZE   4
#endif /* _NO_LONGLONG */
653 | |
654 | /***************************************************************/ |
655 | void mlib_ImageCopy_na(const mlib_u8 *sp, |
656 | mlib_u8 *dp, |
657 | mlib_s32 n) |
658 | { |
659 | mlib_s32 shr, shl; |
660 | TYPE *tmp, s0, s1; |
661 | |
662 | if (((mlib_addr) sp ^ (mlib_addr) dp) & 7) { |
663 | |
664 | #ifdef __SUNPRO_C |
665 | #pragma pipeloop(0) |
666 | #endif /* __SUNPRO_C */ |
667 | for (; (n > 0) && (mlib_addr) dp & (SIZE - 1); n--) |
668 | *dp++ = *sp++; |
669 | |
670 | #ifdef _NO_LONGLONG |
671 | |
672 | if (((mlib_addr) sp & (SIZE - 1)) == 0) { |
673 | for (; n > SIZE; n -= SIZE) { |
674 | *(TYPE *) dp = *(TYPE *) sp; |
675 | dp += SIZE; |
676 | sp += SIZE; |
677 | } |
678 | } |
679 | else |
680 | #endif /* _NO_LONGLONG */ |
681 | { |
682 | tmp = (TYPE *) ((mlib_addr) sp & ~(SIZE - 1)); |
683 | /* shl and shr do not exceed 64 here */ |
684 | shl = (mlib_s32) (((mlib_addr) sp & (SIZE - 1)) << 3); |
685 | shr = BSIZE - shl; |
686 | s0 = *tmp++; |
687 | |
688 | #ifdef __SUNPRO_C |
689 | #pragma pipeloop(0) |
690 | #endif /* __SUNPRO_C */ |
691 | for (; n > SIZE; n -= SIZE) { |
692 | s1 = *tmp++; |
693 | #ifdef _LITTLE_ENDIAN |
694 | *(TYPE *) dp = (s0 >> shl) | (s1 << shr); |
695 | #else |
696 | *(TYPE *) dp = (s0 << shl) | (s1 >> shr); |
697 | #endif /* _LITTLE_ENDIAN */ |
698 | s0 = s1; |
699 | dp += SIZE; |
700 | sp += SIZE; |
701 | } |
702 | } |
703 | } |
704 | else { |
705 | #ifdef __SUNPRO_C |
706 | #pragma pipeloop(0) |
707 | #endif /* __SUNPRO_C */ |
708 | for (; (n > 0) && (mlib_addr) dp & 7; n--) |
709 | *dp++ = *sp++; |
710 | |
711 | #ifdef __SUNPRO_C |
712 | #pragma pipeloop(0) |
713 | #endif /* __SUNPRO_C */ |
714 | for (; n > 8; n -= 8) { |
715 | *(TYPE_64BIT *) dp = *(TYPE_64BIT *) sp; |
716 | dp += 8; |
717 | sp += 8; |
718 | } |
719 | } |
720 | |
721 | #ifdef __SUNPRO_C |
722 | #pragma pipeloop(0) |
723 | #endif /* __SUNPRO_C */ |
724 | for (; n > 0; n--) |
725 | *dp++ = *sp++; |
726 | } |
727 | |
728 | /***************************************************************/ |
729 | #ifdef _MSC_VER |
730 | #pragma optimize("", on) |
731 | #endif /* _MSC_VER */ |
732 | |
733 | /***************************************************************/ |
734 | |