1 | /* |
2 | * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | |
27 | /* |
28 | * FUNCTIONS |
29 | * mlib_ImageCopy_bit_na - BIT, non-aligned |
30 | * mlib_ImageCopy_bit_na_r - BIT, non-aligned, reverse |
31 | * |
32 | * SYNOPSIS |
33 | * |
34 | * void mlib_ImageCopy_bit_na(const mlib_u8 *sa, |
35 | * mlib_u8 *da, |
36 | * mlib_s32 size, |
37 | * mlib_s32 s_offset, |
38 | * mlib_s32 d_offset); |
39 | * void mlib_ImageCopy_bit_na_r(const mlib_u8 *sa, |
40 | * mlib_u8 *da, |
41 | * mlib_s32 size, |
42 | * mlib_s32 s_offset, |
43 | * mlib_s32 d_offset); |
44 | * ARGUMENT |
 *      sa        pointer to source image data
 *      da        pointer to destination image data
 *      size      number of bits to copy
48 | * width image width in 8-bytes |
49 | * height image height in lines |
50 | * stride source image line stride in 8-bytes |
51 | * dstride destination image line stride in 8-bytes |
52 | * s_offset source image line bit offset |
53 | * d_offset destination image line bit offset |
54 | * |
55 | * DESCRIPTION |
56 | * Direct copy from one image to another -- C version low level |
57 | * functions. |
58 | */ |
59 | |
60 | #include <stdlib.h> |
61 | #include "mlib_image.h" |
62 | #include "mlib_ImageCopy.h" |
63 | |
64 | /***************************************************************/ |
65 | /* |
 * Bit offsets of source and destination are not the same
67 | */ |
68 | |
69 | void mlib_ImageCopy_bit_na(const mlib_u8 *sa, |
70 | mlib_u8 *da, |
71 | mlib_s32 size, |
72 | mlib_s32 s_offset, |
73 | mlib_s32 d_offset) |
74 | { |
75 | #ifdef _NO_LONGLONG |
76 | |
77 | mlib_u32 *dp; /* 4-byte aligned start points in dst */ |
78 | mlib_u32 *sp; /* 4-byte aligned start point in src */ |
79 | mlib_s32 j; /* offset of address in dst */ |
80 | mlib_u32 mask0 = 0xFFFFFFFF; |
81 | mlib_u32 dmask; |
82 | mlib_u32 src, src0, src1, dst; |
83 | mlib_s32 ls_offset, ld_offset, shift; |
84 | |
85 | if (size <= 0) return; |
86 | |
87 | /* prepare the destination addresses */ |
88 | dp = (mlib_u32 *)((mlib_addr)da & (~3)); |
89 | sp = (mlib_u32 *)((mlib_addr)sa & (~3)); |
90 | ld_offset = (((mlib_addr)da & 3) << 3) + d_offset; /* bit d_offset to first mlib_s32 */ |
91 | ls_offset = (((mlib_addr)sa & 3) << 3) + s_offset; /* bit d_offset to first mlib_s32 */ |
92 | |
93 | if (ld_offset > ls_offset) { |
94 | src0 = sp[0]; |
95 | dst = dp[0]; |
96 | if (ld_offset + size < 32) { |
97 | dmask = (mask0 << (32 - size)) >> ld_offset; |
98 | #ifdef _LITTLE_ENDIAN |
99 | src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24); |
100 | src = (src0 >> (ld_offset - ls_offset)); |
101 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
102 | dst = (dst & (~dmask)) | (src & dmask); |
103 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
104 | #else |
105 | src = (src0 >> (ld_offset - ls_offset)); |
106 | dp[0] = (dst & (~dmask)) | (src & dmask); |
107 | #endif /* _LITTLE_ENDIAN */ |
108 | return; |
109 | } |
110 | |
111 | dmask = mask0 >> ld_offset; |
112 | #ifdef _LITTLE_ENDIAN |
113 | src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24); |
114 | src = (src0 >> (ld_offset - ls_offset)); |
115 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
116 | dst = (dst & ~dmask) | (src & dmask); |
117 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
118 | #else |
119 | src = (src0 >> (ld_offset - ls_offset)); |
120 | dp[0] = (dst & ~dmask) | (src & dmask); |
121 | #endif /* _LITTLE_ENDIAN */ |
122 | j = 32 - ld_offset; |
123 | dp++; |
124 | ls_offset += j; |
125 | } else { |
126 | |
127 | shift = ls_offset - ld_offset; |
128 | src0 = sp[0]; |
129 | if (ls_offset + size > 32) src1 = sp[1]; |
130 | dst = dp[0]; |
131 | |
132 | if (ld_offset + size < 32) { |
133 | dmask = (mask0 << (32 - size)) >> ld_offset; |
134 | #ifdef _LITTLE_ENDIAN |
135 | src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24); |
136 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
137 | src = (src0 << shift) | (src1 >> (32 - shift)); |
138 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
139 | dst = (dst & ~dmask) | (src & dmask); |
140 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
141 | #else |
142 | src = (src0 << shift) | (src1 >> (32 - shift)); |
143 | dp[0] = (dst & ~dmask) | (src & dmask); |
144 | #endif /* _LITTLE_ENDIAN */ |
145 | return; |
146 | } |
147 | |
148 | dmask = mask0 >> ld_offset; |
149 | #ifdef _LITTLE_ENDIAN |
150 | src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24); |
151 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
152 | src = (src0 << shift) | (src1 >> (32 - shift)); |
153 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
154 | dst = (dst & ~dmask) | (src & dmask); |
155 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
156 | #else |
157 | src = (src0 << shift) | (src1 >> (32 - shift)); |
158 | dp[0] = (dst & ~dmask) | (src & dmask); |
159 | #endif /* _LITTLE_ENDIAN */ |
160 | j = 32 - ld_offset; |
161 | dp++; |
162 | sp++; |
163 | ls_offset = ls_offset + j - 32; |
164 | } |
165 | |
166 | if (j < size) src1 = sp[0]; |
167 | #ifdef _LITTLE_ENDIAN |
168 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
169 | #endif /* _LITTLE_ENDIAN */ |
170 | for (; j <= size - 32; j += 32) { |
171 | src0 = src1; |
172 | src1 = sp[1]; |
173 | #ifdef _LITTLE_ENDIAN |
174 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
175 | src = (src0 << ls_offset) | (src1 >> (32 - ls_offset)); |
176 | dp[0] = (src << 24) | ((src & 0xFF00) << 8) | ((src >> 8) & 0xFF00) | (src >> 24); |
177 | #else |
178 | dp[0] = (src0 << ls_offset) | (src1 >> (32 - ls_offset)); |
179 | #endif /* _LITTLE_ENDIAN */ |
180 | sp++; |
181 | dp++; |
182 | } |
183 | |
184 | if (j < size) { |
185 | j = size - j; |
186 | src0 = src1; |
187 | if (ls_offset + j > 32) src1 = sp[1]; |
188 | dst = dp[0]; |
189 | dmask = mask0 << (32 - j); |
190 | #ifdef _LITTLE_ENDIAN |
191 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
192 | src = (src0 << ls_offset) | (src1 >> (32 - ls_offset)); |
193 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
194 | dst = (dst & ~dmask) | (src & dmask); |
195 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
196 | #else |
197 | src = (src0 << ls_offset) | (src1 >> (32 - ls_offset)); |
198 | dp[0] = (dst & ~dmask) | (src & dmask); |
199 | #endif /* _LITTLE_ENDIAN */ |
200 | } |
201 | |
202 | #else /* _LONGLONG */ |
203 | |
204 | mlib_u64 *dp; /* 8-byte aligned start points in dst */ |
205 | mlib_u64 *sp; /* 8-byte aligned start point in src */ |
206 | mlib_s32 j; /* offset of address in dst */ |
207 | mlib_u64 lmask0 = 0xFFFFFFFFFFFFFFFFULL; |
208 | mlib_u64 dmask; |
209 | mlib_u64 lsrc, lsrc0, lsrc1 = 0ULL, ldst; |
210 | mlib_s32 ls_offset, ld_offset, shift; |
211 | |
212 | if (size <= 0) return; |
213 | |
214 | /* prepare the destination addresses */ |
215 | dp = (mlib_u64 *)((mlib_addr)da & (~7)); |
216 | sp = (mlib_u64 *)((mlib_addr)sa & (~7)); |
217 | /* we can explicitly cast ro mlib_s32 here because value is in [0,64] range */ |
218 | ld_offset = (((mlib_s32) ((mlib_addr)da & 7)) << 3) + d_offset; /* bit d_offset to first mlib_d64 */ |
219 | ls_offset = (((mlib_s32) ((mlib_addr)sa & 7)) << 3) + s_offset; /* bit d_offset to first mlib_d64 */ |
220 | |
221 | if (ld_offset > ls_offset) { |
222 | lsrc0 = sp[0]; |
223 | ldst = dp[0]; |
224 | lsrc = (lsrc0 >> (ld_offset - ls_offset)); |
225 | if (ld_offset + size < 64) { |
226 | dmask = (lmask0 << (64 - size)) >> ld_offset; |
227 | dp[0] = (ldst & (~dmask)) | (lsrc & dmask); |
228 | return; |
229 | } |
230 | |
231 | dmask = lmask0 >> ld_offset; |
232 | dp[0] = (ldst & ~dmask) | (lsrc & dmask); |
233 | j = 64 - ld_offset; |
234 | dp++; |
235 | ls_offset += j; |
236 | } else { |
237 | |
238 | shift = ls_offset - ld_offset; |
239 | lsrc0 = sp[0]; |
240 | if (ls_offset + size > 64) lsrc1 = sp[1]; |
241 | ldst = dp[0]; |
242 | lsrc = (lsrc0 << shift) | (lsrc1 >> (64 - shift)); |
243 | |
244 | if (ld_offset + size < 64) { |
245 | dmask = (lmask0 << (64 - size)) >> ld_offset; |
246 | dp[0] = (ldst & ~dmask) | (lsrc & dmask); |
247 | return; |
248 | } |
249 | |
250 | dmask = lmask0 >> ld_offset; |
251 | dp[0] = (ldst & ~dmask) | (lsrc & dmask); |
252 | j = 64 - ld_offset; |
253 | dp++; |
254 | sp++; |
255 | ls_offset = ls_offset + j - 64; |
256 | } |
257 | |
258 | if (j < size) lsrc1 = sp[0]; |
259 | #ifdef __SUNPRO_C |
260 | #pragma pipeloop(0) |
261 | #endif /* __SUNPRO_C */ |
262 | for (; j <= size - 64; j += 64) { |
263 | lsrc0 = lsrc1; |
264 | lsrc1 = sp[1]; |
265 | lsrc = (lsrc0 << ls_offset) | (lsrc1 >> (64 - ls_offset)); |
266 | dp[0] = lsrc; |
267 | sp++; |
268 | dp++; |
269 | } |
270 | |
271 | if (j < size) { |
272 | j = size - j; |
273 | lsrc0 = lsrc1; |
274 | if (ls_offset + j > 64) lsrc1 = sp[1]; |
275 | ldst = dp[0]; |
276 | dmask = lmask0 << (64 - j); |
277 | lsrc = (lsrc0 << ls_offset) | (lsrc1 >> (64 - ls_offset)); |
278 | dp[0] = (ldst & ~dmask) | (lsrc & dmask); |
279 | } |
280 | #endif /* _NO_LONGLONG */ |
281 | } |
282 | |
283 | /***************************************************************/ |
284 | /* |
 * Bit offsets of source and destination are not the same
286 | * This function is both for C and VIS version (LONGLONG case) |
287 | */ |
288 | |
289 | void mlib_ImageCopy_bit_na_r(const mlib_u8 *sa, |
290 | mlib_u8 *da, |
291 | mlib_s32 size, |
292 | mlib_s32 s_offset, |
293 | mlib_s32 d_offset) |
294 | { |
295 | #ifdef _NO_LONGLONG |
296 | |
297 | mlib_u32 *dp; /* 4-byte aligned start points in dst */ |
298 | mlib_u32 *sp; /* 4-byte aligned start point in src */ |
299 | mlib_s32 j; /* offset of address in dst */ |
300 | mlib_u32 lmask0 = 0xFFFFFFFF; |
301 | mlib_u32 dmask; |
302 | mlib_u32 src, src0, src1, dst; |
303 | mlib_s32 ls_offset, ld_offset, shift; |
304 | |
305 | if (size <= 0) return; |
306 | |
307 | /* prepare the destination addresses */ |
308 | dp = (mlib_u32 *)((mlib_addr)da & (~3)); |
309 | sp = (mlib_u32 *)((mlib_addr)sa & (~3)); |
310 | ld_offset = (((mlib_addr)da & 3) << 3) + d_offset; /* bit d_offset to first mlib_s32 */ |
311 | ls_offset = (((mlib_addr)sa & 3) << 3) + s_offset; /* bit d_offset to first mlib_s32 */ |
312 | |
313 | if (ld_offset < ls_offset) { |
314 | src0 = sp[0]; |
315 | dst = dp[0]; |
316 | if (ld_offset >= size) { |
317 | dmask = (lmask0 << (32 - size)) >> (ld_offset - size); |
318 | #ifdef _LITTLE_ENDIAN |
319 | src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24); |
320 | src = (src0 << (ls_offset - ld_offset)); |
321 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
322 | dst = (dst & (~dmask)) | (src & dmask); |
323 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
324 | #else |
325 | src = (src0 << (ls_offset - ld_offset)); |
326 | dp[0] = (dst & (~dmask)) | (src & dmask); |
327 | #endif /* _LITTLE_ENDIAN */ |
328 | return; |
329 | } |
330 | |
331 | dmask = lmask0 << (32 - ld_offset); |
332 | #ifdef _LITTLE_ENDIAN |
333 | src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24); |
334 | src = (src0 << (ls_offset - ld_offset)); |
335 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
336 | dst = (dst & ~dmask) | (src & dmask); |
337 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
338 | #else |
339 | src = (src0 << (ls_offset - ld_offset)); |
340 | dp[0] = (dst & ~dmask) | (src & dmask); |
341 | #endif /* _LITTLE_ENDIAN */ |
342 | j = ld_offset; |
343 | dp--; |
344 | ls_offset -= j; |
345 | } else { |
346 | |
347 | shift = ld_offset - ls_offset; |
348 | src0 = sp[0]; |
349 | if (ls_offset < size) src1 = sp[-1]; |
350 | dst = dp[0]; |
351 | |
352 | if (ld_offset >= size) { |
353 | dmask = (lmask0 << (32 - size)) >> (ld_offset - size); |
354 | #ifdef _LITTLE_ENDIAN |
355 | src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24); |
356 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
357 | src = (src0 >> shift) | (src1 << (32 - shift)); |
358 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
359 | dst = (dst & ~dmask) | (src & dmask); |
360 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
361 | #else |
362 | src = (src0 >> shift) | (src1 << (32 - shift)); |
363 | dp[0] = (dst & ~dmask) | (src & dmask); |
364 | #endif /* _LITTLE_ENDIAN */ |
365 | return; |
366 | } |
367 | |
368 | dmask = lmask0 << (32 - ld_offset); |
369 | #ifdef _LITTLE_ENDIAN |
370 | src0 = (src0 << 24) | ((src0 & 0xFF00) << 8) | ((src0 >> 8) & 0xFF00) | (src0 >> 24); |
371 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
372 | src = (src0 >> shift) | (src1 << (32 - shift)); |
373 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
374 | dst = (dst & ~dmask) | (src & dmask); |
375 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
376 | #else |
377 | src = (src0 >> shift) | (src1 << (32 - shift)); |
378 | dp[0] = (dst & ~dmask) | (src & dmask); |
379 | #endif /* _LITTLE_ENDIAN */ |
380 | j = ld_offset; |
381 | dp--; |
382 | sp--; |
383 | ls_offset = ls_offset - j + 32; |
384 | } |
385 | |
386 | if (j < size) src1 = sp[0]; |
387 | #ifdef _LITTLE_ENDIAN |
388 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
389 | #endif /* _LITTLE_ENDIAN */ |
390 | #ifdef __SUNPRO_C |
391 | #pragma pipeloop(0) |
392 | #endif /* __SUNPRO_C */ |
393 | for (; j <= size - 32; j += 32) { |
394 | src0 = src1; |
395 | src1 = sp[-1]; |
396 | #ifdef _LITTLE_ENDIAN |
397 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
398 | src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset); |
399 | dp[0] = (src << 24) | ((src & 0xFF00) << 8) | ((src >> 8) & 0xFF00) | (src >> 24); |
400 | #else |
401 | dp[0] = (src0 >> (32 - ls_offset)) | (src1 << ls_offset); |
402 | #endif /* _LITTLE_ENDIAN */ |
403 | sp--; |
404 | dp--; |
405 | } |
406 | |
407 | if (j < size) { |
408 | j = size - j; |
409 | src0 = src1; |
410 | if (ls_offset < j) src1 = sp[-1]; |
411 | dst = dp[0]; |
412 | dmask = lmask0 >> (32 - j); |
413 | #ifdef _LITTLE_ENDIAN |
414 | src1 = (src1 << 24) | ((src1 & 0xFF00) << 8) | ((src1 >> 8) & 0xFF00) | (src1 >> 24); |
415 | src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset); |
416 | dst = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
417 | dst = (dst & ~dmask) | (src & dmask); |
418 | dp[0] = (dst << 24) | ((dst & 0xFF00) << 8) | ((dst >> 8) & 0xFF00) | (dst >> 24); |
419 | #else |
420 | src = (src0 >> (32 - ls_offset)) | (src1 << ls_offset); |
421 | dp[0] = (dst & ~dmask) | (src & dmask); |
422 | #endif /* _LITTLE_ENDIAN */ |
423 | } |
424 | |
425 | #else /* _LONGLONG */ |
426 | |
427 | mlib_u64 *dp; /* 8-byte aligned start points in dst */ |
428 | mlib_u64 *sp; /* 8-byte aligned start point in src */ |
429 | mlib_s32 j; /* offset of address in dst */ |
430 | mlib_u64 lmask0 = 0xFFFFFFFFFFFFFFFFULL; |
431 | mlib_u64 dmask; |
432 | mlib_u64 lsrc, lsrc0, lsrc1 = 0ULL, ldst; |
433 | mlib_s32 ls_offset, ld_offset, shift; |
434 | |
435 | if (size <= 0) return; |
436 | |
437 | /* prepare the destination addresses */ |
438 | dp = (mlib_u64 *)((mlib_addr)da & (~7)); |
439 | sp = (mlib_u64 *)((mlib_addr)sa & (~7)); |
440 | /* we can explicitly cast ro mlib_s32 here because value is in [0,64] range */ |
441 | ld_offset = (((mlib_s32) ((mlib_addr)da & 7)) << 3) + d_offset; /* bit d_offset to first mlib_d64 */ |
442 | ls_offset = (((mlib_s32) ((mlib_addr)sa & 7)) << 3) + s_offset; /* bit d_offset to first mlib_d64 */ |
443 | |
444 | if (ld_offset < ls_offset) { |
445 | lsrc0 = sp[0]; |
446 | ldst = dp[0]; |
447 | lsrc = (lsrc0 << (ls_offset - ld_offset)); |
448 | if (ld_offset >= size) { |
449 | dmask = (lmask0 << (64 - size)) >> (ld_offset - size); |
450 | dp[0] = (ldst & (~dmask)) | (lsrc & dmask); |
451 | return; |
452 | } |
453 | |
454 | dmask = lmask0 << (64 - ld_offset); |
455 | dp[0] = (ldst & ~dmask) | (lsrc & dmask); |
456 | j = ld_offset; |
457 | dp--; |
458 | ls_offset -= j; |
459 | } else { |
460 | |
461 | shift = ld_offset - ls_offset; |
462 | lsrc0 = sp[0]; |
463 | if (ls_offset < size) lsrc1 = sp[-1]; |
464 | ldst = dp[0]; |
465 | lsrc = (lsrc0 >> shift) | (lsrc1 << (64 - shift)); |
466 | if (ld_offset >= size) { |
467 | dmask = (lmask0 << (64 - size)) >> (ld_offset - size); |
468 | dp[0] = (ldst & ~dmask) | (lsrc & dmask); |
469 | return; |
470 | } |
471 | |
472 | dmask = lmask0 << (64 - ld_offset); |
473 | dp[0] = (ldst & ~dmask) | (lsrc & dmask); |
474 | j = ld_offset; |
475 | dp--; |
476 | sp--; |
477 | ls_offset = ls_offset - j + 64; |
478 | } |
479 | |
480 | if (j < size) lsrc1 = sp[0]; |
481 | #ifdef __SUNPRO_C |
482 | #pragma pipeloop(0) |
483 | #endif /* __SUNPRO_C */ |
484 | for (; j <= size - 64; j += 64) { |
485 | lsrc0 = lsrc1; |
486 | lsrc1 = sp[-1]; |
487 | dp[0] = (lsrc0 >> (64 - ls_offset)) | (lsrc1 << ls_offset); |
488 | sp--; |
489 | dp--; |
490 | } |
491 | |
492 | if (j < size) { |
493 | j = size - j; |
494 | lsrc0 = lsrc1; |
495 | if (ls_offset < j) lsrc1 = sp[-1]; |
496 | ldst = dp[0]; |
497 | dmask = lmask0 >> (64 - j); |
498 | lsrc = (lsrc0 >> (64 - ls_offset)) | (lsrc1 << ls_offset); |
499 | dp[0] = (ldst & ~dmask) | (lsrc & dmask); |
500 | } |
501 | #endif /* _NO_LONGLONG */ |
502 | } |
503 | |
504 | /***************************************************************/ |
505 | |