/*
 * VIS op helpers
 *
 * Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"

/* This macro uses non-native bit order, i.e. bit 0 is the most significant bit */
#define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/* This macro uses the bit order of the manuals, i.e. bit 0 is 2^0 (the LSB) */
#define GET_FIELD_SP(X, FROM, TO)                               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))

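/*
 * For example, GET_FIELD_SP(x, 11, 12) extracts bits 12:11 of x as a
 * two-bit value, while GET_FIELD(x, 0, 3) extracts the four most
 * significant bits.
 */

/*
 * ARRAY8: convert the fixed-point x/y/z coordinates packed in
 * pixel_addr into a blocked byte address; cubesize selects how many
 * bits of the upper x/y coordinates take part in the interleave.
 */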
target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
{
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
}

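/*
 * Host-endianness-independent accessors for the sub-elements of the
 * VIS64/VIS32 unions below: element 0 is always the least significant
 * byte/halfword/word of the value.
 */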
#ifdef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif

typedef union {
    uint8_t b[8];
    uint16_t w[4];
    int16_t sw[4];
    uint32_t l[2];
    uint64_t ll;
    float64 d;
} VIS64;

typedef union {
    uint8_t b[4];
    uint16_t w[2];
    uint32_t l;
    float32 f;
} VIS32;

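/*
 * FPMERGE: interleave the four low-order bytes of src1 with the four
 * low-order bytes of src2.  Counting from the least significant end,
 * even result bytes come from src2 and odd result bytes from src1.
 */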
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;

    s.ll = src1;
    d.ll = src2;

    /* Reverse calculation order to handle overlap */
    d.VIS_B64(7) = s.VIS_B64(3);
    d.VIS_B64(6) = d.VIS_B64(3);
    d.VIS_B64(5) = s.VIS_B64(2);
    d.VIS_B64(4) = d.VIS_B64(2);
    d.VIS_B64(3) = s.VIS_B64(1);
    d.VIS_B64(2) = d.VIS_B64(1);
    d.VIS_B64(1) = s.VIS_B64(0);
    /* d.VIS_B64(0) = d.VIS_B64(0); */

    return d.ll;
}

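/*
 * FMUL8x16: multiply each unsigned 8-bit element of src1 by the
 * corresponding signed 16-bit element of src2 and keep the upper
 * 16 bits of each 24-bit product, rounded to nearest (ties up).
 */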
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);      \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

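/*
 * FMUL8x16AL: as FMUL8x16, but every byte of src1 is multiplied by
 * the same 16-bit element of src2 (element 1, i.e. bits 31:16).
 */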
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);      \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

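/*
 * FMUL8x16AU: as FMUL8x16, but every byte of src1 is multiplied by
 * the same 16-bit element of src2 (element 0, i.e. bits 15:0).
 */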
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);      \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

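/*
 * FMUL8SUx16: multiply the signed upper 8 bits of each 16-bit element
 * of src1 by the corresponding signed 16-bit element of src2; keep
 * the upper 16 bits of each rounded 24-bit product.
 */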
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

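/*
 * FMUL8ULx16: multiply the unsigned lower 8 bits of each 16-bit
 * element of src1 by the corresponding signed 16-bit element of src2;
 * keep the upper 16 bits of each rounded 24-bit product.
 */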
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

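/*
 * FMULD8SUx16: as FMUL8SUx16, but only elements 0 and 1 take part and
 * each rounded product is stored as a full 32-bit result.
 */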
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

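/*
 * FMULD8ULx16: as FMUL8ULx16, but only elements 0 and 1 take part and
 * each rounded product is stored as a full 32-bit result.
 */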
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

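/*
 * FEXPAND: convert each byte of the 32-bit source into a 16-bit
 * fixed-point value by shifting it left by 4 bits.
 */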
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    VIS32 s;
    VIS64 d;

    s.l = (uint32_t)src1;
    d.ll = src2;
    d.VIS_W64(0) = s.VIS_B32(0) << 4;
    d.VIS_W64(1) = s.VIS_B32(1) << 4;
    d.VIS_W64(2) = s.VIS_B32(2) << 4;
    d.VIS_W64(3) = s.VIS_B32(3) << 4;

    return d.ll;
}

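/*
 * Partitioned add/subtract.  The ##16 and ##32 variants operate on
 * four 16-bit or two 32-bit lanes of a 64-bit operand; the ##16s and
 * ##32s variants operate on a 32-bit operand.
 */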
#define VIS_HELPER(name, F)                                  \
    uint64_t name##16(uint64_t src1, uint64_t src2)          \
    {                                                        \
        VIS64 s, d;                                          \
                                                             \
        s.ll = src1;                                         \
        d.ll = src2;                                         \
                                                             \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));        \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));        \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));        \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));        \
                                                             \
        return d.ll;                                         \
    }                                                        \
                                                             \
    uint32_t name##16s(uint32_t src1, uint32_t src2)         \
    {                                                        \
        VIS32 s, d;                                          \
                                                             \
        s.l = src1;                                          \
        d.l = src2;                                          \
                                                             \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));        \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));        \
                                                             \
        return d.l;                                          \
    }                                                        \
                                                             \
    uint64_t name##32(uint64_t src1, uint64_t src2)          \
    {                                                        \
        VIS64 s, d;                                          \
                                                             \
        s.ll = src1;                                         \
        d.ll = src2;                                         \
                                                             \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));        \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));        \
                                                             \
        return d.ll;                                         \
    }                                                        \
                                                             \
    uint32_t name##32s(uint32_t src1, uint32_t src2)         \
    {                                                        \
        VIS32 s, d;                                          \
                                                             \
        s.l = src1;                                          \
        d.l = src2;                                          \
                                                             \
        d.l = F(d.l, s.l);                                   \
                                                             \
        return d.l;                                          \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)

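/*
 * Partitioned compares: lane i of src1 is compared with lane i of
 * src2 and bit i of the result is set if the comparison holds; all
 * other result bits are cleared.
 */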
#define VIS_CMPHELPER(name, F)                                    \
    uint64_t name##16(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint64_t name##32(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
        d.VIS_L64(1) = 0;                                         \
                                                                  \
        return d.ll;                                              \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)

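/*
 * PDIST: accumulate the sum of absolute differences of the eight
 * byte pairs of src1 and src2 into the running sum.
 */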
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int i;
    for (i = 0; i < 8; i++) {
        int s1, s2;

        s1 = (src1 >> (56 - (i * 8))) & 0xff;
        s2 = (src2 >> (56 - (i * 8))) & 0xff;

        /* Absolute value of difference. */
        s1 -= s2;
        if (s1 < 0) {
            s1 = -s1;
        }

        sum += s1;
    }

    return sum;
}

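/*
 * FPACK16: left-shift each signed 16-bit element of rs2 by GSR.scale,
 * discard the 7 fraction bits, clamp the result to [0, 255] and pack
 * the four bytes into a 32-bit word.
 */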
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        uint32_t val;
        int16_t src = rs2 >> (byte * 16);
        int32_t scaled = src << scale;
        int32_t from_fixed = scaled >> 7;

        val = (from_fixed < 0 ? 0 :
               from_fixed > 255 ? 255 : from_fixed);

        ret |= val << (8 * byte);
    }

    return ret;
}

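/*
 * FPACK32: shift rs1 left by one byte and insert, into the low byte
 * of each 32-bit half, a byte derived from the corresponding 32-bit
 * element of rs2 (scaled by GSR.scale, truncated and clamped to
 * [0, 255]).
 */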
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret = 0;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        uint64_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 23;

        val = (from_fixed < 0 ? 0 :
               (from_fixed > 255) ? 255 : from_fixed);

        ret |= val << (32 * word);
    }

    return ret;
}

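/*
 * FPACKFIX: scale each signed 32-bit element of rs2 by GSR.scale,
 * discard the 16 fraction bits, clamp to the signed 16-bit range and
 * pack the two halfwords into a 32-bit result.
 */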
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ? 32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}

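/*
 * BSHUFFLE: GSR.mask (gsr bits 63:32) holds eight 4-bit indices that
 * select bytes from the 16-byte concatenation of src1:src2; index 0
 * is the most significant byte of src1.
 */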
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } s;
    VIS64 r;
    uint32_t i, mask, host;

    /* Set up S such that we can index across all of the bytes. */
#ifdef HOST_WORDS_BIGENDIAN
    s.ll[0] = src1;
    s.ll[1] = src2;
    host = 0;
#else
    s.ll[1] = src1;
    s.ll[0] = src2;
    host = 15;
#endif
    mask = gsr >> 32;

    for (i = 0; i < 8; ++i) {
        unsigned e = (mask >> (28 - i * 4)) & 0xf;
        /* Mask nibble i selects result byte i, counting from the MSB. */
        r.VIS_B64(7 - i) = s.b[e ^ host];
    }

    return r.ll;
}