1/*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "internal.h"
23#include "qemu/host-utils.h"
24#include "qemu/main-loop.h"
25#include "exec/helper-proto.h"
26#include "crypto/aes.h"
27#include "fpu/softfloat.h"
28#include "qapi/error.h"
29#include "qemu/guest-random.h"
30
31#include "helper_regs.h"
32/*****************************************************************************/
33/* Fixed point operations helpers */
34
35static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
36{
37 if (unlikely(ov)) {
38 env->so = env->ov = 1;
39 } else {
40 env->ov = 0;
41 }
42}
43
44target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
45 uint32_t oe)
46{
47 uint64_t rt = 0;
48 int overflow = 0;
49
50 uint64_t dividend = (uint64_t)ra << 32;
51 uint64_t divisor = (uint32_t)rb;
52
53 if (unlikely(divisor == 0)) {
54 overflow = 1;
55 } else {
56 rt = dividend / divisor;
57 overflow = rt > UINT32_MAX;
58 }
59
60 if (unlikely(overflow)) {
61 rt = 0; /* Undefined */
62 }
63
64 if (oe) {
65 helper_update_ov_legacy(env, overflow);
66 }
67
68 return (target_ulong)rt;
69}
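
/*
 * Worked example: divweu with ra = 1, rb = 2 divides the 64-bit value
 * 0x0000000100000000 by 2, giving 0x80000000.  That quotient still fits in
 * 32 bits, so no overflow is flagged and OV is cleared when OE is set.
 */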
70
71target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
72 uint32_t oe)
73{
74 int64_t rt = 0;
75 int overflow = 0;
76
77 int64_t dividend = (int64_t)ra << 32;
78 int64_t divisor = (int64_t)((int32_t)rb);
79
80 if (unlikely((divisor == 0) ||
81 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
82 overflow = 1;
83 } else {
84 rt = dividend / divisor;
85 overflow = rt != (int32_t)rt;
86 }
87
88 if (unlikely(overflow)) {
89 rt = 0; /* Undefined */
90 }
91
92 if (oe) {
93 helper_update_ov_legacy(env, overflow);
94 }
95
96 return (target_ulong)rt;
97}
98
99#if defined(TARGET_PPC64)
100
101uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
102{
103 uint64_t rt = 0;
104 int overflow = 0;
105
106 overflow = divu128(&rt, &ra, rb);
107
108 if (unlikely(overflow)) {
109 rt = 0; /* Undefined */
110 }
111
112 if (oe) {
113 helper_update_ov_legacy(env, overflow);
114 }
115
116 return rt;
117}
118
119uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
120{
121 int64_t rt = 0;
122 int64_t ra = (int64_t)rau;
123 int64_t rb = (int64_t)rbu;
124 int overflow = divs128(&rt, &ra, rb);
125
126 if (unlikely(overflow)) {
127 rt = 0; /* Undefined */
128 }
129
130 if (oe) {
131 helper_update_ov_legacy(env, overflow);
132 }
133
134 return rt;
135}
136
137#endif
138
139
140#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
142#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
143
/*
 * Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
150#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
151
152/* When you XOR the pattern and there is a match, that byte will be zero */
153#define hasvalue(x, n) (haszero((x) ^ pattern(n)))
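
/*
 * Worked example: with ra = 0x2a, pattern(ra) is 0x2a2a...2a; XORing it
 * into rb turns every byte of rb that equals 0x2a into 0x00, which
 * haszero() then detects with the "(v - 0x01..01) & ~v & 0x80..80" trick.
 */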
154
155uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
156{
157 return hasvalue(rb, ra) ? CRF_GT : 0;
158}
159
160#undef pattern
161#undef haszero
162#undef hasvalue
163
164/*
165 * Return a random number.
166 */
167uint64_t helper_darn32(void)
168{
169 Error *err = NULL;
170 uint32_t ret;
171
172 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
173 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
174 error_get_pretty(err));
175 error_free(err);
176 return -1;
177 }
178
179 return ret;
180}
181
182uint64_t helper_darn64(void)
183{
184 Error *err = NULL;
185 uint64_t ret;
186
187 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
188 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
189 error_get_pretty(err));
190 error_free(err);
191 return -1;
192 }
193
194 return ret;
195}
196
197uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
198{
199 int i;
200 uint64_t ra = 0;
201
202 for (i = 0; i < 8; i++) {
203 int index = (rs >> (i * 8)) & 0xFF;
204 if (index < 64) {
205 if (rb & PPC_BIT(index)) {
206 ra |= 1 << i;
207 }
208 }
209 }
210 return ra;
211}
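
/*
 * Worked example: if byte 0 of rs is 0x00, it selects bit 0 of rb in
 * PowerPC bit numbering (the most significant bit, PPC_BIT(0)); if that
 * bit is set, bit 0 of the result is set.  Byte values >= 64 always
 * contribute a 0 bit.
 */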
212
213#endif
214
215target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
216{
217 target_ulong mask = 0xff;
218 target_ulong ra = 0;
219 int i;
220
221 for (i = 0; i < sizeof(target_ulong); i++) {
222 if ((rs & mask) == (rb & mask)) {
223 ra |= mask;
224 }
225 mask <<= 8;
226 }
227 return ra;
228}
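
/*
 * Worked example (64-bit target): with rs = 0x00000000aabbccdd and
 * rb = 0x00000000aabb0000, only the two low bytes differ, so the result
 * is 0xffffffffffff0000.
 */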
229
230/* shift right arithmetic helper */
231target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
232 target_ulong shift)
233{
234 int32_t ret;
235
236 if (likely(!(shift & 0x20))) {
237 if (likely((uint32_t)shift != 0)) {
238 shift &= 0x1f;
239 ret = (int32_t)value >> shift;
240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
241 env->ca32 = env->ca = 0;
242 } else {
243 env->ca32 = env->ca = 1;
244 }
245 } else {
246 ret = (int32_t)value;
247 env->ca32 = env->ca = 0;
248 }
249 } else {
250 ret = (int32_t)value >> 31;
251 env->ca32 = env->ca = (ret != 0);
252 }
253 return (target_long)ret;
254}
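
/*
 * Worked example: sraw of -5 (0xfffffffb) by 1 yields -3, and because a
 * 1 bit was shifted out of a negative value, CA and CA32 are both set.
 */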
255
256#if defined(TARGET_PPC64)
257target_ulong helper_srad(CPUPPCState *env, target_ulong value,
258 target_ulong shift)
259{
260 int64_t ret;
261
262 if (likely(!(shift & 0x40))) {
263 if (likely((uint64_t)shift != 0)) {
264 shift &= 0x3f;
265 ret = (int64_t)value >> shift;
266 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
267 env->ca32 = env->ca = 0;
268 } else {
269 env->ca32 = env->ca = 1;
270 }
271 } else {
272 ret = (int64_t)value;
273 env->ca32 = env->ca = 0;
274 }
275 } else {
276 ret = (int64_t)value >> 63;
277 env->ca32 = env->ca = (ret != 0);
278 }
279 return ret;
280}
281#endif
282
283#if defined(TARGET_PPC64)
284target_ulong helper_popcntb(target_ulong val)
285{
286 /* Note that we don't fold past bytes */
287 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
288 0x5555555555555555ULL);
289 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
290 0x3333333333333333ULL);
291 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
292 0x0f0f0f0f0f0f0f0fULL);
293 return val;
294}
295
296target_ulong helper_popcntw(target_ulong val)
297{
298 /* Note that we don't fold past words. */
299 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
300 0x5555555555555555ULL);
301 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
302 0x3333333333333333ULL);
303 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
304 0x0f0f0f0f0f0f0f0fULL);
305 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
306 0x00ff00ff00ff00ffULL);
307 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
308 0x0000ffff0000ffffULL);
309 return val;
310}
311#else
312target_ulong helper_popcntb(target_ulong val)
313{
314 /* Note that we don't fold past bytes */
315 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
316 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
317 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
318 return val;
319}
320#endif
321
322/*****************************************************************************/
323/* PowerPC 601 specific instructions (POWER bridge) */
324target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
325{
326 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
327
328 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
329 (int32_t)arg2 == 0) {
330 env->spr[SPR_MQ] = 0;
331 return INT32_MIN;
332 } else {
333 env->spr[SPR_MQ] = tmp % arg2;
334 return tmp / (int32_t)arg2;
335 }
336}
337
338target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
339 target_ulong arg2)
340{
341 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
342
343 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
344 (int32_t)arg2 == 0) {
345 env->so = env->ov = 1;
346 env->spr[SPR_MQ] = 0;
347 return INT32_MIN;
348 } else {
349 env->spr[SPR_MQ] = tmp % arg2;
350 tmp /= (int32_t)arg2;
351 if ((int32_t)tmp != tmp) {
352 env->so = env->ov = 1;
353 } else {
354 env->ov = 0;
355 }
356 return tmp;
357 }
358}
359
360target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
361 target_ulong arg2)
362{
363 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
364 (int32_t)arg2 == 0) {
365 env->spr[SPR_MQ] = 0;
366 return INT32_MIN;
367 } else {
368 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
369 return (int32_t)arg1 / (int32_t)arg2;
370 }
371}
372
373target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
374 target_ulong arg2)
375{
376 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
377 (int32_t)arg2 == 0) {
378 env->so = env->ov = 1;
379 env->spr[SPR_MQ] = 0;
380 return INT32_MIN;
381 } else {
382 env->ov = 0;
383 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
384 return (int32_t)arg1 / (int32_t)arg2;
385 }
386}
387
388/*****************************************************************************/
389/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
391/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
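/*
 * For example, arg = 0 gives 256 * log10(2) + 0.5, about 77.6, i.e. a
 * table entry of 77 once truncated to an integer.
 */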
397#if !defined(CONFIG_USER_ONLY)
398target_ulong helper_602_mfrom(target_ulong arg)
399{
400 if (likely(arg < 602)) {
401#include "mfrom_table.inc.c"
402 return mfrom_ROM_table[arg];
403 } else {
404 return 0;
405 }
406}
407#endif
408
409/*****************************************************************************/
410/* Altivec extension helpers */
411#if defined(HOST_WORDS_BIGENDIAN)
412#define VECTOR_FOR_INORDER_I(index, element) \
413 for (index = 0; index < ARRAY_SIZE(r->element); index++)
414#else
415#define VECTOR_FOR_INORDER_I(index, element) \
416 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
417#endif
418
419/* Saturating arithmetic helpers. */
420#define SATCVT(from, to, from_type, to_type, min, max) \
421 static inline to_type cvt##from##to(from_type x, int *sat) \
422 { \
423 to_type r; \
424 \
425 if (x < (from_type)min) { \
426 r = min; \
427 *sat = 1; \
428 } else if (x > (from_type)max) { \
429 r = max; \
430 *sat = 1; \
431 } else { \
432 r = x; \
433 } \
434 return r; \
435 }
436#define SATCVTU(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
438 { \
439 to_type r; \
440 \
441 if (x > (from_type)max) { \
442 r = max; \
443 *sat = 1; \
444 } else { \
445 r = x; \
446 } \
447 return r; \
448 }
449SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
450SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
451SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
452
453SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
454SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
455SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
456SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
457SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
458SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
459#undef SATCVT
460#undef SATCVTU
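
/*
 * Worked example: cvtsdsw() clamps a 64-bit value such as 0x100000000 to
 * INT32_MAX and sets *sat, while a value already within
 * [INT32_MIN, INT32_MAX] passes through unchanged and leaves *sat alone.
 */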
461
462void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
463{
464 env->vscr = vscr & ~(1u << VSCR_SAT);
465 /* Which bit we set is completely arbitrary, but clear the rest. */
466 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
467 env->vscr_sat.u64[1] = 0;
468 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
469}
470
471uint32_t helper_mfvscr(CPUPPCState *env)
472{
473 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
474 return env->vscr | (sat << VSCR_SAT);
475}
476
477static inline void set_vscr_sat(CPUPPCState *env)
478{
479 /* The choice of non-zero value is arbitrary. */
480 env->vscr_sat.u32[0] = 1;
481}
482
483void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
484{
485 int i;
486
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
489 }
490}
491
492/* vprtybw */
493void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
494{
495 int i;
496 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
497 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
498 res ^= res >> 8;
499 r->u32[i] = res & 1;
500 }
501}
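
/*
 * Worked example: for vprtybw, the word 0x01010100 has three bytes whose
 * least-significant bit is set, so the XOR fold above yields odd parity
 * and the result word is 1.
 */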
502
503/* vprtybd */
504void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
505{
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
508 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
509 res ^= res >> 16;
510 res ^= res >> 8;
511 r->u64[i] = res & 1;
512 }
513}
514
515/* vprtybq */
516void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
517{
518 uint64_t res = b->u64[0] ^ b->u64[1];
519 res ^= res >> 32;
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->VsrD(1) = res & 1;
523 r->VsrD(0) = 0;
524}
525
526#define VARITH_DO(name, op, element) \
527 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
528 { \
529 int i; \
530 \
531 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
532 r->element[i] = a->element[i] op b->element[i]; \
533 } \
534 }
535VARITH_DO(muluwm, *, u32)
536#undef VARITH_DO
537#undef VARITH
538
539#define VARITHFP(suffix, func) \
540 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
541 ppc_avr_t *b) \
542 { \
543 int i; \
544 \
545 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
546 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
547 } \
548 }
549VARITHFP(addfp, float32_add)
550VARITHFP(subfp, float32_sub)
551VARITHFP(minfp, float32_min)
552VARITHFP(maxfp, float32_max)
553#undef VARITHFP
554
555#define VARITHFPFMA(suffix, type) \
556 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
557 ppc_avr_t *b, ppc_avr_t *c) \
558 { \
559 int i; \
560 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
561 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
562 type, &env->vec_status); \
563 } \
564 }
565VARITHFPFMA(maddfp, 0);
566VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
567#undef VARITHFPFMA
568
569#define VARITHSAT_CASE(type, op, cvt, element) \
570 { \
571 type result = (type)a->element[i] op (type)b->element[i]; \
572 r->element[i] = cvt(result, &sat); \
573 }
574
575#define VARITHSAT_DO(name, op, optype, cvt, element) \
576 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
577 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
578 { \
579 int sat = 0; \
580 int i; \
581 \
582 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
583 VARITHSAT_CASE(optype, op, cvt, element); \
584 } \
585 if (sat) { \
586 vscr_sat->u32[0] = 1; \
587 } \
588 }
589#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
590 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
591 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
592#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
593 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
594 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
595VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
596VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
597VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
598VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
599VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
600VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
601#undef VARITHSAT_CASE
602#undef VARITHSAT_DO
603#undef VARITHSAT_SIGNED
604#undef VARITHSAT_UNSIGNED
605
606#define VAVG_DO(name, element, etype) \
607 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
608 { \
609 int i; \
610 \
611 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
612 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
613 r->element[i] = x >> 1; \
614 } \
615 }
616
617#define VAVG(type, signed_element, signed_type, unsigned_element, \
618 unsigned_type) \
619 VAVG_DO(avgs##type, signed_element, signed_type) \
620 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
621VAVG(b, s8, int16_t, u8, uint16_t)
622VAVG(h, s16, int32_t, u16, uint32_t)
623VAVG(w, s32, int64_t, u32, uint64_t)
624#undef VAVG_DO
625#undef VAVG
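
/*
 * Worked example: vavgsb of 1 and 2 computes (1 + 2 + 1) >> 1 = 2, i.e.
 * the average rounded up; the intermediate sum is held in a wider type so
 * it cannot overflow before the shift.
 */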
626
627#define VABSDU_DO(name, element) \
628void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
629{ \
630 int i; \
631 \
632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
633 r->element[i] = (a->element[i] > b->element[i]) ? \
634 (a->element[i] - b->element[i]) : \
635 (b->element[i] - a->element[i]); \
636 } \
637}
638
639/*
640 * VABSDU - Vector absolute difference unsigned
 *   type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
642 * element - element type to access from vector
643 */
644#define VABSDU(type, element) \
645 VABSDU_DO(absdu##type, element)
646VABSDU(b, u8)
647VABSDU(h, u16)
648VABSDU(w, u32)
649#undef VABSDU_DO
650#undef VABSDU
651
652#define VCF(suffix, cvt, element) \
653 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
654 ppc_avr_t *b, uint32_t uim) \
655 { \
656 int i; \
657 \
658 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
659 float32 t = cvt(b->element[i], &env->vec_status); \
660 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
661 } \
662 }
663VCF(ux, uint32_to_float32, u32)
664VCF(sx, int32_to_float32, s32)
665#undef VCF
666
667#define VCMP_DO(suffix, compare, element, record) \
668 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
669 ppc_avr_t *a, ppc_avr_t *b) \
670 { \
671 uint64_t ones = (uint64_t)-1; \
672 uint64_t all = ones; \
673 uint64_t none = 0; \
674 int i; \
675 \
676 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
677 uint64_t result = (a->element[i] compare b->element[i] ? \
678 ones : 0x0); \
679 switch (sizeof(a->element[0])) { \
680 case 8: \
681 r->u64[i] = result; \
682 break; \
683 case 4: \
684 r->u32[i] = result; \
685 break; \
686 case 2: \
687 r->u16[i] = result; \
688 break; \
689 case 1: \
690 r->u8[i] = result; \
691 break; \
692 } \
693 all &= result; \
694 none |= result; \
695 } \
696 if (record) { \
697 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
698 } \
699 }
700#define VCMP(suffix, compare, element) \
701 VCMP_DO(suffix, compare, element, 0) \
702 VCMP_DO(suffix##_dot, compare, element, 1)
703VCMP(equb, ==, u8)
704VCMP(equh, ==, u16)
705VCMP(equw, ==, u32)
706VCMP(equd, ==, u64)
707VCMP(gtub, >, u8)
708VCMP(gtuh, >, u16)
709VCMP(gtuw, >, u32)
710VCMP(gtud, >, u64)
711VCMP(gtsb, >, s8)
712VCMP(gtsh, >, s16)
713VCMP(gtsw, >, s32)
714VCMP(gtsd, >, s64)
715#undef VCMP_DO
716#undef VCMP
717
718#define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
719void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
720 ppc_avr_t *a, ppc_avr_t *b) \
721{ \
722 etype ones = (etype)-1; \
723 etype all = ones; \
724 etype result, none = 0; \
725 int i; \
726 \
727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
728 if (cmpzero) { \
729 result = ((a->element[i] == 0) \
730 || (b->element[i] == 0) \
731 || (a->element[i] != b->element[i]) ? \
732 ones : 0x0); \
733 } else { \
734 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
735 } \
736 r->element[i] = result; \
737 all &= result; \
738 none |= result; \
739 } \
740 if (record) { \
741 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
742 } \
743}
744
745/*
746 * VCMPNEZ - Vector compare not equal to zero
747 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
748 * element - element type to access from vector
749 */
750#define VCMPNE(suffix, element, etype, cmpzero) \
751 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
752 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
753VCMPNE(zb, u8, uint8_t, 1)
754VCMPNE(zh, u16, uint16_t, 1)
755VCMPNE(zw, u32, uint32_t, 1)
756VCMPNE(b, u8, uint8_t, 0)
757VCMPNE(h, u16, uint16_t, 0)
758VCMPNE(w, u32, uint32_t, 0)
759#undef VCMPNE_DO
760#undef VCMPNE
761
762#define VCMPFP_DO(suffix, compare, order, record) \
763 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
764 ppc_avr_t *a, ppc_avr_t *b) \
765 { \
766 uint32_t ones = (uint32_t)-1; \
767 uint32_t all = ones; \
768 uint32_t none = 0; \
769 int i; \
770 \
771 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
772 uint32_t result; \
773 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \
774 &env->vec_status); \
775 if (rel == float_relation_unordered) { \
776 result = 0; \
777 } else if (rel compare order) { \
778 result = ones; \
779 } else { \
780 result = 0; \
781 } \
782 r->u32[i] = result; \
783 all &= result; \
784 none |= result; \
785 } \
786 if (record) { \
787 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
788 } \
789 }
790#define VCMPFP(suffix, compare, order) \
791 VCMPFP_DO(suffix, compare, order, 0) \
792 VCMPFP_DO(suffix##_dot, compare, order, 1)
793VCMPFP(eqfp, ==, float_relation_equal)
794VCMPFP(gefp, !=, float_relation_less)
795VCMPFP(gtfp, ==, float_relation_greater)
796#undef VCMPFP_DO
797#undef VCMPFP
798
799static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
800 ppc_avr_t *a, ppc_avr_t *b, int record)
801{
802 int i;
803 int all_in = 0;
804
805 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
806 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
807 &env->vec_status);
808 if (le_rel == float_relation_unordered) {
809 r->u32[i] = 0xc0000000;
810 all_in = 1;
811 } else {
812 float32 bneg = float32_chs(b->f32[i]);
813 int ge_rel = float32_compare_quiet(a->f32[i], bneg,
814 &env->vec_status);
815 int le = le_rel != float_relation_greater;
816 int ge = ge_rel != float_relation_less;
817
818 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
819 all_in |= (!le | !ge);
820 }
821 }
822 if (record) {
823 env->crf[6] = (all_in == 0) << 1;
824 }
825}
826
827void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
828{
829 vcmpbfp_internal(env, r, a, b, 0);
830}
831
832void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
833 ppc_avr_t *b)
834{
835 vcmpbfp_internal(env, r, a, b, 1);
836}
837
838#define VCT(suffix, satcvt, element) \
839 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
840 ppc_avr_t *b, uint32_t uim) \
841 { \
842 int i; \
843 int sat = 0; \
844 float_status s = env->vec_status; \
845 \
846 set_float_rounding_mode(float_round_to_zero, &s); \
847 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
848 if (float32_is_any_nan(b->f32[i])) { \
849 r->element[i] = 0; \
850 } else { \
851 float64 t = float32_to_float64(b->f32[i], &s); \
852 int64_t j; \
853 \
854 t = float64_scalbn(t, uim, &s); \
855 j = float64_to_int64(t, &s); \
856 r->element[i] = satcvt(j, &sat); \
857 } \
858 } \
859 if (sat) { \
860 set_vscr_sat(env); \
861 } \
862 }
863VCT(uxs, cvtsduw, u32)
864VCT(sxs, cvtsdsw, s32)
865#undef VCT
866
867target_ulong helper_vclzlsbb(ppc_avr_t *r)
868{
869 target_ulong count = 0;
870 int i;
871 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
872 if (r->VsrB(i) & 0x01) {
873 break;
874 }
875 count++;
876 }
877 return count;
878}
879
880target_ulong helper_vctzlsbb(ppc_avr_t *r)
881{
882 target_ulong count = 0;
883 int i;
884 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
885 if (r->VsrB(i) & 0x01) {
886 break;
887 }
888 count++;
889 }
890 return count;
891}
892
893void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
895{
896 int sat = 0;
897 int i;
898
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 int32_t prod = a->s16[i] * b->s16[i];
901 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
902
903 r->s16[i] = cvtswsh(t, &sat);
904 }
905
906 if (sat) {
907 set_vscr_sat(env);
908 }
909}
910
911void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
912 ppc_avr_t *b, ppc_avr_t *c)
913{
914 int sat = 0;
915 int i;
916
917 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
918 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
919 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
920 r->s16[i] = cvtswsh(t, &sat);
921 }
922
923 if (sat) {
924 set_vscr_sat(env);
925 }
926}
927
928void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
929{
930 int i;
931
932 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
933 int32_t prod = a->s16[i] * b->s16[i];
934 r->s16[i] = (int16_t) (prod + c->s16[i]);
935 }
936}
937
938#define VMRG_DO(name, element, access, ofs) \
939 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
940 { \
941 ppc_avr_t result; \
942 int i, half = ARRAY_SIZE(r->element) / 2; \
943 \
944 for (i = 0; i < half; i++) { \
945 result.access(i * 2 + 0) = a->access(i + ofs); \
946 result.access(i * 2 + 1) = b->access(i + ofs); \
947 } \
948 *r = result; \
949 }
950
951#define VMRG(suffix, element, access) \
952 VMRG_DO(mrgl##suffix, element, access, half) \
953 VMRG_DO(mrgh##suffix, element, access, 0)
954VMRG(b, u8, VsrB)
955VMRG(h, u16, VsrH)
956VMRG(w, u32, VsrW)
957#undef VMRG_DO
958#undef VMRG
959
960void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
961 ppc_avr_t *b, ppc_avr_t *c)
962{
963 int32_t prod[16];
964 int i;
965
966 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
967 prod[i] = (int32_t)a->s8[i] * b->u8[i];
968 }
969
970 VECTOR_FOR_INORDER_I(i, s32) {
971 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
972 prod[4 * i + 2] + prod[4 * i + 3];
973 }
974}
975
976void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
977 ppc_avr_t *b, ppc_avr_t *c)
978{
979 int32_t prod[8];
980 int i;
981
982 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
983 prod[i] = a->s16[i] * b->s16[i];
984 }
985
986 VECTOR_FOR_INORDER_I(i, s32) {
987 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
988 }
989}
990
991void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
992 ppc_avr_t *b, ppc_avr_t *c)
993{
994 int32_t prod[8];
995 int i;
996 int sat = 0;
997
998 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
999 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1000 }
1001
1002 VECTOR_FOR_INORDER_I(i, s32) {
1003 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1004
1005 r->u32[i] = cvtsdsw(t, &sat);
1006 }
1007
1008 if (sat) {
1009 set_vscr_sat(env);
1010 }
1011}
1012
1013void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1014 ppc_avr_t *b, ppc_avr_t *c)
1015{
1016 uint16_t prod[16];
1017 int i;
1018
1019 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1020 prod[i] = a->u8[i] * b->u8[i];
1021 }
1022
1023 VECTOR_FOR_INORDER_I(i, u32) {
1024 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1025 prod[4 * i + 2] + prod[4 * i + 3];
1026 }
1027}
1028
1029void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1030 ppc_avr_t *b, ppc_avr_t *c)
1031{
1032 uint32_t prod[8];
1033 int i;
1034
1035 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1036 prod[i] = a->u16[i] * b->u16[i];
1037 }
1038
1039 VECTOR_FOR_INORDER_I(i, u32) {
1040 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1041 }
1042}
1043
1044void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1045 ppc_avr_t *b, ppc_avr_t *c)
1046{
1047 uint32_t prod[8];
1048 int i;
1049 int sat = 0;
1050
1051 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1052 prod[i] = a->u16[i] * b->u16[i];
1053 }
1054
1055 VECTOR_FOR_INORDER_I(i, s32) {
1056 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1057
1058 r->u32[i] = cvtuduw(t, &sat);
1059 }
1060
1061 if (sat) {
1062 set_vscr_sat(env);
1063 }
1064}
1065
1066#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1067 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1068 { \
1069 int i; \
1070 \
1071 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1072 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1073 (cast)b->mul_access(i); \
1074 } \
1075 }
1076
1077#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1078 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1079 { \
1080 int i; \
1081 \
1082 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1083 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1084 (cast)b->mul_access(i + 1); \
1085 } \
1086 }
1087
1088#define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1089 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1090 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1091VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1092VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1093VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1094VMUL(ub, u8, VsrB, VsrH, uint16_t)
1095VMUL(uh, u16, VsrH, VsrW, uint32_t)
1096VMUL(uw, u32, VsrW, VsrD, uint64_t)
1097#undef VMUL_DO_EVN
1098#undef VMUL_DO_ODD
1099#undef VMUL
1100
1101void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1102 ppc_avr_t *c)
1103{
1104 ppc_avr_t result;
1105 int i;
1106
1107 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1108 int s = c->VsrB(i) & 0x1f;
1109 int index = s & 0xf;
1110
1111 if (s & 0x10) {
1112 result.VsrB(i) = b->VsrB(index);
1113 } else {
1114 result.VsrB(i) = a->VsrB(index);
1115 }
1116 }
1117 *r = result;
1118}
1119
1120void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1121 ppc_avr_t *c)
1122{
1123 ppc_avr_t result;
1124 int i;
1125
1126 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1127 int s = c->VsrB(i) & 0x1f;
1128 int index = 15 - (s & 0xf);
1129
1130 if (s & 0x10) {
1131 result.VsrB(i) = a->VsrB(index);
1132 } else {
1133 result.VsrB(i) = b->VsrB(index);
1134 }
1135 }
1136 *r = result;
1137}
1138
1139#if defined(HOST_WORDS_BIGENDIAN)
1140#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1141#define VBPERMD_INDEX(i) (i)
1142#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1143#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1144#else
1145#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1146#define VBPERMD_INDEX(i) (1 - i)
1147#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1148#define EXTRACT_BIT(avr, i, index) \
1149 (extract64((avr)->u64[1 - i], 63 - index, 1))
1150#endif
1151
1152void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1153{
1154 int i, j;
1155 ppc_avr_t result = { .u64 = { 0, 0 } };
1156 VECTOR_FOR_INORDER_I(i, u64) {
1157 for (j = 0; j < 8; j++) {
1158 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1159 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1160 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1161 }
1162 }
1163 }
1164 *r = result;
1165}
1166
1167void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1168{
1169 int i;
1170 uint64_t perm = 0;
1171
1172 VECTOR_FOR_INORDER_I(i, u8) {
1173 int index = VBPERMQ_INDEX(b, i);
1174
1175 if (index < 128) {
1176 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1177 if (a->u64[VBPERMQ_DW(index)] & mask) {
1178 perm |= (0x8000 >> i);
1179 }
1180 }
1181 }
1182
1183 r->VsrD(0) = perm;
1184 r->VsrD(1) = 0;
1185}
1186
1187#undef VBPERMQ_INDEX
1188#undef VBPERMQ_DW
1189
1190#define PMSUM(name, srcfld, trgfld, trgtyp) \
1191void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1192{ \
1193 int i, j; \
1194 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1195 \
1196 VECTOR_FOR_INORDER_I(i, srcfld) { \
1197 prod[i] = 0; \
1198 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1199 if (a->srcfld[i] & (1ull << j)) { \
1200 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1201 } \
1202 } \
1203 } \
1204 \
1205 VECTOR_FOR_INORDER_I(i, trgfld) { \
1206 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1207 } \
1208}
1209
1210PMSUM(vpmsumb, u8, u16, uint16_t)
1211PMSUM(vpmsumh, u16, u32, uint32_t)
1212PMSUM(vpmsumw, u32, u64, uint64_t)
1213
1214void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1215{
1216
1217#ifdef CONFIG_INT128
1218 int i, j;
1219 __uint128_t prod[2];
1220
1221 VECTOR_FOR_INORDER_I(i, u64) {
1222 prod[i] = 0;
1223 for (j = 0; j < 64; j++) {
1224 if (a->u64[i] & (1ull << j)) {
1225 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1226 }
1227 }
1228 }
1229
1230 r->u128 = prod[0] ^ prod[1];
1231
1232#else
1233 int i, j;
1234 ppc_avr_t prod[2];
1235
1236 VECTOR_FOR_INORDER_I(i, u64) {
1237 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1238 for (j = 0; j < 64; j++) {
1239 if (a->u64[i] & (1ull << j)) {
1240 ppc_avr_t bshift;
1241 if (j == 0) {
1242 bshift.VsrD(0) = 0;
1243 bshift.VsrD(1) = b->u64[i];
1244 } else {
1245 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1246 bshift.VsrD(1) = b->u64[i] << j;
1247 }
1248 prod[i].VsrD(1) ^= bshift.VsrD(1);
1249 prod[i].VsrD(0) ^= bshift.VsrD(0);
1250 }
1251 }
1252 }
1253
1254 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1255 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1256#endif
1257}
1258
1259
1260#if defined(HOST_WORDS_BIGENDIAN)
1261#define PKBIG 1
1262#else
1263#define PKBIG 0
1264#endif
1265void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1266{
1267 int i, j;
1268 ppc_avr_t result;
1269#if defined(HOST_WORDS_BIGENDIAN)
1270 const ppc_avr_t *x[2] = { a, b };
1271#else
1272 const ppc_avr_t *x[2] = { b, a };
1273#endif
1274
1275 VECTOR_FOR_INORDER_I(i, u64) {
1276 VECTOR_FOR_INORDER_I(j, u32) {
1277 uint32_t e = x[i]->u32[j];
1278
1279 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1280 ((e >> 6) & 0x3e0) |
1281 ((e >> 3) & 0x1f));
1282 }
1283 }
1284 *r = result;
1285}
1286
1287#define VPK(suffix, from, to, cvt, dosat) \
1288 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1289 ppc_avr_t *a, ppc_avr_t *b) \
1290 { \
1291 int i; \
1292 int sat = 0; \
1293 ppc_avr_t result; \
1294 ppc_avr_t *a0 = PKBIG ? a : b; \
1295 ppc_avr_t *a1 = PKBIG ? b : a; \
1296 \
1297 VECTOR_FOR_INORDER_I(i, from) { \
1298 result.to[i] = cvt(a0->from[i], &sat); \
1299 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1300 } \
1301 *r = result; \
1302 if (dosat && sat) { \
1303 set_vscr_sat(env); \
1304 } \
1305 }
1306#define I(x, y) (x)
1307VPK(shss, s16, s8, cvtshsb, 1)
1308VPK(shus, s16, u8, cvtshub, 1)
1309VPK(swss, s32, s16, cvtswsh, 1)
1310VPK(swus, s32, u16, cvtswuh, 1)
1311VPK(sdss, s64, s32, cvtsdsw, 1)
1312VPK(sdus, s64, u32, cvtsduw, 1)
1313VPK(uhus, u16, u8, cvtuhub, 1)
1314VPK(uwus, u32, u16, cvtuwuh, 1)
1315VPK(udus, u64, u32, cvtuduw, 1)
1316VPK(uhum, u16, u8, I, 0)
1317VPK(uwum, u32, u16, I, 0)
1318VPK(udum, u64, u32, I, 0)
1319#undef I
1320#undef VPK
1321#undef PKBIG
1322
1323void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1324{
1325 int i;
1326
1327 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1328 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1329 }
1330}
1331
1332#define VRFI(suffix, rounding) \
1333 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1334 ppc_avr_t *b) \
1335 { \
1336 int i; \
1337 float_status s = env->vec_status; \
1338 \
1339 set_float_rounding_mode(rounding, &s); \
1340 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1341 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1342 } \
1343 }
1344VRFI(n, float_round_nearest_even)
1345VRFI(m, float_round_down)
1346VRFI(p, float_round_up)
1347VRFI(z, float_round_to_zero)
1348#undef VRFI
1349
1350#define VROTATE(suffix, element, mask) \
1351 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1352 { \
1353 int i; \
1354 \
1355 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1356 unsigned int shift = b->element[i] & mask; \
1357 r->element[i] = (a->element[i] << shift) | \
1358 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1359 } \
1360 }
1361VROTATE(b, u8, 0x7)
1362VROTATE(h, u16, 0xF)
1363VROTATE(w, u32, 0x1F)
1364VROTATE(d, u64, 0x3F)
1365#undef VROTATE
1366
1367void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1368{
1369 int i;
1370
1371 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1372 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1373
1374 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1375 }
1376}
1377
1378#define VRLMI(name, size, element, insert) \
1379void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1380{ \
1381 int i; \
1382 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1383 uint##size##_t src1 = a->element[i]; \
1384 uint##size##_t src2 = b->element[i]; \
1385 uint##size##_t src3 = r->element[i]; \
1386 uint##size##_t begin, end, shift, mask, rot_val; \
1387 \
1388 shift = extract##size(src2, 0, 6); \
1389 end = extract##size(src2, 8, 6); \
1390 begin = extract##size(src2, 16, 6); \
1391 rot_val = rol##size(src1, shift); \
1392 mask = mask_u##size(begin, end); \
1393 if (insert) { \
1394 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1395 } else { \
1396 r->element[i] = (rot_val & mask); \
1397 } \
1398 } \
1399}
1400
1401VRLMI(vrldmi, 64, u64, 1);
1402VRLMI(vrlwmi, 32, u32, 1);
1403VRLMI(vrldnm, 64, u64, 0);
1404VRLMI(vrlwnm, 32, u32, 0);
1405
1406void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1407 ppc_avr_t *c)
1408{
1409 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1410 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1411}
1412
1413void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1414{
1415 int i;
1416
1417 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1418 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1419 }
1420}
1421
1422void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1423{
1424 int i;
1425
1426 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1427 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1428 }
1429}
1430
1431#if defined(HOST_WORDS_BIGENDIAN)
1432#define VEXTU_X_DO(name, size, left) \
1433 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1434 { \
1435 int index; \
1436 if (left) { \
1437 index = (a & 0xf) * 8; \
1438 } else { \
1439 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1440 } \
1441 return int128_getlo(int128_rshift(b->s128, index)) & \
1442 MAKE_64BIT_MASK(0, size); \
1443 }
1444#else
1445#define VEXTU_X_DO(name, size, left) \
1446 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1447 { \
1448 int index; \
1449 if (left) { \
1450 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1451 } else { \
1452 index = (a & 0xf) * 8; \
1453 } \
1454 return int128_getlo(int128_rshift(b->s128, index)) & \
1455 MAKE_64BIT_MASK(0, size); \
1456 }
1457#endif
1458
1459VEXTU_X_DO(vextublx, 8, 1)
1460VEXTU_X_DO(vextuhlx, 16, 1)
1461VEXTU_X_DO(vextuwlx, 32, 1)
1462VEXTU_X_DO(vextubrx, 8, 0)
1463VEXTU_X_DO(vextuhrx, 16, 0)
1464VEXTU_X_DO(vextuwrx, 32, 0)
1465#undef VEXTU_X_DO
1466
1467void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1468{
1469 int i;
1470 unsigned int shift, bytes, size;
1471
1472 size = ARRAY_SIZE(r->u8);
1473 for (i = 0; i < size; i++) {
1474 shift = b->VsrB(i) & 0x7; /* extract shift value */
1475 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1476 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1477 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1478 }
1479}
1480
1481void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1482{
1483 int i;
1484 unsigned int shift, bytes;
1485
    /*
     * Use reverse order, as the destination and source registers can be
     * the same.  The register is modified in place (saving a temporary),
     * and reverse order guarantees that a computed result is never fed
     * back into a byte that has yet to be read.
     */
1491 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1492 shift = b->VsrB(i) & 0x7; /* extract shift value */
1493 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1494 /* extract adjacent bytes */
1495 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1496 }
1497}
1498
1499void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1500{
1501 int sh = shift & 0xf;
1502 int i;
1503 ppc_avr_t result;
1504
1505 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1506 int index = sh + i;
1507 if (index > 0xf) {
1508 result.VsrB(i) = b->VsrB(index - 0x10);
1509 } else {
1510 result.VsrB(i) = a->VsrB(index);
1511 }
1512 }
1513 *r = result;
1514}
1515
1516void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1517{
1518 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1519
1520#if defined(HOST_WORDS_BIGENDIAN)
1521 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1522 memset(&r->u8[16 - sh], 0, sh);
1523#else
1524 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1525 memset(&r->u8[0], 0, sh);
1526#endif
1527}
1528
1529#if defined(HOST_WORDS_BIGENDIAN)
1530#define VINSERT(suffix, element) \
1531 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1532 { \
1533 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1534 sizeof(r->element[0])); \
1535 }
1536#else
1537#define VINSERT(suffix, element) \
1538 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1539 { \
1540 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1541 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1542 }
1543#endif
1544VINSERT(b, u8)
1545VINSERT(h, u16)
1546VINSERT(w, u32)
1547VINSERT(d, u64)
1548#undef VINSERT
1549#if defined(HOST_WORDS_BIGENDIAN)
1550#define VEXTRACT(suffix, element) \
1551 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1552 { \
1553 uint32_t es = sizeof(r->element[0]); \
1554 memmove(&r->u8[8 - es], &b->u8[index], es); \
1555 memset(&r->u8[8], 0, 8); \
1556 memset(&r->u8[0], 0, 8 - es); \
1557 }
1558#else
1559#define VEXTRACT(suffix, element) \
1560 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1561 { \
1562 uint32_t es = sizeof(r->element[0]); \
1563 uint32_t s = (16 - index) - es; \
1564 memmove(&r->u8[8], &b->u8[s], es); \
1565 memset(&r->u8[0], 0, 8); \
1566 memset(&r->u8[8 + es], 0, 8 - es); \
1567 }
1568#endif
1569VEXTRACT(ub, u8)
1570VEXTRACT(uh, u16)
1571VEXTRACT(uw, u32)
1572VEXTRACT(d, u64)
1573#undef VEXTRACT
1574
1575void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1576 ppc_vsr_t *xb, uint32_t index)
1577{
1578 ppc_vsr_t t = { };
1579 size_t es = sizeof(uint32_t);
1580 uint32_t ext_index;
1581 int i;
1582
1583 ext_index = index;
1584 for (i = 0; i < es; i++, ext_index++) {
1585 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1586 }
1587
1588 *xt = t;
1589}
1590
1591void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1592 ppc_vsr_t *xb, uint32_t index)
1593{
1594 ppc_vsr_t t = *xt;
1595 size_t es = sizeof(uint32_t);
1596 int ins_index, i = 0;
1597
1598 ins_index = index;
1599 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1600 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1601 }
1602
1603 *xt = t;
1604}
1605
1606#define VEXT_SIGNED(name, element, cast) \
1607void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1608{ \
1609 int i; \
1610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1611 r->element[i] = (cast)b->element[i]; \
1612 } \
1613}
1614VEXT_SIGNED(vextsb2w, s32, int8_t)
1615VEXT_SIGNED(vextsb2d, s64, int8_t)
1616VEXT_SIGNED(vextsh2w, s32, int16_t)
1617VEXT_SIGNED(vextsh2d, s64, int16_t)
1618VEXT_SIGNED(vextsw2d, s64, int32_t)
1619#undef VEXT_SIGNED
1620
1621#define VNEG(name, element) \
1622void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1623{ \
1624 int i; \
1625 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1626 r->element[i] = -b->element[i]; \
1627 } \
1628}
1629VNEG(vnegw, s32)
1630VNEG(vnegd, s64)
1631#undef VNEG
1632
1633void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1634{
1635 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1636
1637#if defined(HOST_WORDS_BIGENDIAN)
1638 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1639 memset(&r->u8[0], 0, sh);
1640#else
1641 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1642 memset(&r->u8[16 - sh], 0, sh);
1643#endif
1644}
1645
1646void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1647{
1648 int i;
1649
1650 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1651 r->u32[i] = a->u32[i] >= b->u32[i];
1652 }
1653}
1654
1655void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1656{
1657 int64_t t;
1658 int i, upper;
1659 ppc_avr_t result;
1660 int sat = 0;
1661
1662 upper = ARRAY_SIZE(r->s32) - 1;
1663 t = (int64_t)b->VsrSW(upper);
1664 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1665 t += a->VsrSW(i);
1666 result.VsrSW(i) = 0;
1667 }
1668 result.VsrSW(upper) = cvtsdsw(t, &sat);
1669 *r = result;
1670
1671 if (sat) {
1672 set_vscr_sat(env);
1673 }
1674}
1675
1676void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1677{
1678 int i, j, upper;
1679 ppc_avr_t result;
1680 int sat = 0;
1681
1682 upper = 1;
1683 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1684 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1685
1686 result.VsrD(i) = 0;
1687 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1688 t += a->VsrSW(2 * i + j);
1689 }
1690 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1691 }
1692
1693 *r = result;
1694 if (sat) {
1695 set_vscr_sat(env);
1696 }
1697}
1698
1699void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1700{
1701 int i, j;
1702 int sat = 0;
1703
1704 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1705 int64_t t = (int64_t)b->s32[i];
1706
1707 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1708 t += a->s8[4 * i + j];
1709 }
1710 r->s32[i] = cvtsdsw(t, &sat);
1711 }
1712
1713 if (sat) {
1714 set_vscr_sat(env);
1715 }
1716}
1717
1718void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1719{
1720 int sat = 0;
1721 int i;
1722
1723 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1724 int64_t t = (int64_t)b->s32[i];
1725
1726 t += a->s16[2 * i] + a->s16[2 * i + 1];
1727 r->s32[i] = cvtsdsw(t, &sat);
1728 }
1729
1730 if (sat) {
1731 set_vscr_sat(env);
1732 }
1733}
1734
1735void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1736{
1737 int i, j;
1738 int sat = 0;
1739
1740 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1741 uint64_t t = (uint64_t)b->u32[i];
1742
1743 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1744 t += a->u8[4 * i + j];
1745 }
1746 r->u32[i] = cvtuduw(t, &sat);
1747 }
1748
1749 if (sat) {
1750 set_vscr_sat(env);
1751 }
1752}
1753
1754#if defined(HOST_WORDS_BIGENDIAN)
1755#define UPKHI 1
1756#define UPKLO 0
1757#else
1758#define UPKHI 0
1759#define UPKLO 1
1760#endif
1761#define VUPKPX(suffix, hi) \
1762 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1763 { \
1764 int i; \
1765 ppc_avr_t result; \
1766 \
1767 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1768 uint16_t e = b->u16[hi ? i : i + 4]; \
1769 uint8_t a = (e >> 15) ? 0xff : 0; \
1770 uint8_t r = (e >> 10) & 0x1f; \
1771 uint8_t g = (e >> 5) & 0x1f; \
1772 uint8_t b = e & 0x1f; \
1773 \
1774 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1775 } \
1776 *r = result; \
1777 }
1778VUPKPX(lpx, UPKLO)
1779VUPKPX(hpx, UPKHI)
1780#undef VUPKPX
1781
1782#define VUPK(suffix, unpacked, packee, hi) \
1783 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1784 { \
1785 int i; \
1786 ppc_avr_t result; \
1787 \
1788 if (hi) { \
1789 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1790 result.unpacked[i] = b->packee[i]; \
1791 } \
1792 } else { \
1793 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1794 i++) { \
1795 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1796 } \
1797 } \
1798 *r = result; \
1799 }
1800VUPK(hsb, s16, s8, UPKHI)
1801VUPK(hsh, s32, s16, UPKHI)
1802VUPK(hsw, s64, s32, UPKHI)
1803VUPK(lsb, s16, s8, UPKLO)
1804VUPK(lsh, s32, s16, UPKLO)
1805VUPK(lsw, s64, s32, UPKLO)
1806#undef VUPK
1807#undef UPKHI
1808#undef UPKLO
1809
1810#define VGENERIC_DO(name, element) \
1811 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1812 { \
1813 int i; \
1814 \
1815 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1816 r->element[i] = name(b->element[i]); \
1817 } \
1818 }
1819
1820#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1821#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1822
1823VGENERIC_DO(clzb, u8)
1824VGENERIC_DO(clzh, u16)
1825
1826#undef clzb
1827#undef clzh
1828
1829#define ctzb(v) ((v) ? ctz32(v) : 8)
1830#define ctzh(v) ((v) ? ctz32(v) : 16)
1831#define ctzw(v) ctz32((v))
1832#define ctzd(v) ctz64((v))
1833
1834VGENERIC_DO(ctzb, u8)
1835VGENERIC_DO(ctzh, u16)
1836VGENERIC_DO(ctzw, u32)
1837VGENERIC_DO(ctzd, u64)
1838
1839#undef ctzb
1840#undef ctzh
1841#undef ctzw
1842#undef ctzd
1843
1844#define popcntb(v) ctpop8(v)
1845#define popcnth(v) ctpop16(v)
1846#define popcntw(v) ctpop32(v)
1847#define popcntd(v) ctpop64(v)
1848
1849VGENERIC_DO(popcntb, u8)
1850VGENERIC_DO(popcnth, u16)
1851VGENERIC_DO(popcntw, u32)
1852VGENERIC_DO(popcntd, u64)
1853
1854#undef popcntb
1855#undef popcnth
1856#undef popcntw
1857#undef popcntd
1858
1859#undef VGENERIC_DO
1860
1861#if defined(HOST_WORDS_BIGENDIAN)
1862#define QW_ONE { .u64 = { 0, 1 } }
1863#else
1864#define QW_ONE { .u64 = { 1, 0 } }
1865#endif
1866
1867#ifndef CONFIG_INT128
1868
1869static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1870{
1871 t->u64[0] = ~a.u64[0];
1872 t->u64[1] = ~a.u64[1];
1873}
1874
1875static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1876{
1877 if (a.VsrD(0) < b.VsrD(0)) {
1878 return -1;
1879 } else if (a.VsrD(0) > b.VsrD(0)) {
1880 return 1;
1881 } else if (a.VsrD(1) < b.VsrD(1)) {
1882 return -1;
1883 } else if (a.VsrD(1) > b.VsrD(1)) {
1884 return 1;
1885 } else {
1886 return 0;
1887 }
1888}
1889
1890static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1891{
1892 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1893 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1894 (~a.VsrD(1) < b.VsrD(1));
1895}
1896
1897static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1898{
1899 ppc_avr_t not_a;
1900 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1901 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1902 (~a.VsrD(1) < b.VsrD(1));
1903 avr_qw_not(&not_a, a);
1904 return avr_qw_cmpu(not_a, b) < 0;
1905}
1906
1907#endif
1908
1909void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1910{
1911#ifdef CONFIG_INT128
1912 r->u128 = a->u128 + b->u128;
1913#else
1914 avr_qw_add(r, *a, *b);
1915#endif
1916}
1917
1918void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1919{
1920#ifdef CONFIG_INT128
1921 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1922#else
1923
1924 if (c->VsrD(1) & 1) {
1925 ppc_avr_t tmp;
1926
1927 tmp.VsrD(0) = 0;
1928 tmp.VsrD(1) = c->VsrD(1) & 1;
1929 avr_qw_add(&tmp, *a, tmp);
1930 avr_qw_add(r, tmp, *b);
1931 } else {
1932 avr_qw_add(r, *a, *b);
1933 }
1934#endif
1935}
1936
1937void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1938{
1939#ifdef CONFIG_INT128
1940 r->u128 = (~a->u128 < b->u128);
1941#else
1942 ppc_avr_t not_a;
1943
1944 avr_qw_not(&not_a, *a);
1945
1946 r->VsrD(0) = 0;
1947 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1948#endif
1949}
1950
1951void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1952{
1953#ifdef CONFIG_INT128
1954 int carry_out = (~a->u128 < b->u128);
1955 if (!carry_out && (c->u128 & 1)) {
1956 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1957 ((a->u128 != 0) || (b->u128 != 0));
1958 }
1959 r->u128 = carry_out;
1960#else
1961
1962 int carry_in = c->VsrD(1) & 1;
1963 int carry_out = 0;
1964 ppc_avr_t tmp;
1965
1966 carry_out = avr_qw_addc(&tmp, *a, *b);
1967
1968 if (!carry_out && carry_in) {
1969 ppc_avr_t one = QW_ONE;
1970 carry_out = avr_qw_addc(&tmp, tmp, one);
1971 }
1972 r->VsrD(0) = 0;
1973 r->VsrD(1) = carry_out;
1974#endif
1975}
1976
1977void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1978{
1979#ifdef CONFIG_INT128
1980 r->u128 = a->u128 - b->u128;
1981#else
1982 ppc_avr_t tmp;
1983 ppc_avr_t one = QW_ONE;
1984
1985 avr_qw_not(&tmp, *b);
1986 avr_qw_add(&tmp, *a, tmp);
1987 avr_qw_add(r, tmp, one);
1988#endif
1989}
1990
1991void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1992{
1993#ifdef CONFIG_INT128
1994 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1995#else
1996 ppc_avr_t tmp, sum;
1997
1998 avr_qw_not(&tmp, *b);
1999 avr_qw_add(&sum, *a, tmp);
2000
2001 tmp.VsrD(0) = 0;
2002 tmp.VsrD(1) = c->VsrD(1) & 1;
2003 avr_qw_add(r, sum, tmp);
2004#endif
2005}
2006
2007void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2008{
2009#ifdef CONFIG_INT128
2010 r->u128 = (~a->u128 < ~b->u128) ||
2011 (a->u128 + ~b->u128 == (__uint128_t)-1);
2012#else
2013 int carry = (avr_qw_cmpu(*a, *b) > 0);
2014 if (!carry) {
2015 ppc_avr_t tmp;
2016 avr_qw_not(&tmp, *b);
2017 avr_qw_add(&tmp, *a, tmp);
2018 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2019 }
2020 r->VsrD(0) = 0;
2021 r->VsrD(1) = carry;
2022#endif
2023}
2024
2025void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2026{
2027#ifdef CONFIG_INT128
2028 r->u128 =
2029 (~a->u128 < ~b->u128) ||
2030 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2031#else
2032 int carry_in = c->VsrD(1) & 1;
2033 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2034 if (!carry_out && carry_in) {
2035 ppc_avr_t tmp;
2036 avr_qw_not(&tmp, *b);
2037 avr_qw_add(&tmp, *a, tmp);
2038 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2039 }
2040
2041 r->VsrD(0) = 0;
2042 r->VsrD(1) = carry_out;
2043#endif
2044}
2045
2046#define BCD_PLUS_PREF_1 0xC
2047#define BCD_PLUS_PREF_2 0xF
2048#define BCD_PLUS_ALT_1 0xA
2049#define BCD_NEG_PREF 0xD
2050#define BCD_NEG_ALT 0xB
2051#define BCD_PLUS_ALT_2 0xE
2052#define NATIONAL_PLUS 0x2B
2053#define NATIONAL_NEG 0x2D
2054
2055#if defined(HOST_WORDS_BIGENDIAN)
2056#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2057#else
2058#define BCD_DIG_BYTE(n) ((n) / 2)
2059#endif
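
/*
 * Layout note: digit n of a packed BCD value lives in byte n/2 counting
 * from the least-significant byte, in the high nibble when n is odd and
 * the low nibble when n is even.  Digit 0 is the sign nibble, so the 31
 * decimal digits occupy positions 1..31.
 */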
2060
2061static int bcd_get_sgn(ppc_avr_t *bcd)
2062{
2063 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2064 case BCD_PLUS_PREF_1:
2065 case BCD_PLUS_PREF_2:
2066 case BCD_PLUS_ALT_1:
2067 case BCD_PLUS_ALT_2:
2068 {
2069 return 1;
2070 }
2071
2072 case BCD_NEG_PREF:
2073 case BCD_NEG_ALT:
2074 {
2075 return -1;
2076 }
2077
2078 default:
2079 {
2080 return 0;
2081 }
2082 }
2083}
2084
2085static int bcd_preferred_sgn(int sgn, int ps)
2086{
2087 if (sgn >= 0) {
2088 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2089 } else {
2090 return BCD_NEG_PREF;
2091 }
2092}
2093
2094static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2095{
2096 uint8_t result;
2097 if (n & 1) {
2098 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2099 } else {
2100 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2101 }
2102
2103 if (unlikely(result > 9)) {
2104 *invalid = true;
2105 }
2106 return result;
2107}
2108
2109static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2110{
2111 if (n & 1) {
2112 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2113 bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
2114 } else {
2115 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2116 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2117 }
2118}
2119
2120static bool bcd_is_valid(ppc_avr_t *bcd)
2121{
2122 int i;
2123 int invalid = 0;
2124
2125 if (bcd_get_sgn(bcd) == 0) {
2126 return false;
2127 }
2128
2129 for (i = 1; i < 32; i++) {
2130 bcd_get_digit(bcd, i, &invalid);
2131 if (unlikely(invalid)) {
2132 return false;
2133 }
2134 }
2135 return true;
2136}
2137
2138static int bcd_cmp_zero(ppc_avr_t *bcd)
2139{
2140 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2141 return CRF_EQ;
2142 } else {
2143 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2144 }
2145}
2146
2147static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2148{
2149 return reg->VsrH(7 - n);
2150}
2151
2152static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2153{
2154 reg->VsrH(7 - n) = val;
2155}
2156
2157static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2158{
2159 int i;
2160 int invalid = 0;
2161 for (i = 31; i > 0; i--) {
2162 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2163 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2164 if (unlikely(invalid)) {
2165 return 0; /* doesn't matter */
2166 } else if (dig_a > dig_b) {
2167 return 1;
2168 } else if (dig_a < dig_b) {
2169 return -1;
2170 }
2171 }
2172
2173 return 0;
2174}
2175
2176static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2177 int *overflow)
2178{
2179 int carry = 0;
2180 int i;
2181 for (i = 1; i <= 31; i++) {
2182 uint8_t digit = bcd_get_digit(a, i, invalid) +
2183 bcd_get_digit(b, i, invalid) + carry;
2184 if (digit > 9) {
2185 carry = 1;
2186 digit -= 10;
2187 } else {
2188 carry = 0;
2189 }
2190
2191 bcd_put_digit(t, digit, i);
2192 }
2193
2194 *overflow = carry;
2195}
2196
2197static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2198 int *overflow)
2199{
2200 int carry = 0;
2201 int i;
2202
2203 for (i = 1; i <= 31; i++) {
2204 uint8_t digit = bcd_get_digit(a, i, invalid) -
2205 bcd_get_digit(b, i, invalid) + carry;
2206 if (digit & 0x80) {
2207 carry = -1;
2208 digit += 10;
2209 } else {
2210 carry = 0;
2211 }
2212
2213 bcd_put_digit(t, digit, i);
2214 }
2215
2216 *overflow = carry;
2217}
2218
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

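/*
 * bcdcfn. -- convert from national format: seven UTF-16 digits ('0'..'9')
 * plus a leading sign halfword become a signed packed decimal.
 */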
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

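/*
 * bcdctn. -- convert to national format: the seven low-order BCD digits
 * become UTF-16 '0'..'9' halfwords plus a sign halfword; SO is raised if
 * higher digits would be lost or the source encoding is invalid.
 */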
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

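/*
 * bcdcfz. -- convert from zoned format: sixteen zoned bytes (zone nibble
 * 0x3, or 0xF when PS is set) are packed into BCD, with the sign taken
 * from the zone of the least significant byte.
 */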
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

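/*
 * bcdctz. -- convert to zoned format: each of the sixteen low-order BCD
 * digits becomes one zoned byte (zone 0x3, or 0xF when PS is set) and the
 * sign is folded into the zone of the least significant byte; SO is raised
 * when any of digits 17..31 are non-zero.
 */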
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

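/*
 * bcdcfsq. -- convert from signed quadword: the 128-bit two's complement
 * value in b is converted to BCD by splitting it with one 128/64 division
 * by 10^15 and then peeling off decimal digits; SO indicates that the
 * magnitude does not fit in 31 digits.
 */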
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
        lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

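/*
 * bcdctsq. -- convert to signed quadword: the 31 BCD digits are accumulated
 * into a 128-bit binary value by repeated multiply-by-10 (Horner's scheme),
 * then negated if the sign nibble encodes minus.
 */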
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

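/* bcdcpsgn. -- copy the digits of a and the sign nibble of b. */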
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

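/* bcdsetsgn. -- rewrite the sign nibble of b with the preferred code for PS. */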
uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

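/*
 * bcds. -- decimal shift: the signed byte taken from a selects a left
 * (positive) or right (negative) shift of the digits of b by that many
 * digit positions; the sign nibble is preserved and SO reports non-zero
 * digits shifted out on the left.
 */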
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

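/*
 * bcdus. -- decimal unsigned shift: like bcds. but the operand has no sign
 * nibble, so all 32 nibbles are digits and only left shifts can set SO.
 */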
uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s8[7];
#else
    i = a->s8[8];
#endif
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

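/*
 * bcdsr. -- decimal shift and round: as bcds., except that on a right shift
 * the magnitude is rounded away from zero when the most significant digit
 * shifted out is 5 or greater (by adding 1 in the least significant digit
 * position).
 */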
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

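/*
 * bcdtrunc. -- decimal truncate: the halfword taken from a gives the number
 * of digits to keep; higher digits are cleared, the sign is preserved, and
 * SO reports that non-zero digits were discarded.
 */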
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

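/*
 * bcdutrunc. -- decimal unsigned truncate: same as bcdtrunc. but for
 * unsigned operands, so all 32 nibbles are digits and there is no sign
 * nibble to preserve.
 */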
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

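/*
 * vcipher performs one middle round of AES encryption on the state in a
 * (SubBytes, ShiftRows and MixColumns folded into the AES_Te* lookup
 * tables) and XORs in the round key from b; vcipherlast below is the final
 * round, which omits MixColumns.
 */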
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

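/*
 * vshasigmaw/vshasigmad evaluate the SHA-256 and SHA-512 sigma functions.
 * The ST bit of st_six selects the lower-case sigma (message schedule) or
 * upper-case Sigma (compression) variants, and each SIX bit selects sigma0
 * or sigma1 for the corresponding element; the rotate/shift amounts below
 * are the standard FIPS 180-4 constants.
 */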
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

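/*
 * vpermxor: for each byte of the result, the two nibbles of the
 * corresponding byte of c index one byte from a and one from b, which are
 * then XORed together.
 */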
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
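/*
 * brinc implements the SPE bit-reversed increment used to step through
 * FFT-style buffers: within the low bits selected by the mask in arg2 it
 * increments the bit-reversed value of arg1 and returns it re-reversed.
 * For example, with a 3-bit mask, successive calls starting from 0 yield
 * 4, 2, 6, 1, 5, 3, 7.
 */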
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}