/*
 * S/390 memory access helper routines
 *
 * Copyright (c) 2009 Ulrich Hecht
 * Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/s390x/storage-keys.h"
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif

static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
        return pkm & (0x8000 >> psw_key);
    }
    return true;
}
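
/*
 * Illustrative example (not from the PoO): a PKM of 0x4000 authorizes
 * only key 1, so in the problem state psw_key_valid() tests
 * 0x8000 >> 1 == 0x4000 against the PKM and succeeds for key 1 while
 * failing for every other key.
 */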

/* Reduce the length so that addr + len doesn't cross a page boundary. */
static inline uint32_t adj_len_to_page(uint32_t len, uint64_t addr)
{
#ifndef CONFIG_USER_ONLY
    if ((addr & ~TARGET_PAGE_MASK) + len - 1 >= TARGET_PAGE_SIZE) {
        return -(addr | TARGET_PAGE_MASK);
    }
#endif
    return len;
}
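
/*
 * Worked example, assuming 4 KiB pages: for addr = 0x1ffe and len = 16,
 * the access would cross the boundary at 0x2000, so the length is
 * clipped to -(0x1ffe | ~0xfff) = 2 bytes; the caller loops to handle
 * the remainder on the next page.
 */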

/* Trigger a SPECIFICATION exception if an address or a length is not
   naturally aligned. */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    if (v % wordsize) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 6, ra);
    }
}

/* Load a value from memory according to its size. */
static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
                                           int wordsize, uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        return cpu_ldub_data_ra(env, addr, ra);
    case 2:
        return cpu_lduw_data_ra(env, addr, ra);
    default:
        abort();
    }
}

/* Store a value to memory according to its size. */
static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
                                      uint64_t value, int wordsize,
                                      uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        cpu_stb_data_ra(env, addr, value, ra);
        break;
    case 2:
        cpu_stw_data_ra(env, addr, value, ra);
        break;
    default:
        abort();
    }
}

static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte,
                        uint32_t l, uintptr_t ra)
{
    int mmu_idx = cpu_mmu_index(env, false);

    while (l > 0) {
        void *p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
        if (p) {
            /* Access to the whole page in write mode granted. */
            uint32_t l_adj = adj_len_to_page(l, dest);
            memset(p, byte, l_adj);
            dest += l_adj;
            l -= l_adj;
        } else {
            /* We failed to get access to the whole page. The next write
               access will likely fill the QEMU TLB for the next iteration. */
            cpu_stb_data_ra(env, dest, byte, ra);
            dest++;
            l--;
        }
    }
}
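
/*
 * Illustrative sketch, assuming 4 KiB pages with resident, writable TLB
 * entries: a 6000-byte fast_memset() starting at page offset 0xf00
 * completes in three memset() chunks of 256, 4096 and 1648 bytes; only
 * pages that miss in the QEMU TLB fall back to the byte-wise store path.
 */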

#ifndef CONFIG_USER_ONLY
static void fast_memmove_idx(CPUS390XState *env, uint64_t dest, uint64_t src,
                             uint32_t len, int dest_idx, int src_idx,
                             uintptr_t ra)
{
    TCGMemOpIdx oi_dest = make_memop_idx(MO_UB, dest_idx);
    TCGMemOpIdx oi_src = make_memop_idx(MO_UB, src_idx);
    uint32_t len_adj;
    void *src_p;
    void *dest_p;
    uint8_t x;

    while (len > 0) {
        src = wrap_address(env, src);
        dest = wrap_address(env, dest);
        src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, src_idx);
        dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, dest_idx);

        if (src_p && dest_p) {
            /* Access to both whole pages granted. */
            len_adj = adj_len_to_page(adj_len_to_page(len, src), dest);
            memmove(dest_p, src_p, len_adj);
        } else {
            /* We failed to get access to one or both whole pages. The next
               read or write access will likely fill the QEMU TLB for the
               next iteration. */
            len_adj = 1;
            x = helper_ret_ldub_mmu(env, src, oi_src, ra);
            helper_ret_stb_mmu(env, dest, x, oi_dest, ra);
        }
        src += len_adj;
        dest += len_adj;
        len -= len_adj;
    }
}

static int mmu_idx_from_as(uint8_t as)
{
    switch (as) {
    case AS_PRIMARY:
        return MMU_PRIMARY_IDX;
    case AS_SECONDARY:
        return MMU_SECONDARY_IDX;
    case AS_HOME:
        return MMU_HOME_IDX;
    default:
        /* FIXME AS_ACCREG */
        g_assert_not_reached();
    }
}

static void fast_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src,
                            uint32_t len, uint8_t dest_as, uint8_t src_as,
                            uintptr_t ra)
{
    int src_idx = mmu_idx_from_as(src_as);
    int dest_idx = mmu_idx_from_as(dest_as);

    fast_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra);
}
#endif

static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src,
                         uint32_t l, uintptr_t ra)
{
    int mmu_idx = cpu_mmu_index(env, false);

    while (l > 0) {
        void *src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx);
        void *dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
        if (src_p && dest_p) {
            /* Access to both whole pages granted. */
            uint32_t l_adj = adj_len_to_page(l, src);
            l_adj = adj_len_to_page(l_adj, dest);
            memmove(dest_p, src_p, l_adj);
            src += l_adj;
            dest += l_adj;
            l -= l_adj;
        } else {
            /* We failed to get access to one or both whole pages. The next
               read or write access will likely fill the QEMU TLB for the
               next iteration. */
            cpu_stb_data_ra(env, dest, cpu_ldub_data_ra(env, src, ra), ra);
            src++;
            dest++;
            l--;
        }
    }
}

/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
        x &= cpu_ldub_data_ra(env, dest + i, ra);
        c |= x;
        cpu_stb_data_ra(env, dest + i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}

/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        fast_memset(env, dest, 0, l + 1, ra);
        return 0;
    }

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
        x ^= cpu_ldub_data_ra(env, dest + i, ra);
        c |= x;
        cpu_stb_data_ra(env, dest + i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}

/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
        x |= cpu_ldub_data_ra(env, dest + i, ra);
        c |= x;
        cpu_stb_data_ra(env, dest + i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}

/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* mvc and memmove do not behave the same when areas overlap! */
    /* mvc with the destination pointing to the byte after the source is
       the same as memset with the first source byte */
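    /*
     * Illustrative example: MVC with l = 3, src = X and dest = X + 1
     * reads each byte just after it was written, so the byte at X is
     * replicated into X+1 .. X+4.
     */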
    if (dest == src + 1) {
        fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l + 1, ra);
    } else if (dest < src || src + l < dest) {
        fast_memmove(env, dest, src, l + 1, ra);
    } else {
        /* slow version with byte accesses which always work */
        for (i = 0; i <= l; i++) {
            uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
            cpu_stb_data_ra(env, dest + i, x, ra);
        }
    }

    return env->cc_op;
}

void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}

/* move inverse */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int i;

    for (i = 0; i <= l; i++) {
        uint8_t v = cpu_ldub_data_ra(env, src - i, ra);
        cpu_stb_data_ra(env, dest + i, v, ra);
    }
}

/* move numerics */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int i;

    for (i = 0; i <= l; i++) {
        uint8_t v = cpu_ldub_data_ra(env, dest + i, ra) & 0xf0;
        v |= cpu_ldub_data_ra(env, src + i, ra) & 0x0f;
        cpu_stb_data_ra(env, dest + i, v, ra);
    }
}

/* move with offset */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = l >> 4;
    int len_src = l & 0xf;
    uint8_t byte_dest, byte_src;
    int i;

    src += len_src;
    dest += len_dest;

    /* Handle rightmost byte */
    byte_src = cpu_ldub_data_ra(env, src, ra);
    byte_dest = cpu_ldub_data_ra(env, dest, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    cpu_stb_data_ra(env, dest, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = 1; i <= len_dest; i++) {
        byte_dest = byte_src >> 4;
        if (len_src - i >= 0) {
            byte_src = cpu_ldub_data_ra(env, src - i, ra);
        } else {
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        cpu_stb_data_ra(env, dest - i, byte_dest, ra);
    }
}

/* move zones */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int i;

    for (i = 0; i <= l; i++) {
        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra) & 0x0f;
        b |= cpu_ldub_data_ra(env, src + i, ra) & 0xf0;
        cpu_stb_data_ra(env, dest + i, b, ra);
    }
}

/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            cc = 1;
            break;
        } else if (x > y) {
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    return cc;
}

uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}

/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
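
/*
 * Illustrative example: with mask = 0x5 (binary 0101), CLM compares
 * bytes 1 and 3 of the low word of r1 (numbering its bytes 0-3 from
 * the most significant) against two successive bytes at addr; mask
 * bits that are 0 consume a register byte slot but no memory byte.
 */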

static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}

static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
               dependent if bits 32-39 remain unchanged or are set to
               zeros. Choose the former so that the function can also be
               used for TRT. */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
               dependent if bit 32 remains unchanged or is set to zero.
               Choose the latter so that the function can also be used for
               TRT. */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
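
/*
 * Worked example (illustrative): in 24-bit mode, with the register
 * holding 0xaabbccdd11223344 and address = 0x00abcdef, the deposit
 * keeps the upper 40 bits and yields 0xaabbccdd11abcdef.
 */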

static inline uint64_t wrap_length(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        length &= 0x7fffffff;
    }
    return length;
}

static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length(env, env->regs[reg]);
}

static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
    } else {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    }
}

/* search string (c is the byte to search for, r2 the start of the string,
   r1 the end of the string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0. */
    if (env->regs[0] & 0xffffff00u) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 6, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. For now, let's cap at 8k. */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found. R1 & R2 are unmodified. */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found. Set R1 to the location; R2 is unmodified. */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed. Advance R2 to next byte to process. */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero. */
    if (env->regs[0] & 0xffff0000u) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 6, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte. */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. For now, let's cap at 8k. */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found. */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found. Set R1 to the location; R2 is unmodified. */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed. Advance R2 to next byte to process. */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

/* unsigned string compare (c is string terminator) */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. For now, let's cap at 8k. */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal. CC=0, and don't advance the registers. */
                env->cc_op = 0;
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal. CC={1,2}, and advance the registers. Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low". */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers. */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}

/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
{
    /* ??? missing r0 handling, which includes access keys, but more
       importantly optional suppression of the exception! */
    fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC());
    return 0; /* data moved */
}

/* string copy (c is string terminator) */
uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    d = wrap_address(env, d);
    s = wrap_address(env, s);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. For now, let's cap at 8k. */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v = cpu_ldub_data_ra(env, s + len, ra);
        cpu_stb_data_ra(env, d + len, v, ra);
        if (v == c) {
            /* Complete. Set CC=1 and advance R1. */
            env->cc_op = 1;
            env->retxl = s;
            return d + len;
        }
    }

    /* Incomplete. Set CC=3 and signal to advance R1 and R2. */
    env->cc_op = 3;
    env->retxl = s + len;
    return d + len;
}

/* load access registers r1 to r3 from memory at a2 */
void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        /* we either came here by lam or lamy, which have different lengths */
        s390_program_interrupt(env, PGM_SPECIFICATION, ILEN_AUTO, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* store access registers r1 to r3 in memory at a2 */
void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 4, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    uint64_t len = MIN(*srclen, *destlen);
    uint32_t cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* Copy the src array */
    fast_memmove(env, *dest, *src, len, ra);
    *src += len;
    *srclen -= len;
    *dest += len;
    *destlen -= len;

    /* Pad the remaining area */
    if (wordsize == 1) {
        fast_memset(env, *dest, pad, *destlen, ra);
        *dest += *destlen;
        *destlen = 0;
    } else {
        /* If remaining length is odd, pad with odd byte first. */
        if (*destlen & 1) {
            cpu_stb_data_ra(env, *dest, pad & 0xff, ra);
            *dest += 1;
            *destlen -= 1;
        }
        /* The remaining length is even, pad using words. */
        for (; *destlen; *dest += 2, *destlen -= 2) {
            cpu_stw_data_ra(env, *dest, pad, ra);
        }
    }

    return cc;
}
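
/*
 * Worked example (illustrative): with destlen = 6 and srclen = 4,
 * do_mvcl() copies 4 bytes, stores 2 pad bytes behind them and returns
 * CC 2 (first operand longer); equal lengths give CC 0, and a shorter
 * first operand gives CC 1.
 */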

/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
    set_address(env, r1, dest);
    set_address(env, r2, src);

    return cc;
}

/* move long extended */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* move long unicode */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
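
/*
 * Illustrative example: comparing a 2-byte operand "AB" with a 4-byte
 * operand "AB  " under pad = 0x20 yields CC 0, because the exhausted
 * first operand is extended with the pad byte for the remaining two
 * comparison steps.
 */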

/* compare logical long */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}

/* compare logical long extended memcompare insn with padding */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* compare logical long unicode memcompare insn with padding */
uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc = 0;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* checksum */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. For now, let's cap at 8k. */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available. */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum. Note that we can see carry-out
       during folding more than once (but probably not more than twice). */
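    /* Worked example (illustrative): cksm = 0x1_2345_6789 folds to
       0x2345_6789 + 1 = 0x2345_678a, which already fits in 32 bits. */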
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything. */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length. */
    env->retxl = cksm;
    return len;
}

void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
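
/*
 * Worked example (illustrative): packing the zoned bytes F1 F2 F3
 * (len_dest = 1, len_src = 2) first stores the flipped rightmost byte
 * 3F, then packs the remaining digits into 12, giving the packed
 * result 12 3F.
 */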

static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long. */
    const int destlen = 16;

    /* The operands are processed from right to left. */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}

void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}

void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}

void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now expand every digit into a byte with the zone bits set to 0xf0 */
    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* As in pack above, len_src is the index of the current source
           byte, so it remains readable down to index 0. */
        if (len_src >= 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
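
/*
 * Worked example (illustrative): unpacking 12 3F with len_dest = 2 and
 * len_src = 1 first stores the flipped rightmost byte F3, then expands
 * the digits of the remaining source byte into F2 and F1, giving the
 * zoned result F1 F2 F3.
 */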

static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long. */
    const int srclen = 16;

    /* The operands are processed from right to left. */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign. */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0; /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1; /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3; /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}

uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}

uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}

uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;
    int i;

    for (i = 0; i < destlen; i++) {
        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
        /* digit */
        cc |= (b & 0xf0) > 0x90 ? 2 : 0;

        if (i == (destlen - 1)) {
            /* sign */
            cc |= (b & 0xf) < 0xa ? 1 : 0;
        } else {
            /* digit */
            cc |= (b & 0xf) > 0x9 ? 2 : 0;
        }
    }

    return cc;
}
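
/*
 * Illustrative example: TP on the two packed bytes 12 3C returns CC 0;
 * an invalid digit (e.g. 1A 3C) yields CC 2, an invalid sign (e.g.
 * 12 35) yields CC 1, and both problems combined yield CC 3.
 */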

static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    return env->cc_op;
}

void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}

uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. For now, let's cap at 8k. */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    env->retxl = len - i;
    return array + i;
}

static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
                                     uint64_t array, uint64_t trans,
                                     int inc, uintptr_t ra)
{
    int i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);

        if (sbyte != 0) {
            set_address(env, 1, array + i * inc);
            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
            return (i == len) ? 2 : 1;
        }
    }

    return 0;
}

static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}

uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
                     uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, 1, GETPC());
}

static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}

uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
                      uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, -1, GETPC());
}

/* Translate one/two to one/two */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored. For TROO, TROT, it's
       the low 3 bits (double-word aligned). For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH). */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do. */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}

void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
                  uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    Int128 oldv;
    uint64_t oldh, oldl;
    bool fail;

    check_alignment(env, addr, 16, ra);

    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
    oldl = cpu_ldq_data_ra(env, addr + 8, ra);

    oldv = int128_make128(oldl, oldh);
    fail = !int128_eq(oldv, cmpv);
    if (fail) {
        newv = oldv;
    }

    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}

void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                           uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    int mem_idx;
    TCGMemOpIdx oi;
    Int128 oldv;
    bool fail;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
    fail = !int128_eq(oldv, cmpv);

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}

static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                        uint64_t a2, bool parallel)
{
    uint32_t mem_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);
    uint32_t sc = extract32(env->regs[0], 8, 8);
    uint64_t pl = get_address(env, 1) & -16;
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic. */
    if (fc > 1 || sc > 3) {
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /* Sanity check the alignments. */
    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address. */
    probe_write(env, a2, 1 << sc, mem_idx, ra);

    /*
     * Note that the compare-and-swap is atomic, and the store is atomic,
     * but the complete operation is not. Therefore we do not need to
     * assert serial context in order to implement this. That said,
     * restart early if we can't support either operation that is supposed
     * to be atomic.
     */
    if (parallel) {
        uint32_t max = 2;
#ifdef CONFIG_ATOMIC64
        max = 3;
#endif
        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
            (HAVE_ATOMIC128 ? 0 : sc > max)) {
            cpu_loop_exit_atomic(env_cpu(env), ra);
        }
    }

    /* All loads happen before all stores. For simplicity, load the entire
       store value area from the parameter list. */
    svh = cpu_ldq_data_ra(env, pl + 16, ra);
    svl = cpu_ldq_data_ra(env, pl + 24, ra);

    switch (fc) {
    case 0:
        {
            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel) {
#ifdef CONFIG_USER_ONLY
                uint32_t *haddr = g2h(a1);
                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
#else
                TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
                ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif
            } else {
                ov = cpu_ldl_data_ra(env, a1, ra);
                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
        }
        break;

    case 1:
        {
            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel) {
#ifdef CONFIG_ATOMIC64
# ifdef CONFIG_USER_ONLY
                uint64_t *haddr = g2h(a1);
                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
# else
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
                ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
# endif
#else
                /* Note that we asserted !parallel above. */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_data_ra(env, a1, ra);
                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    case 2:
        {
            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
            Int128 nv = int128_make128(nvl, nvh);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (!parallel) {
                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);

                ov = int128_make128(ol, oh);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }

                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
            } else if (HAVE_CMPXCHG128) {
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
                cc = !int128_eq(ov, cv);
            } else {
                /* Note that we asserted !parallel above. */
                g_assert_not_reached();
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded. Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh. */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_data_ra(env, a2, svh >> 56, ra);
            break;
        case 1:
            cpu_stw_data_ra(env, a2, svh >> 48, ra);
            break;
        case 2:
            cpu_stl_data_ra(env, a2, svh >> 32, ra);
            break;
        case 3:
            cpu_stq_data_ra(env, a2, svh, ra);
            break;
        case 4:
            if (!parallel) {
                cpu_stq_data_ra(env, a2 + 0, svh, ra);
                cpu_stq_data_ra(env, a2 + 8, svl, ra);
            } else if (HAVE_ATOMIC128) {
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                Int128 sv = int128_make128(svl, svh);
                helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
            } else {
                /* Note that we asserted !parallel above. */
                g_assert_not_reached();
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    s390_program_interrupt(env, PGM_SPECIFICATION, 6, ra);
    g_assert_not_reached();
}

uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}

uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}

#if !defined(CONFIG_USER_ONLY)
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    if (src & 0x7) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 6, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    tlb_flush(env_cpu(env));
}

void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    if (src & 0x3) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 4, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    tlb_flush(env_cpu(env));
}

void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t dest = a2;
    uint32_t i;

    if (dest & 0x7) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 6, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
        dest += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }
}

void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t dest = a2;
    uint32_t i;

    if (dest & 0x3) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 4, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
        dest += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }
}

uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
{
    uintptr_t ra = GETPC();
    int i;

    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;

    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
        cpu_stq_real_ra(env, real_addr + i, 0, ra);
    }

    return 0;
}

uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = 0;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = 0;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = 0;
    return 3;
}

/* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    if (addr > ram_size) {
        return 0;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }
    return key;
}

/* set storage key extended */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    if (addr > ram_size) {
        return;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    key = (uint8_t) r1;
    skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));
}

/* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;

    if (r2 > ram_size) {
        return 0;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }

    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}

uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    uintptr_t ra = GETPC();
    int cc = 0, i;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    }

    /* XXX replace w/ memcpy */
    for (i = 0; i < l; i++) {
        uint8_t x = cpu_ldub_primary_ra(env, a2 + i, ra);
        cpu_stb_secondary_ra(env, a1 + i, x, ra);
    }

    return cc;
}

uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    uintptr_t ra = GETPC();
    int cc = 0, i;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    }

    /* XXX replace w/ memcpy */
    for (i = 0; i < l; i++) {
        uint8_t x = cpu_ldub_secondary_ra(env, a2 + i, ra);
        cpu_stb_primary_ra(env, a1 + i, x, ra);
    }

    return cc;
}

void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    if (r2 & 0xff000) {
        s390_program_interrupt(env, PGM_SPECIFICATION, 4, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_real_ra(env, raddr, ra);
            if (!(entry & REGION_ENTRY_INV)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_INV;
                cpu_stq_real_ra(env, raddr, entry, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}

/* invalidate pte */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += (vaddr & VADDR_PX) >> 9;
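    /* Illustrative example: for vaddr = 0x12345000 the page index is
       0x45, so (vaddr & VADDR_PX) >> 9 adds 0x45 * 8 = 0x228 to the
       page-table origin (assuming 8-byte PTEs). */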
1975
1976 /* Mark the page table entry as invalid */
1977 pte = cpu_ldq_real_ra(env, pte_addr, ra);
1978 pte |= PAGE_INVALID;
1979 cpu_stq_real_ra(env, pte_addr, pte, ra);
1980
1981 /* XXX we exploit the fact that Linux passes the exact virtual
1982 address here - it's not obliged to! */
1983 if (m4 & 1) {
1984 if (vaddr & ~VADDR_PX) {
1985 tlb_flush_page(cs, page);
1986 /* XXX 31-bit hack */
1987 tlb_flush_page(cs, page ^ 0x80000000);
1988 } else {
1989 /* looks like we don't have a valid virtual address */
1990 tlb_flush(cs);
1991 }
1992 } else {
1993 if (vaddr & ~VADDR_PX) {
1994 tlb_flush_page_all_cpus_synced(cs, page);
1995 /* XXX 31-bit hack */
1996 tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
1997 } else {
1998 /* looks like we don't have a valid virtual address */
1999 tlb_flush_all_cpus_synced(cs);
2000 }
2001 }
2002}
2003
2004/* flush local tlb */
2005void HELPER(ptlb)(CPUS390XState *env)
2006{
2007 tlb_flush(env_cpu(env));
2008}
2009
2010/* flush global tlb */
2011void HELPER(purge)(CPUS390XState *env)
2012{
2013 tlb_flush_all_cpus_synced(env_cpu(env));
2014}
2015
2016/* load using real address */
2017uint64_t HELPER(lura)(CPUS390XState *env, uint64_t addr)
2018{
2019 return cpu_ldl_real_ra(env, wrap_address(env, addr), GETPC());
2020}
2021
2022uint64_t HELPER(lurag)(CPUS390XState *env, uint64_t addr)
2023{
2024 return cpu_ldq_real_ra(env, wrap_address(env, addr), GETPC());
2025}
2026
2027/* store using real address */
2028void HELPER(stura)(CPUS390XState *env, uint64_t addr, uint64_t v1)
2029{
2030 cpu_stl_real_ra(env, wrap_address(env, addr), (uint32_t)v1, GETPC());
2031
2032 if ((env->psw.mask & PSW_MASK_PER) &&
2033 (env->cregs[9] & PER_CR9_EVENT_STORE) &&
2034 (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) {
2035 /* PSW is saved just before calling the helper. */
2036 env->per_address = env->psw.addr;
2037 env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env);
2038 }
2039}
2040
2041void HELPER(sturg)(CPUS390XState *env, uint64_t addr, uint64_t v1)
2042{
2043 cpu_stq_real_ra(env, wrap_address(env, addr), v1, GETPC());
2044
2045 if ((env->psw.mask & PSW_MASK_PER) &&
2046 (env->cregs[9] & PER_CR9_EVENT_STORE) &&
2047 (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) {
2048 /* PSW is saved just before calling the helper. */
2049 env->per_address = env->psw.addr;
2050 env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env);
2051 }
2052}
2053
2054/* load real address */
2055uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2056{
2057 CPUState *cs = env_cpu(env);
2058 uint32_t cc = 0;
2059 uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2060 uint64_t ret;
2061 int old_exc, flags;
2062
2063 /* XXX incomplete - has more corner cases */
2064 if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2065 s390_program_interrupt(env, PGM_SPECIAL_OP, 2, GETPC());
2066 }
2067
2068 old_exc = cs->exception_index;
2069 if (mmu_translate(env, addr, 0, asc, &ret, &flags, true)) {
2070 cc = 3;
2071 }
2072 if (cs->exception_index == EXCP_PGM) {
2073 ret = env->int_pgm_code | 0x80000000;
2074 } else {
2075 ret |= addr & ~TARGET_PAGE_MASK;
2076 }
2077 cs->exception_index = old_exc;
2078
2079 env->cc_op = cc;
2080 return ret;
2081}
2082#endif
2083
2084/* load pair from quadword */
2085uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2086{
2087 uintptr_t ra = GETPC();
2088 uint64_t hi, lo;
2089
2090 check_alignment(env, addr, 16, ra);
2091 hi = cpu_ldq_data_ra(env, addr + 0, ra);
2092 lo = cpu_ldq_data_ra(env, addr + 8, ra);
2093
2094 env->retxl = lo;
2095 return hi;
2096}
2097
uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint64_t hi, lo;
    int mem_idx;
    TCGMemOpIdx oi;
    Int128 v;

    assert(HAVE_ATOMIC128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
    v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
    hi = int128_gethi(v);
    lo = int128_getlo(v);

    env->retxl = lo;
    return hi;
}

/* store pair to quadword */
void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
                  uint64_t low, uint64_t high)
{
    uintptr_t ra = GETPC();

    check_alignment(env, addr, 16, ra);
    cpu_stq_data_ra(env, addr + 0, high, ra);
    cpu_stq_data_ra(env, addr + 8, low, ra);
}

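/* As above, but perform the 16-byte store as a single atomic operation. */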
void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
                           uint64_t low, uint64_t high)
{
    uintptr_t ra = GETPC();
    int mem_idx;
    TCGMemOpIdx oi;
    Int128 v;

    assert(HAVE_ATOMIC128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
    v = int128_make128(low, high);
    helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
}

/* Execute instruction.  This instruction executes a target instruction
   whose second byte has been ORed with the low byte of r1.  It changes
   neither the target instruction in memory nor the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus by TB translation.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63]. */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction. */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The most common cases can be sped up by avoiding a new TB. */
    if ((opc & 0xf0) == 0xd0) {
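        /* 0xd0..0xdf are the SS-format storage-to-storage instructions
           (MVC, CLC, XC, ...) - the classic targets of EXECUTE. */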
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
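            /* insn is left-aligned, so the 6-byte SS format sits in bits
               63..16 as opcode(8) L(8) B1(4) D1(12) B2(4) D2(12); the
               extractions below pick the operand fields apart. */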
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, env->regs[b1] + d1);
            uint64_t a2 = wrap_address(env, env->regs[b2] + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
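        /* SUPERVISOR CALL: raise the SVC exception directly, just as the
           translator would have done for an inline 0x0a opcode. */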
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  The ilen fits in the
       low 16 bits, which the left-aligned insn leaves clear.  This also
       ensures that ex_value is non-zero, which flags that we are in a
       state that requires such execution.  */
    env->ex_value = insn | ilen;
}

uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    if (!(env->psw.mask & PSW_MASK_DAT)) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, 6, ra);
    }

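    /* In IBM bit numbering, each 16-bit OAC holds the access key in
       bits 0-3, the address-space control in bits 8-9, and validity
       flags for those two fields in bit 14 (K) and bit 15 (A). */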
    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;
    dest_a = val & 0x1;

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, 6, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, 6, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, 6, ra);
    }

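    /* MVCOS moves at most 4096 bytes per execution; cc 3 tells the
       program to re-execute the instruction for the remainder. */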
    len = wrap_length(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR mode and proper problem-state handling (using PSW keys)
       are missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        s390_program_interrupt(env, PGM_ADDRESSING, 6, ra);
    }

    /* FIXME: a) LAP
     *        b) Access using correct keys
     *        c) AR-mode
     */
#ifdef CONFIG_USER_ONLY
    /* PSW keys are never valid in user mode; we can never reach this. */
    g_assert_not_reached();
#else
    fast_memmove_as(env, dest, src, len, dest_as, src_as, ra);
#endif

    return cc;
}

/* Decode a Unicode character.  A return value < 0 indicates success, storing
   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   value >= 0 indicates failure and is the CC value to be returned. */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);

/* Encode a Unicode character.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure and is the CC value to be returned. */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);

static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                       bool enh_check, uintptr_t ra,
                       uint32_t *ochar, uint32_t *olen)
{
    uint8_t s0, s1, s2, s3;
    uint32_t c, l;

    if (ilen < 1) {
        return 0;
    }
    s0 = cpu_ldub_data_ra(env, addr, ra);
    if (s0 <= 0x7f) {
        /* one byte character */
        l = 1;
        c = s0;
    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
        /* invalid: a bare continuation byte or, with the enhanced check,
           the overlong starters 0xc0/0xc1 */
        return 2;
    } else if (s0 <= 0xdf) {
        /* two byte character */
        l = 2;
        if (ilen < 2) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        c = s0 & 0x1f;
        c = (c << 6) | (s1 & 0x3f);
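        /* No overlong check is needed here: s0 >= 0xc2 already
           guarantees c >= 0x80. */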
        if (enh_check && (s1 & 0xc0) != 0x80) {
            return 2;
        }
    } else if (s0 <= 0xef) {
        /* three byte character */
        l = 3;
        if (ilen < 3) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
        c = s0 & 0x0f;
        c = (c << 6) | (s1 & 0x3f);
        c = (c << 6) | (s2 & 0x3f);
        /* Fold the byte-by-byte range descriptions in the PoO into
           tests against the complete value.  It disallows encodings
           that could be smaller (valid three-byte values start at
           0x800), and the UTF-16 surrogates. */
        if (enh_check
            && ((s1 & 0xc0) != 0x80
                || (s2 & 0xc0) != 0x80
                || c < 0x800
                || (c >= 0xd800 && c <= 0xdfff))) {
            return 2;
        }
    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
        /* four byte character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
        c = s0 & 0x07;
        c = (c << 6) | (s1 & 0x3f);
        c = (c << 6) | (s2 & 0x3f);
        c = (c << 6) | (s3 & 0x3f);
        /* See above. */
        if (enh_check
            && ((s1 & 0xc0) != 0x80
                || (s2 & 0xc0) != 0x80
                || (s3 & 0xc0) != 0x80
                || c < 0x010000
                || c > 0x10ffff)) {
            return 2;
        }
    } else {
        /* invalid character */
        return 2;
    }

    *ochar = c;
    *olen = l;
    return -1;
}

static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint16_t s0, s1;
    uint32_t c, l;

    if (ilen < 2) {
        return 0;
    }
    s0 = cpu_lduw_data_ra(env, addr, ra);
    if ((s0 & 0xfc00) != 0xd800) {
        /* one word character */
        l = 2;
        c = s0;
    } else {
        /* two word character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
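        /* Recover the code point from the surrogate pair.  Adding 1 to
           the top four payload bits of the high surrogate is equivalent
           to the usual ((s0 & 0x3ff) << 10 | (s1 & 0x3ff)) + 0x10000. */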
        c = extract32(s0, 6, 4) + 1;
        c = (c << 6) | (s0 & 0x3f);
        c = (c << 10) | (s1 & 0x3ff);
        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
            /* invalid surrogate character */
            return 2;
        }
    }

    *ochar = c;
    *olen = l;
    return -1;
}

static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint32_t c;

    if (ilen < 4) {
        return 0;
    }
    c = cpu_ldl_data_ra(env, addr, ra);
    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
        /* invalid unicode character */
        return 2;
    }

    *ochar = c;
    *olen = 4;
    return -1;
}

static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                       uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint8_t d[4];
    uint32_t l, i;

    if (c <= 0x7f) {
        /* one byte character */
        l = 1;
        d[0] = c;
    } else if (c <= 0x7ff) {
        /* two byte character */
        l = 2;
        d[1] = 0x80 | extract32(c, 0, 6);
        d[0] = 0xc0 | extract32(c, 6, 5);
    } else if (c <= 0xffff) {
        /* three byte character */
        l = 3;
        d[2] = 0x80 | extract32(c, 0, 6);
        d[1] = 0x80 | extract32(c, 6, 6);
        d[0] = 0xe0 | extract32(c, 12, 4);
    } else {
        /* four byte character */
        l = 4;
        d[3] = 0x80 | extract32(c, 0, 6);
        d[2] = 0x80 | extract32(c, 6, 6);
        d[1] = 0x80 | extract32(c, 12, 6);
        d[0] = 0xf0 | extract32(c, 18, 3);
    }
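
    /* For example, U+20AC encodes to the three bytes 0xe2 0x82 0xac. */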
    if (ilen < l) {
        return 1;
    }
    for (i = 0; i < l; ++i) {
        cpu_stb_data_ra(env, addr + i, d[i], ra);
    }

    *olen = l;
    return -1;
}

static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint16_t d0, d1;

    if (c <= 0xffff) {
        /* one word character */
        if (ilen < 2) {
            return 1;
        }
        cpu_stw_data_ra(env, addr, c, ra);
        *olen = 2;
    } else {
        /* two word character */
        if (ilen < 4) {
            return 1;
        }
        d1 = 0xdc00 | extract32(c, 0, 10);
        d0 = 0xd800 | extract32(c, 10, 6);
        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
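        /* For example, U+1D11E becomes the surrogate pair 0xd834 0xdd1e. */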
        cpu_stw_data_ra(env, addr + 0, d0, ra);
        cpu_stw_data_ra(env, addr + 2, d1, ra);
        *olen = 4;
    }

    return -1;
}

static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    if (ilen < 4) {
        return 1;
    }
    cpu_stl_data_ra(env, addr, c, ra);
    *olen = 4;
    return -1;
}

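/* Common driver for the CONVERT UNICODE instructions: repeatedly decode
   one character from the second operand and encode it into the first,
   updating both register pairs (address and remaining length) so that
   the instruction is restartable after an interruption. */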
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do: for now, cap it at 256
       characters.  Ending with cc 3 makes the program re-execute the
       instruction for the remainder. */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        cc = 3;
    }

    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}

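/* The six CONVERT UNICODE instructions are the pairings of the three
   decoders with the three encoders above. */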
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}

uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}

uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}

uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}

uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}

uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}

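/* Probe the given range for writability, page by page, raising the
   appropriate access exception if some part of it cannot be written. */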
void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
                        uintptr_t ra)
{
    /* Test the actual access, not just any access to the page: with
       low-address protection (LAP), only part of a page may be
       write-protected. */
    while (len) {
        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
        const uint64_t curlen = MIN(pagelen, len);

        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
        addr = wrap_address(env, addr + curlen);
        len -= curlen;
    }
}

void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}