1/*
2 * S390x MMU related functions
3 *
4 * Copyright (c) 2011 Alexander Graf
5 * Copyright (c) 2015 Thomas Huth, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 */
17
18#include "qemu/osdep.h"
19#include "qemu/error-report.h"
20#include "exec/address-spaces.h"
21#include "cpu.h"
22#include "internal.h"
23#include "kvm_s390x.h"
24#include "sysemu/kvm.h"
25#include "sysemu/tcg.h"
26#include "exec/exec-all.h"
27#include "trace.h"
28#include "hw/hw.h"
29#include "hw/s390x/storage-keys.h"
30
31/* #define DEBUG_S390 */
32/* #define DEBUG_S390_PTE */
33/* #define DEBUG_S390_STDOUT */
34
/*
 * Debug output helpers.
 *
 * DPRINTF() is active only when DEBUG_S390 is defined. With
 * DEBUG_S390_STDOUT additionally defined, the message goes to stderr and,
 * if qemu_log_separate() says so, to the QEMU log as well; otherwise it
 * goes to the QEMU log only. PTE_DPRINTF() maps to DPRINTF() when
 * DEBUG_S390_PTE is defined and expands to nothing otherwise.
 */
#if defined(DEBUG_S390) && defined(DEBUG_S390_STDOUT)
#define DPRINTF(fmt, ...) \
    do { \
        fprintf(stderr, fmt, ##__VA_ARGS__); \
        if (qemu_log_separate()) { \
            qemu_log(fmt, ##__VA_ARGS__); \
        } \
    } while (0)
#elif defined(DEBUG_S390)
#define DPRINTF(fmt, ...) \
    do { \
        qemu_log(fmt, ##__VA_ARGS__); \
    } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { \
    } while (0)
#endif

#ifdef DEBUG_S390_PTE
#define PTE_DPRINTF DPRINTF
#else
#define PTE_DPRINTF(fmt, ...) \
    do { \
    } while (0)
#endif
55
/*
 * Fetch/store indication bits of the translation exception code.
 * Exactly one of them is OR'ed into the code built by the fault helpers
 * in this file.
 */
#define FS_READ  0x800  /* used for every access that is not a data store */
#define FS_WRITE 0x400  /* used for MMU_DATA_STORE accesses */
59
60static void trigger_access_exception(CPUS390XState *env, uint32_t type,
61 uint32_t ilen, uint64_t tec)
62{
63 S390CPU *cpu = env_archcpu(env);
64
65 if (kvm_enabled()) {
66 kvm_s390_access_exception(cpu, type, tec);
67 } else {
68 CPUState *cs = env_cpu(env);
69 if (type != PGM_ADDRESSING) {
70 stq_phys(cs->as, env->psa + offsetof(LowCore, trans_exc_code), tec);
71 }
72 trigger_pgm_exception(env, type, ilen);
73 }
74}
75
76static void trigger_prot_fault(CPUS390XState *env, target_ulong vaddr,
77 uint64_t asc, int rw, bool exc)
78{
79 uint64_t tec;
80
81 tec = vaddr | (rw == MMU_DATA_STORE ? FS_WRITE : FS_READ) | 4 | asc >> 46;
82
83 DPRINTF("%s: trans_exc_code=%016" PRIx64 "\n", __func__, tec);
84
85 if (!exc) {
86 return;
87 }
88
89 trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, tec);
90}
91
92static void trigger_page_fault(CPUS390XState *env, target_ulong vaddr,
93 uint32_t type, uint64_t asc, int rw, bool exc)
94{
95 int ilen = ILEN_AUTO;
96 uint64_t tec;
97
98 tec = vaddr | (rw == MMU_DATA_STORE ? FS_WRITE : FS_READ) | asc >> 46;
99
100 DPRINTF("%s: trans_exc_code=%016" PRIx64 "\n", __func__, tec);
101
102 if (!exc) {
103 return;
104 }
105
106 /* Code accesses have an undefined ilc. */
107 if (rw == MMU_INST_FETCH) {
108 ilen = 2;
109 }
110
111 trigger_access_exception(env, type, ilen, tec);
112}
113
/*
 * Check whether the address would be protected by Low-Address Protection,
 * i.e. whether it lies in 0..511 or in 4096..4607.
 */
static bool is_low_address(uint64_t addr)
{
    if (addr < 512) {
        return true;
    }
    return addr >= 4096 && addr < 4608;
}
119
120/* check whether Low-Address Protection is enabled for mmu_translate() */
121static bool lowprot_enabled(const CPUS390XState *env, uint64_t asc)
122{
123 if (!(env->cregs[0] & CR0_LOWPROT)) {
124 return false;
125 }
126 if (!(env->psw.mask & PSW_MASK_DAT)) {
127 return true;
128 }
129
130 /* Check the private-space control bit */
131 switch (asc) {
132 case PSW_ASC_PRIMARY:
133 return !(env->cregs[1] & ASCE_PRIVATE_SPACE);
134 case PSW_ASC_SECONDARY:
135 return !(env->cregs[7] & ASCE_PRIVATE_SPACE);
136 case PSW_ASC_HOME:
137 return !(env->cregs[13] & ASCE_PRIVATE_SPACE);
138 default:
139 /* We don't support access register mode */
140 error_report("unsupported addressing mode");
141 exit(1);
142 }
143}
144
145/**
146 * Translate real address to absolute (= physical)
147 * address by taking care of the prefix mapping.
148 */
149target_ulong mmu_real2abs(CPUS390XState *env, target_ulong raddr)
150{
151 if (raddr < 0x2000) {
152 return raddr + env->psa; /* Map the lowcore. */
153 } else if (raddr >= env->psa && raddr < env->psa + 0x2000) {
154 return raddr - env->psa; /* Map the 0 page. */
155 }
156 return raddr;
157}
158
159/* Decode page table entry (normal 4KB page) */
160static int mmu_translate_pte(CPUS390XState *env, target_ulong vaddr,
161 uint64_t asc, uint64_t pt_entry,
162 target_ulong *raddr, int *flags, int rw, bool exc)
163{
164 if (pt_entry & PAGE_INVALID) {
165 DPRINTF("%s: PTE=0x%" PRIx64 " invalid\n", __func__, pt_entry);
166 trigger_page_fault(env, vaddr, PGM_PAGE_TRANS, asc, rw, exc);
167 return -1;
168 }
169 if (pt_entry & PAGE_RES0) {
170 trigger_page_fault(env, vaddr, PGM_TRANS_SPEC, asc, rw, exc);
171 return -1;
172 }
173 if (pt_entry & PAGE_RO) {
174 *flags &= ~PAGE_WRITE;
175 }
176
177 *raddr = pt_entry & ASCE_ORIGIN;
178
179 PTE_DPRINTF("%s: PTE=0x%" PRIx64 "\n", __func__, pt_entry);
180
181 return 0;
182}
183
184/* Decode segment table entry */
185static int mmu_translate_segment(CPUS390XState *env, target_ulong vaddr,
186 uint64_t asc, uint64_t st_entry,
187 target_ulong *raddr, int *flags, int rw,
188 bool exc)
189{
190 CPUState *cs = env_cpu(env);
191 uint64_t origin, offs, pt_entry;
192
193 if (st_entry & SEGMENT_ENTRY_RO) {
194 *flags &= ~PAGE_WRITE;
195 }
196
197 if ((st_entry & SEGMENT_ENTRY_FC) && (env->cregs[0] & CR0_EDAT)) {
198 /* Decode EDAT1 segment frame absolute address (1MB page) */
199 *raddr = (st_entry & 0xfffffffffff00000ULL) | (vaddr & 0xfffff);
200 PTE_DPRINTF("%s: SEG=0x%" PRIx64 "\n", __func__, st_entry);
201 return 0;
202 }
203
204 /* Look up 4KB page entry */
205 origin = st_entry & SEGMENT_ENTRY_ORIGIN;
206 offs = (vaddr & VADDR_PX) >> 9;
207 pt_entry = ldq_phys(cs->as, origin + offs);
208 PTE_DPRINTF("%s: 0x%" PRIx64 " + 0x%" PRIx64 " => 0x%016" PRIx64 "\n",
209 __func__, origin, offs, pt_entry);
210 return mmu_translate_pte(env, vaddr, asc, pt_entry, raddr, flags, rw, exc);
211}
212
213/* Decode region table entries */
214static int mmu_translate_region(CPUS390XState *env, target_ulong vaddr,
215 uint64_t asc, uint64_t entry, int level,
216 target_ulong *raddr, int *flags, int rw,
217 bool exc)
218{
219 CPUState *cs = env_cpu(env);
220 uint64_t origin, offs, new_entry;
221 const int pchks[4] = {
222 PGM_SEGMENT_TRANS, PGM_REG_THIRD_TRANS,
223 PGM_REG_SEC_TRANS, PGM_REG_FIRST_TRANS
224 };
225
226 PTE_DPRINTF("%s: 0x%" PRIx64 "\n", __func__, entry);
227
228 origin = entry & REGION_ENTRY_ORIGIN;
229 offs = (vaddr >> (17 + 11 * level / 4)) & 0x3ff8;
230
231 new_entry = ldq_phys(cs->as, origin + offs);
232 PTE_DPRINTF("%s: 0x%" PRIx64 " + 0x%" PRIx64 " => 0x%016" PRIx64 "\n",
233 __func__, origin, offs, new_entry);
234
235 if ((new_entry & REGION_ENTRY_INV) != 0) {
236 DPRINTF("%s: invalid region\n", __func__);
237 trigger_page_fault(env, vaddr, pchks[level / 4], asc, rw, exc);
238 return -1;
239 }
240
241 if ((new_entry & REGION_ENTRY_TYPE_MASK) != level) {
242 trigger_page_fault(env, vaddr, PGM_TRANS_SPEC, asc, rw, exc);
243 return -1;
244 }
245
246 if (level == ASCE_TYPE_SEGMENT) {
247 return mmu_translate_segment(env, vaddr, asc, new_entry, raddr, flags,
248 rw, exc);
249 }
250
251 /* Check region table offset and length */
252 offs = (vaddr >> (28 + 11 * (level - 4) / 4)) & 3;
253 if (offs < ((new_entry & REGION_ENTRY_TF) >> 6)
254 || offs > (new_entry & REGION_ENTRY_LENGTH)) {
255 DPRINTF("%s: invalid offset or len (%lx)\n", __func__, new_entry);
256 trigger_page_fault(env, vaddr, pchks[level / 4 - 1], asc, rw, exc);
257 return -1;
258 }
259
260 if ((env->cregs[0] & CR0_EDAT) && (new_entry & REGION_ENTRY_RO)) {
261 *flags &= ~PAGE_WRITE;
262 }
263
264 /* yet another region */
265 return mmu_translate_region(env, vaddr, asc, new_entry, level - 4,
266 raddr, flags, rw, exc);
267}
268
269static int mmu_translate_asce(CPUS390XState *env, target_ulong vaddr,
270 uint64_t asc, uint64_t asce, target_ulong *raddr,
271 int *flags, int rw, bool exc)
272{
273 int level;
274 int r;
275
276 if (asce & ASCE_REAL_SPACE) {
277 /* direct mapping */
278 *raddr = vaddr;
279 return 0;
280 }
281
282 level = asce & ASCE_TYPE_MASK;
283 switch (level) {
284 case ASCE_TYPE_REGION1:
285 if ((vaddr >> 62) > (asce & ASCE_TABLE_LENGTH)) {
286 trigger_page_fault(env, vaddr, PGM_REG_FIRST_TRANS, asc, rw, exc);
287 return -1;
288 }
289 break;
290 case ASCE_TYPE_REGION2:
291 if (vaddr & 0xffe0000000000000ULL) {
292 DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64
293 " 0xffe0000000000000ULL\n", __func__, vaddr);
294 trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc);
295 return -1;
296 }
297 if ((vaddr >> 51 & 3) > (asce & ASCE_TABLE_LENGTH)) {
298 trigger_page_fault(env, vaddr, PGM_REG_SEC_TRANS, asc, rw, exc);
299 return -1;
300 }
301 break;
302 case ASCE_TYPE_REGION3:
303 if (vaddr & 0xfffffc0000000000ULL) {
304 DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64
305 " 0xfffffc0000000000ULL\n", __func__, vaddr);
306 trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc);
307 return -1;
308 }
309 if ((vaddr >> 40 & 3) > (asce & ASCE_TABLE_LENGTH)) {
310 trigger_page_fault(env, vaddr, PGM_REG_THIRD_TRANS, asc, rw, exc);
311 return -1;
312 }
313 break;
314 case ASCE_TYPE_SEGMENT:
315 if (vaddr & 0xffffffff80000000ULL) {
316 DPRINTF("%s: vaddr doesn't fit 0x%16" PRIx64
317 " 0xffffffff80000000ULL\n", __func__, vaddr);
318 trigger_page_fault(env, vaddr, PGM_ASCE_TYPE, asc, rw, exc);
319 return -1;
320 }
321 if ((vaddr >> 29 & 3) > (asce & ASCE_TABLE_LENGTH)) {
322 trigger_page_fault(env, vaddr, PGM_SEGMENT_TRANS, asc, rw, exc);
323 return -1;
324 }
325 break;
326 }
327
328 r = mmu_translate_region(env, vaddr, asc, asce, level, raddr, flags, rw,
329 exc);
330 if (!r && rw == MMU_DATA_STORE && !(*flags & PAGE_WRITE)) {
331 trigger_prot_fault(env, vaddr, asc, rw, exc);
332 return -1;
333 }
334
335 return r;
336}
337
338static void mmu_handle_skey(target_ulong addr, int rw, int *flags)
339{
340 static S390SKeysClass *skeyclass;
341 static S390SKeysState *ss;
342 uint8_t key;
343 int rc;
344
345 if (unlikely(addr >= ram_size)) {
346 return;
347 }
348
349 if (unlikely(!ss)) {
350 ss = s390_get_skeys_device();
351 skeyclass = S390_SKEYS_GET_CLASS(ss);
352 }
353
354 /*
355 * Whenever we create a new TLB entry, we set the storage key reference
356 * bit. In case we allow write accesses, we set the storage key change
357 * bit. Whenever the guest changes the storage key, we have to flush the
358 * TLBs of all CPUs (the whole TLB or all affected entries), so that the
359 * next reference/change will result in an MMU fault and make us properly
360 * update the storage key here.
361 *
362 * Note 1: "record of references ... is not necessarily accurate",
363 * "change bit may be set in case no storing has occurred".
364 * -> We can set reference/change bits even on exceptions.
365 * Note 2: certain accesses seem to ignore storage keys. For example,
366 * DAT translation does not set reference bits for table accesses.
367 *
368 * TODO: key-controlled protection. Only CPU accesses make use of the
369 * PSW key. CSS accesses are different - we have to pass in the key.
370 *
371 * TODO: we have races between getting and setting the key.
372 */
373 rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
374 if (rc) {
375 trace_get_skeys_nonzero(rc);
376 return;
377 }
378
379 switch (rw) {
380 case MMU_DATA_LOAD:
381 case MMU_INST_FETCH:
382 /*
383 * The TLB entry has to remain write-protected on read-faults if
384 * the storage key does not indicate a change already. Otherwise
385 * we might miss setting the change bit on write accesses.
386 */
387 if (!(key & SK_C)) {
388 *flags &= ~PAGE_WRITE;
389 }
390 break;
391 case MMU_DATA_STORE:
392 key |= SK_C;
393 break;
394 default:
395 g_assert_not_reached();
396 }
397
398 /* Any store/fetch sets the reference bit */
399 key |= SK_R;
400
401 rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
402 if (rc) {
403 trace_set_skeys_nonzero(rc);
404 }
405}
406
407/**
408 * Translate a virtual (logical) address into a physical (absolute) address.
409 * @param vaddr the virtual address
410 * @param rw 0 = read, 1 = write, 2 = code fetch
411 * @param asc address space control (one of the PSW_ASC_* modes)
412 * @param raddr the translated address is stored to this pointer
413 * @param flags the PAGE_READ/WRITE/EXEC flags are stored to this pointer
414 * @param exc true = inject a program check if a fault occurred
415 * @return 0 if the translation was successful, -1 if a fault occurred
416 */
417int mmu_translate(CPUS390XState *env, target_ulong vaddr, int rw, uint64_t asc,
418 target_ulong *raddr, int *flags, bool exc)
419{
420 uint64_t asce;
421 int r;
422
423
424 *flags = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
425 if (is_low_address(vaddr & TARGET_PAGE_MASK) && lowprot_enabled(env, asc)) {
426 /*
427 * If any part of this page is currently protected, make sure the
428 * TLB entry will not be reused.
429 *
430 * As the protected range is always the first 512 bytes of the
431 * two first pages, we are able to catch all writes to these areas
432 * just by looking at the start address (triggering the tlb miss).
433 */
434 *flags |= PAGE_WRITE_INV;
435 if (is_low_address(vaddr) && rw == MMU_DATA_STORE) {
436 if (exc) {
437 trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, 0);
438 }
439 return -EACCES;
440 }
441 }
442
443 vaddr &= TARGET_PAGE_MASK;
444
445 if (!(env->psw.mask & PSW_MASK_DAT)) {
446 *raddr = vaddr;
447 goto nodat;
448 }
449
450 switch (asc) {
451 case PSW_ASC_PRIMARY:
452 PTE_DPRINTF("%s: asc=primary\n", __func__);
453 asce = env->cregs[1];
454 break;
455 case PSW_ASC_HOME:
456 PTE_DPRINTF("%s: asc=home\n", __func__);
457 asce = env->cregs[13];
458 break;
459 case PSW_ASC_SECONDARY:
460 PTE_DPRINTF("%s: asc=secondary\n", __func__);
461 asce = env->cregs[7];
462 break;
463 case PSW_ASC_ACCREG:
464 default:
465 hw_error("guest switched to unknown asc mode\n");
466 break;
467 }
468
469 /* perform the DAT translation */
470 r = mmu_translate_asce(env, vaddr, asc, asce, raddr, flags, rw, exc);
471 if (r) {
472 return r;
473 }
474
475nodat:
476 /* Convert real address -> absolute address */
477 *raddr = mmu_real2abs(env, *raddr);
478
479 mmu_handle_skey(*raddr, rw, flags);
480 return 0;
481}
482
483/**
484 * translate_pages: Translate a set of consecutive logical page addresses
485 * to absolute addresses. This function is used for TCG and old KVM without
486 * the MEMOP interface.
487 */
488static int translate_pages(S390CPU *cpu, vaddr addr, int nr_pages,
489 target_ulong *pages, bool is_write)
490{
491 uint64_t asc = cpu->env.psw.mask & PSW_MASK_ASC;
492 CPUS390XState *env = &cpu->env;
493 int ret, i, pflags;
494
495 for (i = 0; i < nr_pages; i++) {
496 ret = mmu_translate(env, addr, is_write, asc, &pages[i], &pflags, true);
497 if (ret) {
498 return ret;
499 }
500 if (!address_space_access_valid(&address_space_memory, pages[i],
501 TARGET_PAGE_SIZE, is_write,
502 MEMTXATTRS_UNSPECIFIED)) {
503 trigger_access_exception(env, PGM_ADDRESSING, ILEN_AUTO, 0);
504 return -EFAULT;
505 }
506 addr += TARGET_PAGE_SIZE;
507 }
508
509 return 0;
510}
511
512/**
513 * s390_cpu_virt_mem_rw:
514 * @laddr: the logical start address
515 * @ar: the access register number
516 * @hostbuf: buffer in host memory. NULL = do only checks w/o copying
517 * @len: length that should be transferred
518 * @is_write: true = write, false = read
519 * Returns: 0 on success, non-zero if an exception occurred
520 *
521 * Copy from/to guest memory using logical addresses. Note that we inject a
522 * program interrupt in case there is an error while accessing the memory.
523 *
524 * This function will always return (also for TCG), make sure to call
525 * s390_cpu_virt_mem_handle_exc() to properly exit the CPU loop.
526 */
527int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf,
528 int len, bool is_write)
529{
530 int currlen, nr_pages, i;
531 target_ulong *pages;
532 int ret;
533
534 if (kvm_enabled()) {
535 ret = kvm_s390_mem_op(cpu, laddr, ar, hostbuf, len, is_write);
536 if (ret >= 0) {
537 return ret;
538 }
539 }
540
541 nr_pages = (((laddr & ~TARGET_PAGE_MASK) + len - 1) >> TARGET_PAGE_BITS)
542 + 1;
543 pages = g_malloc(nr_pages * sizeof(*pages));
544
545 ret = translate_pages(cpu, laddr, nr_pages, pages, is_write);
546 if (ret == 0 && hostbuf != NULL) {
547 /* Copy data by stepping through the area page by page */
548 for (i = 0; i < nr_pages; i++) {
549 currlen = MIN(len, TARGET_PAGE_SIZE - (laddr % TARGET_PAGE_SIZE));
550 cpu_physical_memory_rw(pages[i] | (laddr & ~TARGET_PAGE_MASK),
551 hostbuf, currlen, is_write);
552 laddr += currlen;
553 hostbuf += currlen;
554 len -= currlen;
555 }
556 }
557
558 g_free(pages);
559 return ret;
560}
561
562void s390_cpu_virt_mem_handle_exc(S390CPU *cpu, uintptr_t ra)
563{
564 /* KVM will handle the interrupt automatically, TCG has to exit the TB */
565#ifdef CONFIG_TCG
566 if (tcg_enabled()) {
567 cpu_loop_exit_restore(CPU(cpu), ra);
568 }
569#endif
570}
571
572/**
573 * Translate a real address into a physical (absolute) address.
574 * @param raddr the real address
575 * @param rw 0 = read, 1 = write, 2 = code fetch
576 * @param addr the translated address is stored to this pointer
577 * @param flags the PAGE_READ/WRITE/EXEC flags are stored to this pointer
578 * @return 0 if the translation was successful, < 0 if a fault occurred
579 */
580int mmu_translate_real(CPUS390XState *env, target_ulong raddr, int rw,
581 target_ulong *addr, int *flags)
582{
583 const bool lowprot_enabled = env->cregs[0] & CR0_LOWPROT;
584
585 *flags = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
586 if (is_low_address(raddr & TARGET_PAGE_MASK) && lowprot_enabled) {
587 /* see comment in mmu_translate() how this works */
588 *flags |= PAGE_WRITE_INV;
589 if (is_low_address(raddr) && rw == MMU_DATA_STORE) {
590 trigger_access_exception(env, PGM_PROTECTION, ILEN_AUTO, 0);
591 return -EACCES;
592 }
593 }
594
595 *addr = mmu_real2abs(env, raddr & TARGET_PAGE_MASK);
596
597 mmu_handle_skey(*addr, rw, flags);
598 return 0;
599}
600