/*- * Copyright (c) 2006 Peter Wemm * Copyright (c) 2019 Leandro Lupori * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: FreeBSD: src/lib/libkvm/kvm_minidump_riscv.c */ #include #include #include #include #include #include #include #include #include "../../sys/powerpc/include/minidump.h" #include "kvm_private.h" #include "kvm_powerpc64.h" /* * PowerPC64 HPT machine dependent routines for kvm and minidumps. * * Address Translation parameters: * * b = 12 (SLB base page size: 4 KB) * b = 24 (SLB base page size: 16 MB) * p = 12 (page size: 4 KB) * p = 24 (page size: 16 MB) * s = 28 (segment size: 256 MB) */ /* Large (huge) page params */ #define LP_PAGE_SHIFT 24 #define LP_PAGE_SIZE (1ULL << LP_PAGE_SHIFT) #define LP_PAGE_MASK 0x00ffffffULL /* SLB */ #define SEGMENT_LENGTH 0x10000000ULL #define round_seg(x) roundup2((uint64_t)(x), SEGMENT_LENGTH) /* Virtual real-mode VSID in LPARs */ #define VSID_VRMA 0x1ffffffULL #define SLBV_L 0x0000000000000100ULL /* Large page selector */ #define SLBV_CLASS 0x0000000000000080ULL /* Class selector */ #define SLBV_LP_MASK 0x0000000000000030ULL #define SLBV_VSID_MASK 0x3ffffffffffff000ULL /* Virtual SegID mask */ #define SLBV_VSID_SHIFT 12 #define SLBE_B_MASK 0x0000000006000000ULL #define SLBE_B_256MB 0x0000000000000000ULL #define SLBE_VALID 0x0000000008000000ULL /* SLB entry valid */ #define SLBE_INDEX_MASK 0x0000000000000fffULL /* SLB index mask */ #define SLBE_ESID_MASK 0xfffffffff0000000ULL /* Effective SegID mask */ #define SLBE_ESID_SHIFT 28 /* PTE */ #define LPTEH_VSID_SHIFT 12 #define LPTEH_AVPN_MASK 0xffffffffffffff80ULL #define LPTEH_B_MASK 0xc000000000000000ULL #define LPTEH_B_256MB 0x0000000000000000ULL #define LPTEH_BIG 0x0000000000000004ULL /* 4KB/16MB page */ #define LPTEH_HID 0x0000000000000002ULL #define LPTEH_VALID 0x0000000000000001ULL #define LPTEL_RPGN 0xfffffffffffff000ULL #define LPTEL_LP_MASK 0x00000000000ff000ULL #define LPTEL_NOEXEC 0x0000000000000004ULL /* Supervisor (U: RW, S: RW) */ #define LPTEL_BW 0x0000000000000002ULL /* Both Read Only (U: RO, S: RO) */ #define LPTEL_BR 0x0000000000000003ULL #define LPTEL_RW LPTEL_BW #define LPTEL_RO LPTEL_BR /* * PTE AVA field manipulation macros. * * AVA[0:54] = PTEH[2:56] * AVA[VSID] = AVA[0:49] = PTEH[2:51] * AVA[PAGE] = AVA[50:54] = PTEH[52:56] */ #define PTEH_AVA_VSID_MASK 0x3ffffffffffff000UL #define PTEH_AVA_VSID_SHIFT 12 #define PTEH_AVA_VSID(p) \ (((p) & PTEH_AVA_VSID_MASK) >> PTEH_AVA_VSID_SHIFT) #define PTEH_AVA_PAGE_MASK 0x0000000000000f80UL #define PTEH_AVA_PAGE_SHIFT 7 #define PTEH_AVA_PAGE(p) \ (((p) & PTEH_AVA_PAGE_MASK) >> PTEH_AVA_PAGE_SHIFT) /* Masks to obtain the Physical Address from PTE low 64-bit word. */ #define PTEL_PA_MASK 0x0ffffffffffff000UL #define PTEL_LP_PA_MASK 0x0fffffffff000000UL #define PTE_HASH_MASK 0x0000007fffffffffUL /* * Number of AVA/VA page bits to shift right, in order to leave only the * ones that should be considered. * * q = MIN(54, 77-b) (PowerISA v2.07B, 5.7.7.3) * n = q + 1 - 50 (VSID size in bits) * s(ava) = 5 - n * s(va) = (28 - b) - n * * q: bit number of lower limit of VA/AVA bits to compare * n: number of AVA/VA page bits to compare * s: shift amount * 28 - b: VA page size in bits */ #define AVA_PAGE_SHIFT(b) (5 - (MIN(54, 77-(b)) + 1 - 50)) #define VA_PAGE_SHIFT(b) (28 - (b) - (MIN(54, 77-(b)) + 1 - 50)) /* Kernel ESID -> VSID mapping */ #define KERNEL_VSID_BIT 0x0000001000000000UL /* Bit set in all kernel VSIDs */ #define KERNEL_VSID(esid) ((((((uint64_t)esid << 8) | ((uint64_t)esid >> 28)) \ * 0x13bbUL) & (KERNEL_VSID_BIT - 1)) | \ KERNEL_VSID_BIT) /* Types */ typedef uint64_t ppc64_physaddr_t; typedef struct { uint64_t slbv; uint64_t slbe; } ppc64_slb_entry_t; typedef struct { uint64_t pte_hi; uint64_t pte_lo; } ppc64_pt_entry_t; struct hpt_data { ppc64_slb_entry_t *slbs; uint32_t slbsize; }; static void slb_fill(ppc64_slb_entry_t *slb, uint64_t ea, uint64_t i) { uint64_t esid; esid = ea >> SLBE_ESID_SHIFT; slb->slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; slb->slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | i; } static int slb_init(kvm_t *kd) { struct minidumphdr *hdr; struct hpt_data *data; ppc64_slb_entry_t *slb; uint32_t slbsize; uint64_t ea, i, maxmem; hdr = &kd->vmst->hdr; data = PPC64_MMU_DATA(kd); /* Alloc SLBs */ maxmem = hdr->bitmapsize * 8 * PPC64_PAGE_SIZE; slbsize = round_seg(hdr->kernend + 1 - hdr->kernbase + maxmem) / SEGMENT_LENGTH * sizeof(ppc64_slb_entry_t); data->slbs = _kvm_malloc(kd, slbsize); if (data->slbs == NULL) { _kvm_err(kd, kd->program, "cannot allocate slbs"); return (-1); } data->slbsize = slbsize; dprintf("%s: maxmem=0x%jx, segs=%jd, slbsize=0x%jx\n", __func__, (uintmax_t)maxmem, (uintmax_t)slbsize / sizeof(ppc64_slb_entry_t), (uintmax_t)slbsize); /* * Generate needed SLB entries. * * When translating addresses from EA to VA to PA, the needed SLB * entry could be generated on the fly, but this is not the case * for the walk_pages method, that needs to search the SLB entry * by VSID, in order to find out the EA from a PTE. */ /* VM area */ for (ea = hdr->kernbase, i = 0, slb = data->slbs; ea < hdr->kernend; ea += SEGMENT_LENGTH, i++, slb++) slb_fill(slb, ea, i); /* DMAP area */ for (ea = hdr->dmapbase; ea < MIN(hdr->dmapend, hdr->dmapbase + maxmem); ea += SEGMENT_LENGTH, i++, slb++) { slb_fill(slb, ea, i); if (hdr->hw_direct_map) slb->slbv |= SLBV_L; } return (0); } static void ppc64mmu_hpt_cleanup(kvm_t *kd) { struct hpt_data *data; if (kd->vmst == NULL) return; data = PPC64_MMU_DATA(kd); free(data->slbs); free(data); PPC64_MMU_DATA(kd) = NULL; } static int ppc64mmu_hpt_init(kvm_t *kd) { struct hpt_data *data; /* Alloc MMU data */ data = _kvm_malloc(kd, sizeof(*data)); if (data == NULL) { _kvm_err(kd, kd->program, "cannot allocate MMU data"); return (-1); } data->slbs = NULL; PPC64_MMU_DATA(kd) = data; if (slb_init(kd) == -1) goto failed; return (0); failed: ppc64mmu_hpt_cleanup(kd); return (-1); } static ppc64_slb_entry_t * slb_search(kvm_t *kd, kvaddr_t ea) { struct hpt_data *data; ppc64_slb_entry_t *slb; int i, n; data = PPC64_MMU_DATA(kd); slb = data->slbs; n = data->slbsize / sizeof(ppc64_slb_entry_t); /* SLB search */ for (i = 0; i < n; i++, slb++) { if ((slb->slbe & SLBE_VALID) == 0) continue; /* Compare 36-bit ESID of EA with segment one (64-s) */ if ((slb->slbe & SLBE_ESID_MASK) != (ea & SLBE_ESID_MASK)) continue; /* Match found */ dprintf("SEG#%02d: slbv=0x%016jx, slbe=0x%016jx\n", i, (uintmax_t)slb->slbv, (uintmax_t)slb->slbe); break; } /* SLB not found */ if (i == n) { _kvm_err(kd, kd->program, "%s: segment not found for EA 0x%jx", __func__, (uintmax_t)ea); return (NULL); } return (slb); } static ppc64_pt_entry_t pte_get(kvm_t *kd, u_long ptex) { ppc64_pt_entry_t pte, *p; p = _kvm_pmap_get(kd, ptex, sizeof(pte)); pte.pte_hi = be64toh(p->pte_hi); pte.pte_lo = be64toh(p->pte_lo); return (pte); } static int pte_search(kvm_t *kd, ppc64_slb_entry_t *slb, uint64_t hid, kvaddr_t ea, ppc64_pt_entry_t *p) { uint64_t hash, hmask; uint64_t pteg, ptex; uint64_t va_vsid, va_page; int b; int ava_pg_shift, va_pg_shift; ppc64_pt_entry_t pte; /* * Get VA: * * va(78) = va_vsid(50) || va_page(s-b) || offset(b) * * va_vsid: 50-bit VSID (78-s) * va_page: (s-b)-bit VA page */ b = slb->slbv & SLBV_L? LP_PAGE_SHIFT : PPC64_PAGE_SHIFT; va_vsid = (slb->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT; va_page = (ea & ~SLBE_ESID_MASK) >> b; dprintf("%s: hid=0x%jx, ea=0x%016jx, b=%d, va_vsid=0x%010jx, " "va_page=0x%04jx\n", __func__, (uintmax_t)hid, (uintmax_t)ea, b, (uintmax_t)va_vsid, (uintmax_t)va_page); /* * Get hash: * * Primary hash: va_vsid(11:49) ^ va_page(s-b) * Secondary hash: ~primary_hash */ hash = (va_vsid & PTE_HASH_MASK) ^ va_page; if (hid) hash = ~hash & PTE_HASH_MASK; /* * Get PTEG: * * pteg = (hash(0:38) & hmask) << 3 * * hmask (hash mask): mask generated from HTABSIZE || 11*0b1 * hmask = number_of_ptegs - 1 */ hmask = kd->vmst->hdr.pmapsize / (8 * sizeof(ppc64_pt_entry_t)) - 1; pteg = (hash & hmask) << 3; ava_pg_shift = AVA_PAGE_SHIFT(b); va_pg_shift = VA_PAGE_SHIFT(b); dprintf("%s: hash=0x%010jx, hmask=0x%010jx, (hash & hmask)=0x%010jx, " "pteg=0x%011jx, ava_pg_shift=%d, va_pg_shift=%d\n", __func__, (uintmax_t)hash, (uintmax_t)hmask, (uintmax_t)(hash & hmask), (uintmax_t)pteg, ava_pg_shift, va_pg_shift); /* Search PTEG */ for (ptex = pteg; ptex < pteg + 8; ptex++) { pte = pte_get(kd, ptex); /* Check H, V and B */ if ((pte.pte_hi & LPTEH_HID) != hid || (pte.pte_hi & LPTEH_VALID) == 0 || (pte.pte_hi & LPTEH_B_MASK) != LPTEH_B_256MB) continue; /* Compare AVA with VA */ if (PTEH_AVA_VSID(pte.pte_hi) != va_vsid || (PTEH_AVA_PAGE(pte.pte_hi) >> ava_pg_shift) != (va_page >> va_pg_shift)) continue; /* * Check if PTE[L] matches SLBV[L]. * * Note: this check ignores PTE[LP], as does the kernel. */ if (b == PPC64_PAGE_SHIFT) { if (pte.pte_hi & LPTEH_BIG) continue; } else if ((pte.pte_hi & LPTEH_BIG) == 0) continue; /* Match found */ dprintf("%s: PTE found: ptex=0x%jx, pteh=0x%016jx, " "ptel=0x%016jx\n", __func__, (uintmax_t)ptex, (uintmax_t)pte.pte_hi, (uintmax_t)pte.pte_lo); break; } /* Not found? */ if (ptex == pteg + 8) { /* Try secondary hash */ if (hid == 0) return (pte_search(kd, slb, LPTEH_HID, ea, p)); else { _kvm_err(kd, kd->program, "%s: pte not found", __func__); return (-1); } } /* PTE found */ *p = pte; return (0); } static int pte_lookup(kvm_t *kd, kvaddr_t ea, ppc64_pt_entry_t *pte) { ppc64_slb_entry_t *slb; /* First, find SLB */ if ((slb = slb_search(kd, ea)) == NULL) return (-1); /* Next, find PTE */ return (pte_search(kd, slb, 0, ea, pte)); } static int ppc64mmu_hpt_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa) { struct minidumphdr *hdr; struct vmstate *vm; ppc64_pt_entry_t pte; ppc64_physaddr_t pgoff, pgpa; off_t ptoff; int err; vm = kd->vmst; hdr = &vm->hdr; pgoff = va & PPC64_PAGE_MASK; dprintf("%s: va=0x%016jx\n", __func__, (uintmax_t)va); /* * A common use case of libkvm is to first find a symbol address * from the kernel image and then use kvatop to translate it and * to be able to fetch its corresponding data. * * The problem is that, in PowerPC64 case, the addresses of relocated * data won't match those in the kernel image. This is handled here by * adding the relocation offset to those addresses. */ if (va < hdr->dmapbase) va += hdr->startkernel - PPC64_KERNBASE; /* Handle DMAP */ if (va >= hdr->dmapbase && va <= hdr->dmapend) { pgpa = (va & ~hdr->dmapbase) & ~PPC64_PAGE_MASK; ptoff = _kvm_pt_find(kd, pgpa, PPC64_PAGE_SIZE); if (ptoff == -1) { _kvm_err(kd, kd->program, "%s: " "direct map address 0x%jx not in minidump", __func__, (uintmax_t)va); goto invalid; } *pa = ptoff + pgoff; return (PPC64_PAGE_SIZE - pgoff); /* Translate VA to PA */ } else if (va >= hdr->kernbase) { if ((err = pte_lookup(kd, va, &pte)) == -1) { _kvm_err(kd, kd->program, "%s: pte not valid", __func__); goto invalid; } if (pte.pte_hi & LPTEH_BIG) pgpa = (pte.pte_lo & PTEL_LP_PA_MASK) | (va & ~PPC64_PAGE_MASK & LP_PAGE_MASK); else pgpa = pte.pte_lo & PTEL_PA_MASK; dprintf("%s: pgpa=0x%016jx\n", __func__, (uintmax_t)pgpa); ptoff = _kvm_pt_find(kd, pgpa, PPC64_PAGE_SIZE); if (ptoff == -1) { _kvm_err(kd, kd->program, "%s: " "physical address 0x%jx not in minidump", __func__, (uintmax_t)pgpa); goto invalid; } *pa = ptoff + pgoff; return (PPC64_PAGE_SIZE - pgoff); } else { _kvm_err(kd, kd->program, "%s: virtual address 0x%jx not minidumped", __func__, (uintmax_t)va); goto invalid; } invalid: _kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va); return (0); } static vm_prot_t entry_to_prot(ppc64_pt_entry_t *pte) { vm_prot_t prot = VM_PROT_READ; if (pte->pte_lo & LPTEL_RW) prot |= VM_PROT_WRITE; if ((pte->pte_lo & LPTEL_NOEXEC) != 0) prot |= VM_PROT_EXECUTE; return (prot); } static ppc64_slb_entry_t * slb_vsid_search(kvm_t *kd, uint64_t vsid) { struct hpt_data *data; ppc64_slb_entry_t *slb; int i, n; data = PPC64_MMU_DATA(kd); slb = data->slbs; n = data->slbsize / sizeof(ppc64_slb_entry_t); vsid <<= SLBV_VSID_SHIFT; /* SLB search */ for (i = 0; i < n; i++, slb++) { /* Check if valid and compare VSID */ if ((slb->slbe & SLBE_VALID) && (slb->slbv & SLBV_VSID_MASK) == vsid) break; } /* SLB not found */ if (i == n) { _kvm_err(kd, kd->program, "%s: segment not found for VSID 0x%jx", __func__, (uintmax_t)vsid >> SLBV_VSID_SHIFT); return (NULL); } return (slb); } static u_long get_ea(kvm_t *kd, ppc64_pt_entry_t *pte, u_long ptex) { ppc64_slb_entry_t *slb; uint64_t ea, hash, vsid; int b, shift; /* Find SLB */ vsid = PTEH_AVA_VSID(pte->pte_hi); if ((slb = slb_vsid_search(kd, vsid)) == NULL) return (~0UL); /* Get ESID part of EA */ ea = slb->slbe & SLBE_ESID_MASK; b = slb->slbv & SLBV_L? LP_PAGE_SHIFT : PPC64_PAGE_SHIFT; /* * If there are less than 64K PTEGs (16-bit), the upper bits of * EA page must be obtained from PTEH's AVA. */ if (kd->vmst->hdr.pmapsize / (8 * sizeof(ppc64_pt_entry_t)) < 0x10000U) { /* * Add 0 to 5 EA bits, right after VSID. * b == 12: 5 bits * b == 24: 4 bits */ shift = AVA_PAGE_SHIFT(b); ea |= (PTEH_AVA_PAGE(pte->pte_hi) >> shift) << (SLBE_ESID_SHIFT - 5 + shift); } /* Get VA page from hash and add to EA. */ hash = (ptex & ~7) >> 3; if (pte->pte_hi & LPTEH_HID) hash = ~hash & PTE_HASH_MASK; ea |= ((hash ^ (vsid & PTE_HASH_MASK)) << b) & ~SLBE_ESID_MASK; return (ea); } static int ppc64mmu_hpt_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *arg) { struct vmstate *vm; int ret; unsigned int pagesz; u_long dva, pa, va; u_long ptex, nptes; uint64_t vsid; ret = 0; vm = kd->vmst; nptes = vm->hdr.pmapsize / sizeof(ppc64_pt_entry_t); /* Walk through PTEs */ for (ptex = 0; ptex < nptes; ptex++) { ppc64_pt_entry_t pte = pte_get(kd, ptex); if ((pte.pte_hi & LPTEH_VALID) == 0) continue; /* Skip non-kernel related pages, as well as VRMA ones */ vsid = PTEH_AVA_VSID(pte.pte_hi); if ((vsid & KERNEL_VSID_BIT) == 0 || (vsid >> PPC64_PAGE_SHIFT) == VSID_VRMA) continue; /* Retrieve page's VA (EA on PPC64 terminology) */ if ((va = get_ea(kd, &pte, ptex)) == ~0UL) goto out; /* Get PA and page size */ if (pte.pte_hi & LPTEH_BIG) { pa = pte.pte_lo & PTEL_LP_PA_MASK; pagesz = LP_PAGE_SIZE; } else { pa = pte.pte_lo & PTEL_PA_MASK; pagesz = PPC64_PAGE_SIZE; } /* Get DMAP address */ dva = vm->hdr.dmapbase + pa; if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva, entry_to_prot(&pte), pagesz, 0)) goto out; } ret = 1; out: return (ret); } static struct ppc64_mmu_ops ops = { .init = ppc64mmu_hpt_init, .cleanup = ppc64mmu_hpt_cleanup, .kvatop = ppc64mmu_hpt_kvatop, .walk_pages = ppc64mmu_hpt_walk_pages, }; struct ppc64_mmu_ops *ppc64_mmu_ops_hpt = &ops;