From cd2b8520681a57dec6986f5ba62b2b23e831b98c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Aug 2018 23:06:25 +0200 Subject: [PATCH 01/23] parisc: Use PARISC_ITLB_TRAP constant in entry.S Fixes: 5b00ca0b8035 ("parisc: Restore possibility to execute 64-bit applications") Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 242c5ab65611..04367b2b3b43 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1658,7 +1658,7 @@ dbit_fault: itlb_fault: b intr_save - ldi 6,%r8 + ldi PARISC_ITLB_TRAP,%r8 nadtlb_fault: b intr_save From c9dfa0c796f2c21255c45fe85a72657a192e1a49 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Aug 2018 11:28:08 +0900 Subject: [PATCH 02/23] parisc: remove check for minimum required GCC version Commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6") bumped the minimum GCC version to 4.6 for all architectures. The version check in arch/parisc/Makefile is obsolete now. Signed-off-by: Masahiro Yamada Signed-off-by: Helge Deller --- arch/parisc/Makefile | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 5ce030266e7d..d047a09d660f 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -156,12 +156,3 @@ define archhelp @echo ' copy to $$(INSTALL_PATH)' @echo ' zinstall - Install compressed vmlinuz kernel' endef - -# we require gcc 3.3 or above to compile the kernel -archprepare: checkbin -checkbin: - @if test "$(cc-version)" -lt "0303"; then \ - echo -n "Sorry, GCC v3.3 or above is required to build " ; \ - echo "the kernel." ; \ - false ; \ - fi From 7c4ba3d38a3b013b79c3c07a639504cdc68fefdf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Aug 2018 11:00:28 +0200 Subject: [PATCH 03/23] parisc: remove the dead ccio-rm-dma driver This driver has never been wired up due to the life of the Linux git tree, and has severely bitrotted. Signed-off-by: Christoph Hellwig Signed-off-by: Helge Deller --- drivers/parisc/Makefile | 3 - drivers/parisc/ccio-rm-dma.c | 202 ----------------------------------- 2 files changed, 205 deletions(-) delete mode 100644 drivers/parisc/ccio-rm-dma.c diff --git a/drivers/parisc/Makefile b/drivers/parisc/Makefile index 3cd5e6cb8478..99fa6a89e0b9 100644 --- a/drivers/parisc/Makefile +++ b/drivers/parisc/Makefile @@ -8,9 +8,6 @@ obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_IOMMU_SBA) += sba_iommu.o obj-$(CONFIG_PCI_LBA) += lba_pci.o - -# Only use one of them: ccio-rm-dma is for PCX-W systems *only* -# obj-$(CONFIG_IOMMU_CCIO) += ccio-rm-dma.o obj-$(CONFIG_IOMMU_CCIO) += ccio-dma.o obj-$(CONFIG_GSC) += gsc.o diff --git a/drivers/parisc/ccio-rm-dma.c b/drivers/parisc/ccio-rm-dma.c deleted file mode 100644 index df7932af48b7..000000000000 --- a/drivers/parisc/ccio-rm-dma.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * ccio-rm-dma.c: - * DMA management routines for first generation cache-coherent machines. - * "Real Mode" operation refers to U2/Uturn chip operation. The chip - * can perform coherency checks w/o using the I/O MMU. That's all we - * need until support for more than 4GB phys mem is needed. - * - * This is the trivial case - basically what x86 does. - * - * Drawbacks of using Real Mode are: - * o outbound DMA is slower since one isn't using the prefetching - * U2 can do for outbound DMA. - * o Ability to do scatter/gather in HW is also lost. 
- * o only known to work with PCX-W processor. (eg C360) - * (PCX-U/U+ are not coherent with U2 in real mode.) - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * - * Original version/author: - * CVSROOT=:pserver:anonymous@198.186.203.37:/cvsroot/linux-parisc - * cvs -z3 co linux/arch/parisc/kernel/dma-rm.c - * - * (C) Copyright 2000 Philipp Rumpf - * - * - * Adopted for The Puffin Group's parisc-linux port by Grant Grundler. - * (C) Copyright 2000 Grant Grundler - * - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -/* Only chose "ccio" since that's what HP-UX calls it.... -** Make it easier for folks to migrate from one to the other :^) -*/ -#define MODULE_NAME "ccio" - -#define U2_IOA_RUNWAY 0x580 -#define U2_BC_GSC 0x501 -#define UTURN_IOA_RUNWAY 0x581 -#define UTURN_BC_GSC 0x502 - -#define IS_U2(id) ( \ - (((id)->hw_type == HPHW_IOA) && ((id)->hversion == U2_IOA_RUNWAY)) || \ - (((id)->hw_type == HPHW_BCPORT) && ((id)->hversion == U2_BC_GSC)) \ -) - -#define IS_UTURN(id) ( \ - (((id)->hw_type == HPHW_IOA) && ((id)->hversion == UTURN_IOA_RUNWAY)) || \ - (((id)->hw_type == HPHW_BCPORT) && ((id)->hversion == UTURN_BC_GSC)) \ -) - -static int ccio_dma_supported( struct pci_dev *dev, u64 mask) -{ - if (dev == NULL) { - printk(KERN_ERR MODULE_NAME ": EISA/ISA/et al not supported\n"); - BUG(); - return(0); - } - - /* only support 32-bit devices (ie PCI/GSC) */ - return((int) (mask >= 0xffffffffUL)); -} - - -static void *ccio_alloc_consistent(struct pci_dev *dev, size_t size, - dma_addr_t *handle) -{ - void *ret; - - ret = (void *)__get_free_pages(GFP_ATOMIC, get_order(size)); - - if (ret != NULL) { - memset(ret, 0, size); - *handle = virt_to_phys(ret); - } - return ret; -} - -static void ccio_free_consistent(struct pci_dev *dev, size_t size, - void *vaddr, dma_addr_t handle) -{ - free_pages((unsigned long)vaddr, get_order(size)); -} - -static dma_addr_t ccio_map_single(struct pci_dev *dev, void *ptr, size_t size, - int direction) -{ - return virt_to_phys(ptr); -} - -static void ccio_unmap_single(struct pci_dev *dev, dma_addr_t dma_addr, - size_t size, int direction) -{ - /* Nothing to do */ -} - - -static int ccio_map_sg(struct pci_dev *dev, struct scatterlist *sglist, int nents, int direction) -{ - int tmp = nents; - - /* KISS: map each buffer separately. 
*/ - while (nents) { - sg_dma_address(sglist) = ccio_map_single(dev, sglist->address, sglist->length, direction); - sg_dma_len(sglist) = sglist->length; - nents--; - sglist++; - } - - return tmp; -} - - -static void ccio_unmap_sg(struct pci_dev *dev, struct scatterlist *sglist, int nents, int direction) -{ -#if 0 - while (nents) { - ccio_unmap_single(dev, sg_dma_address(sglist), sg_dma_len(sglist), direction); - nents--; - sglist++; - } - return; -#else - /* Do nothing (copied from current ccio_unmap_single() :^) */ -#endif -} - - -static struct pci_dma_ops ccio_ops = { - ccio_dma_supported, - ccio_alloc_consistent, - ccio_free_consistent, - ccio_map_single, - ccio_unmap_single, - ccio_map_sg, - ccio_unmap_sg, - NULL, /* dma_sync_single_for_cpu : NOP for U2 */ - NULL, /* dma_sync_single_for_device : NOP for U2 */ - NULL, /* dma_sync_sg_for_cpu : ditto */ - NULL, /* dma_sync_sg_for_device : ditto */ -}; - - -/* -** Determine if u2 should claim this chip (return 0) or not (return 1). -** If so, initialize the chip and tell other partners in crime they -** have work to do. -*/ -static int __init -ccio_probe(struct parisc_device *dev) -{ - printk(KERN_INFO "%s found %s at 0x%lx\n", MODULE_NAME, - dev->id.hversion == U2_BC_GSC ? "U2" : "UTurn", - dev->hpa.start); - -/* -** FIXME - should check U2 registers to verify it's really running -** in "Real Mode". -*/ - -#if 0 -/* will need this for "Virtual Mode" operation */ - ccio_hw_init(ccio_dev); - ccio_common_init(ccio_dev); -#endif - hppa_dma_ops = &ccio_ops; - return 0; -} - -static const struct parisc_device_id ccio_tbl[] __initconst = { - { HPHW_BCPORT, HVERSION_REV_ANY_ID, U2_BC_GSC, 0xc }, - { HPHW_BCPORT, HVERSION_REV_ANY_ID, UTURN_BC_GSC, 0xc }, - { 0, } -}; - -static struct parisc_driver ccio_driver __refdata = { - .name = "U2/Uturn", - .id_table = ccio_tbl, - .probe = ccio_probe, -}; - -void __init ccio_init(void) -{ - register_parisc_driver(&ccio_driver); -} From a886c9791aed64d73f8c0038cc1506741ba216fa Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 16 Oct 2018 20:49:56 -0400 Subject: [PATCH 04/23] parisc: Reorder TLB flush timing calculation On boot (mostly reboot), my c8000 sometimes crashes after it prints the TLB flush threshold. The lockup is hard. The front LED flashes red and the box must be unplugged to reset the error. I noticed that when the crash occurs the TLB flush threshold is about one quarter what it is on a successful boot. If I disabled the calculation, the crash didn't occur. There also seemed to be a timing dependency affecting the crash. I finally realized that the flush_tlb_all() timing test runs just after the secondary CPUs are started. There seems to be a problem with running flush_tlb_all() too soon after the CPUs are started. The timing for the range test always seemed okay. So, I reversed the order of the two timing tests and I haven't had a crash at this point so far. I added a couple of information messages which I have left to help with diagnosis if the problem should appear on another machine. This version reduces the minimum TLB flush threshold to 16 KiB. 
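As a rough, hypothetical illustration of the threshold formula used below (the figures are invented for the example, not measured values): with 4 online CPUs, a range flush of size = 2 MiB (2097152 bytes) taking rangetime = 262144 cycles, and a whole-TLB flush taking alltime = 4096 cycles,

  threshold = PAGE_ALIGN((num_online_cpus() * size * alltime) / rangetime)
            = PAGE_ALIGN((4 * 2097152 * 4096) / 262144)
            = 131072 bytes = 128 KiB

Since 128 KiB is above the 16 KiB minimum, it becomes the new parisc_tlb_flush_threshold; a result at or below 16 KiB would leave the minimum in place.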
Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index bddd2acebdcc..9f1c29d06574 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -364,7 +364,7 @@ EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; -#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */ +#define FLUSH_TLB_THRESHOLD (16*1024) /* 16 KiB minimum TLB threshold */ static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD; void __init parisc_setup_cache_timing(void) @@ -404,10 +404,6 @@ void __init parisc_setup_cache_timing(void) goto set_tlb_threshold; } - alltime = mfctl(16); - flush_tlb_all(); - alltime = mfctl(16) - alltime; - size = 0; start = (unsigned long) _text; rangetime = mfctl(16); @@ -418,13 +414,19 @@ void __init parisc_setup_cache_timing(void) } rangetime = mfctl(16) - rangetime; - printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", + alltime = mfctl(16); + flush_tlb_all(); + alltime = mfctl(16) - alltime; + + printk(KERN_INFO "Whole TLB flush %lu cycles, Range flush %lu bytes %lu cycles\n", alltime, size, rangetime); - threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime); + threshold = PAGE_ALIGN((num_online_cpus() * size * alltime) / rangetime); + printk(KERN_INFO "Calculated TLB flush threshold %lu KiB\n", + threshold/1024); set_tlb_threshold: - if (threshold) + if (threshold > parisc_tlb_flush_threshold) parisc_tlb_flush_threshold = threshold; printk(KERN_INFO "TLB flush threshold set to %lu KiB\n", parisc_tlb_flush_threshold/1024); From 32a7901f6d1d518ce25290c78553428110399212 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 29 Sep 2018 16:34:59 -0400 Subject: [PATCH 05/23] parisc: Remove PTE load and fault check from L2_ptep macro This change removes the PTE load and present check from the L2_ptep macro. The load and check for kernel pages is now done in the tlb_lock macro. This avoids a double load and check for user pages. The load and check for user pages is now done inside the lock so the fault handler can't be called while the entry is being updated. This version uses an ordered store to release the lock when the page table entry isn't present. It also corrects the check in the non SMP case. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 04367b2b3b43..00ac988d82a8 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -431,8 +431,6 @@ extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */ - LDREG %r0(\pmd),\pte - bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault .endm /* Look up PTE in a 3-Level scheme. @@ -463,7 +461,7 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm - /* Acquire pa_tlb_lock lock and recheck page is still present. */ + /* Acquire pa_tlb_lock lock and check page is present. 
*/ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f @@ -472,11 +470,13 @@ cmpib,COND(=) 0,\tmp1,1b nop LDREG 0(\ptp),\pte - bb,<,n \pte,_PAGE_PRESENT_BIT,2f + bb,<,n \pte,_PAGE_PRESENT_BIT,3f b \fault - stw \spc,0(\tmp) -2: + stw,ma \spc,0(\tmp) #endif +2: LDREG 0(\ptp),\pte + bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault +3: .endm /* Release pa_tlb_lock lock without reloading lock address. */ From 8dbac7746e55e0c83fe818c137b1235c9fa1bd75 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 6 Oct 2018 21:14:56 +0200 Subject: [PATCH 06/23] parisc: Add SYSTEM_INFO and REGISTER TOC PAT functions Signed-off-by: Helge Deller --- arch/parisc/include/asm/pdcpat.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h index a468a172ee33..7bf63fb87455 100644 --- a/arch/parisc/include/asm/pdcpat.h +++ b/arch/parisc/include/asm/pdcpat.h @@ -186,6 +186,14 @@ #define PAT_MEMUSE_GI 128 #define PAT_MEMUSE_GNI 129 +/* PDC PAT REGISTER TOC */ +#define PDC_PAT_REGISTER_TOC 75L +#define PDC_PAT_TOC_REGISTER_VECTOR 0L /* Register TOC Vector */ +#define PDC_PAT_TOC_READ_VECTOR 1L /* Read TOC Vector */ + +/* PDC PAT SYSTEM_INFO */ +#define PDC_PAT_SYSTEM_INFO 76L +/* PDC_PAT_SYSTEM_INFO uses the same options as PDC_SYSTEM_INFO function. */ #ifndef __ASSEMBLY__ #include From e98bc5ee9793742eca1129366109603010dc9389 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 7 Oct 2018 22:29:29 +0200 Subject: [PATCH 07/23] parisc: Clean up crash header output On kernel crash, this is the current output: Kernel Fault: Code=26 (Data memory access rights trap) regs=(ptrval) (Addr=00000004) Drop the address of regs, it's of no use for debugging, and show the faulty address without parenthesis. Signed-off-by: Helge Deller --- arch/parisc/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 68f10f87073d..bb65676dc41c 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -430,8 +430,8 @@ void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long o } printk("\n"); - pr_crit("%s: Code=%d (%s) regs=%p (Addr=" RFMT ")\n", - msg, code, trap_name(code), regs, offset); + pr_crit("%s: Code=%d (%s) at addr " RFMT "\n", + msg, code, trap_name(code), offset); show_regs(regs); spin_unlock(&terminate_lock); From 35d8be9c563c9b2d6550833ad494f90bffde7ff0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 7 Oct 2018 22:59:13 +0200 Subject: [PATCH 08/23] parisc: dino: Utilize DINO_MASK_IRQ() macro Signed-off-by: Helge Deller --- drivers/parisc/dino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 7390fb8ca9d1..ac646d42b300 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -382,7 +382,7 @@ static irqreturn_t dino_isr(int irq, void *intr_dev) DBG(KERN_DEBUG "%s(%d, %p) mask 0x%x\n", __func__, irq, intr_dev, mask); generic_handle_irq(irq); - mask &= ~(1 << local_irq); + mask &= ~DINO_MASK_IRQ(local_irq); } while (mask); /* Support for level triggered IRQ lines. 
From 2e37787df0ed0047848dba2eba53510a9185b991 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 7 Oct 2018 23:28:45 +0200 Subject: [PATCH 09/23] parisc: Ratelimit dino stuck interrupt warnings While playing with qemu with an emulated RT8139cp NIC, I faced lots of the following warnings: Dino 0x00810000: stuck interrupt 2 This patch ratelimits this warning and reports back that the IRQ was handled. Signed-off-by: Helge Deller --- drivers/parisc/dino.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index ac646d42b300..dfeea458a789 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -396,9 +396,8 @@ static irqreturn_t dino_isr(int irq, void *intr_dev) if (mask) { if (--ilr_loop > 0) goto ilr_again; - printk(KERN_ERR "Dino 0x%px: stuck interrupt %d\n", + pr_warn_ratelimited("Dino 0x%px: stuck interrupt %d\n", dino_dev->hba.base_addr, mask); - return IRQ_NONE; } return IRQ_HANDLED; } From d27dfa13b9f77ae7e6ed09d70a0426ed26c1a8f9 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 16 Oct 2018 21:07:59 -0400 Subject: [PATCH 10/23] parisc: Release spinlocks using ordered store This patch updates the spin unlock code to use an ordered store with release semanatics. All prior accesses are guaranteed to be performed before an ordered store is performed. Using an ordered store is significantly faster than using the sync memory barrier. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/spinlock.h | 4 ++-- arch/parisc/kernel/syscall.S | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 8a63515f03bf..16aec9ba2580 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -37,8 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); - mb(); - *a = 1; + /* Release with ordered store. 
*/ + __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory"); } static inline int arch_spin_trylock(arch_spinlock_t *x) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index f453997a7b8f..f5f22ea9b97e 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -640,8 +640,7 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw %r24, 0(%r26) /* Free lock */ - sync - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -655,8 +654,7 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ - sync - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) #endif @@ -857,8 +855,7 @@ cas2_action: cas2_end: /* Free lock */ - sync - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ @@ -868,8 +865,7 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ - sync - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 b lws_exit From 4dd5b673fa625d647447ae4c477bf946ae877711 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Thu, 20 Sep 2018 22:31:48 -0400 Subject: [PATCH 11/23] parisc: Purge TLB entries after updating page table entry and set page accessed flag in TLB handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch may resolve some races in TLB handling.  Hopefully, TLB inserts are accesses and protected by spin lock. If not, we may need to IPI calls and do local purges on PA 2.0. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgtable.h | 24 ++++++++++++------------ arch/parisc/kernel/entry.S | 4 +--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index fa6b7c78f18a..b86c31291f0a 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -66,9 +66,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) unsigned long flags; \ spin_lock_irqsave(&pa_tlb_lock, flags); \ old_pte = *ptep; \ + set_pte(ptep, pteval); \ if (pte_inserted(old_pte)) \ purge_tlb_entries(mm, addr); \ - set_pte(ptep, pteval); \ spin_unlock_irqrestore(&pa_tlb_lock, flags); \ } while (0) @@ -202,7 +202,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define _PAGE_HUGE (1 << xlate_pabit(_PAGE_HPAGE_BIT)) #define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT)) -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED) #define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXEC) @@ -227,22 +227,22 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #ifndef __ASSEMBLY__ -#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED) +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_USER) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE) /* Others seem to make this executable, I don't know if that's 
correct or not. The stack is mapped this way though so this is necessary in the short term - dhd@linuxcare.com, 2000-08-08 */ -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) -#define PAGE_WRITEONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_WRITE | _PAGE_ACCESSED) -#define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ) +#define PAGE_WRITEONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_WRITE) +#define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC) #define PAGE_COPY PAGE_EXECREAD -#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) +#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) #define PAGE_KERNEL_RWX __pgprot(_PAGE_KERNEL_RWX) #define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) #define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) -#define PAGE_GATEWAY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ) +#define PAGE_GATEWAY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_GATEWAY| _PAGE_READ) /* @@ -479,8 +479,8 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned spin_unlock_irqrestore(&pa_tlb_lock, flags); return 0; } - purge_tlb_entries(vma->vm_mm, addr); set_pte(ptep, pte_mkold(pte)); + purge_tlb_entries(vma->vm_mm, addr); spin_unlock_irqrestore(&pa_tlb_lock, flags); return 1; } @@ -493,9 +493,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, spin_lock_irqsave(&pa_tlb_lock, flags); old_pte = *ptep; + set_pte(ptep, __pte(0)); if (pte_inserted(old_pte)) purge_tlb_entries(mm, addr); - set_pte(ptep, __pte(0)); spin_unlock_irqrestore(&pa_tlb_lock, flags); return old_pte; @@ -505,8 +505,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, { unsigned long flags; spin_lock_irqsave(&pa_tlb_lock, flags); - purge_tlb_entries(mm, addr); set_pte(ptep, pte_wrprotect(*ptep)); + purge_tlb_entries(mm, addr); spin_unlock_irqrestore(&pa_tlb_lock, flags); } diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 00ac988d82a8..0d662f0e7b70 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -483,9 +483,7 @@ .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP or,COND(=) %r0,\spc,%r0 - sync - or,COND(=) %r0,\spc,%r0 - stw \spc,0(\tmp) + stw,ma \spc,0(\tmp) #endif .endm From 3c229b3f2dd8133f61bb81d3cb018be92f4bba39 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 12 Oct 2018 22:37:46 +0200 Subject: [PATCH 12/23] parisc: Fix map_pages() to not overwrite existing pte entries Fix a long-existing small nasty bug in the map_pages() implementation which leads to overwriting already written pte entries with zero, *if* map_pages() is called a second time with an end address which isn't aligned on a pmd boundry. This happens for example if we want to remap only the text segment read/write in order to run alternative patching on the code. Exiting the loop when we reach the end address fixes this. 
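A hypothetical example of the failure mode (addresses invented for illustration): suppose a first map_pages() call has populated ptes up to 0x00500000, and a second call then remaps only 0x00400000-0x00480000, i.e. with an end address in the middle of a pmd. In the non-force case the old loop did not stop at end_paddr: for the remaining addresses of that pmd it zeroed pte and still called set_pte(), so the entries from 0x00480000 up to the pmd boundary - including the 0x00480000-0x00500000 range written by the first call - were overwritten with zero. Breaking out of the loop at end_paddr leaves them intact.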
Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- arch/parisc/mm/init.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 74842d28a7a1..aae9b0d71c1e 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -494,12 +494,8 @@ static void __init map_pages(unsigned long start_vaddr, pte = pte_mkhuge(pte); } - if (address >= end_paddr) { - if (force) - break; - else - pte_val(pte) = 0; - } + if (address >= end_paddr) + break; set_pte(pg_table, pte); From 99a3ae51d557d8e38a7aece65678a31f9db215ee Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 14 Oct 2018 21:58:00 +0200 Subject: [PATCH 13/23] parisc: Fix exported address of os_hpmc handler In the C-code we need to put the physical address of the hpmc handler in the interrupt vector table (IVA) in order to get HPMCs working. Since on parisc64 function pointers are indirect (in fact they are function descriptors) we instead export the address as variable and not as function. This reverts a small part of commit f39cce654f9a ("parisc: Add cfi_startproc and cfi_endproc to assembly code"). Signed-off-by: Helge Deller Cc: [4.9+] --- arch/parisc/kernel/hpmc.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S index 781c3b9a3e46..fde654115564 100644 --- a/arch/parisc/kernel/hpmc.S +++ b/arch/parisc/kernel/hpmc.S @@ -85,7 +85,7 @@ END(hpmc_pim_data) .import intr_save, code .align 16 -ENTRY_CFI(os_hpmc) +ENTRY(os_hpmc) .os_hpmc: /* @@ -302,7 +302,6 @@ os_hpmc_6: b . nop .align 16 /* make function length multiple of 16 bytes */ -ENDPROC_CFI(os_hpmc) .os_hpmc_end: From 1138b6718ff74d2a934459643e3754423d23b5e2 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 6 Oct 2018 13:11:30 -0400 Subject: [PATCH 14/23] parisc: Fix address in HPMC IVA Helge noticed that the address of the os_hpmc handler was not being correctly calculated in the hpmc macro. As a result, PDCE_CHECK would fail to call os_hpmc: e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY f600105e02e00000 fffffff0f0c00000 CC_MC_HPMC_MONARCH_SELECTED 140003b202e00000 000000000000000b CC_ERR_HPMC_STATE_ENTRY 5600100b02e00000 00000000000001a0 CC_MC_OS_HPMC_LEN_ERR 5600106402e00000 fffffff0f0438e70 CC_MC_BR_TO_OS_HPMC_FAILED e800009802e00000 0000000000000000 CC_ERR_CHECK_HPMC 37000f7302e00000 8040004000000000 CC_ERR_CPU_CHECK_SUMMARY 4000109f02e00000 0000000000000000 CC_MC_HPMC_INITIATED 4000101902e00000 0000000000000000 CC_MC_MULTIPLE_HPMCS 030010d502e00000 0000000000000000 CC_CPU_STOP The address problem can be seen by dumping the fault vector: 0000000040159000 : 40159000: 63 6f 77 73 stb r15,-2447(dp) 40159004: 20 63 61 6e ldil L%b747000,r3 40159008: 20 66 6c 79 ldil L%-1c3b3000,r3 ... 40159020: 08 00 02 40 nop 40159024: 20 6e 60 02 ldil L%15d000,r3 40159028: 34 63 00 00 ldo 0(r3),r3 4015902c: e8 60 c0 02 bv,n r0(r3) 40159030: 08 00 02 40 nop 40159034: 00 00 00 00 break 0,0 40159038: c0 00 70 00 bb,*< r0,sar,40159840 4015903c: 00 00 00 00 break 0,0 Location 40159038 should contain the physical address of os_hpmc: 000000004015d000 : 4015d000: 08 1a 02 43 copy r26,r3 4015d004: 01 c0 08 a4 mfctl iva,r4 4015d008: 48 85 00 68 ldw 34(r4),r5 This patch moves the address setup into initialize_ivt to resolve the above problem. 
I tested the change by dumping the HPMC entry after setup: 0000000040209020: 8000240 0000000040209024: 206a2004 0000000040209028: 34630ac0 000000004020902c: e860c002 0000000040209030: 8000240 0000000040209034: 1bdddce6 0000000040209038: 15d000 000000004020903c: 1a0 Signed-off-by: John David Anglin Cc: Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 2 +- arch/parisc/kernel/traps.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 0d662f0e7b70..7c85a91b4710 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -186,7 +186,7 @@ bv,n 0(%r3) nop .word 0 /* checksum (will be patched) */ - .word PA(os_hpmc) /* address of handler */ + .word 0 /* address of handler */ .word 0 /* length of handler */ .endm diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index bb65676dc41c..472a818e8c17 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -802,7 +802,8 @@ void __init initialize_ivt(const void *iva) * the Length/4 words starting at Address is zero. */ - /* Compute Checksum for HPMC handler */ + /* Setup IVA and compute checksum for HPMC handler */ + ivap[6] = (u32)__pa(os_hpmc); length = os_hpmc_size; ivap[7] = length; From db139d71c4c377c56a67ae219f120186ce9934d1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 16 Oct 2018 08:03:24 +0200 Subject: [PATCH 15/23] extract-vmlinux: Check for uncompressed image as fallback As on x86-64 and other architectures, the boot kernel on parisc (vmlinuz and bzImage) contains a full compressed copy of the final kernel executable (vmlinux.bin.gz), which one should be able to extract with the extract-vmlinux script. But on parisc extracting the kernel with extract-vmlinux fails. Currently the script first checks if the given file is an ELF file (which is true on parisc) and if so returns it. Thus on parisc we unexpectedly get back the vmlinuz boot file instead of the uncompressed vmlinux image. This patch fixes this issue by reverting the logic. It now first tries to find a compression signature in the given file and if that fails it checks the file itself as fallback. Signed-off-by: Helge Deller --- scripts/extract-vmlinux | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/extract-vmlinux b/scripts/extract-vmlinux index e6239f39abad..85e1f32fb4a0 100755 --- a/scripts/extract-vmlinux +++ b/scripts/extract-vmlinux @@ -48,9 +48,6 @@ fi tmp=$(mktemp /tmp/vmlinux-XXX) trap "rm -f $tmp" 0 -# Initial attempt for uncompressed images or objects: -check_vmlinux $img - # That didn't work, so retry after decompression. try_decompress '\037\213\010' xy gunzip try_decompress '\3757zXZ\000' abcde unxz @@ -60,5 +57,8 @@ try_decompress '\211\114\132' xy 'lzop -d' try_decompress '\002!L\030' xxx 'lz4 -d' try_decompress '(\265/\375' xxx unzstd +# Finally check for uncompressed images or objects: +check_vmlinux $img + # Bail out: echo "$me: Cannot find vmlinux." >&2 From 34c201ae49fe9e0bf3b389da5869d810f201c740 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 15 Oct 2018 22:14:01 +0200 Subject: [PATCH 16/23] parisc: Include compressed vmlinux file in vmlinuz boot kernel Change the parisc vmlinuz boot code to include and process the real compressed vmlinux.gz ELF file instead of a compressed memory dump. This brings parisc in sync on how it's done on x86_64. The benefit of this change is that, e.g. 
for debugging purposes, one can then extract the vmlinux file out of the vmlinuz which was booted which wasn't possible before. This can be archieved with the existing scripts/extract-vmlinux script, which just needs a small tweak to prefer to extract a compressed file before trying the existing given binary. The downside of this approach is that due to the extra round of decompression/ELF processing we need more physical memory installed to be able to boot a kernel. Signed-off-by: Helge Deller --- arch/parisc/boot/compressed/Makefile | 4 +- arch/parisc/boot/compressed/misc.c | 95 ++++++++++++++++++----- arch/parisc/boot/compressed/vmlinux.lds.S | 10 ++- arch/parisc/include/asm/page.h | 12 +-- 4 files changed, 91 insertions(+), 30 deletions(-) diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index 7d7e594bda36..777533cdea31 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -14,7 +14,7 @@ targets += misc.o piggy.o sizes.h head.o real2.o firmware.o KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -fno-builtin-printf KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os ifndef CONFIG_64BIT KBUILD_CFLAGS += -mfast-indirect-calls @@ -22,7 +22,6 @@ endif OBJECTS += $(obj)/head.o $(obj)/real2.o $(obj)/firmware.o $(obj)/misc.o $(obj)/piggy.o -# LDFLAGS_vmlinux := -X --whole-archive -e startup -T LDFLAGS_vmlinux := -X -e startup --as-needed -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) $(call if_changed,ld) @@ -55,7 +54,6 @@ $(obj)/misc.o: $(obj)/sizes.h CPPFLAGS_vmlinux.lds += -I$(objtree)/$(obj) -DBOOTLOADER $(obj)/vmlinux.lds: $(obj)/sizes.h -OBJCOPYFLAGS_vmlinux.bin := -O binary -R .comment -S $(obj)/vmlinux.bin: vmlinux $(call if_changed,objcopy) diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index f57118e1f6b4..2556bb181813 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include "sizes.h" @@ -227,13 +228,62 @@ static void flush_data_cache(char *start, unsigned long length) asm ("sync"); } +static void parse_elf(void *output) +{ +#ifdef CONFIG_64BIT + Elf64_Ehdr ehdr; + Elf64_Phdr *phdrs, *phdr; +#else + Elf32_Ehdr ehdr; + Elf32_Phdr *phdrs, *phdr; +#endif + void *dest; + int i; + + memcpy(&ehdr, output, sizeof(ehdr)); + if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || + ehdr.e_ident[EI_MAG1] != ELFMAG1 || + ehdr.e_ident[EI_MAG2] != ELFMAG2 || + ehdr.e_ident[EI_MAG3] != ELFMAG3) { + error("Kernel is not a valid ELF file"); + return; + } + +#ifdef DEBUG + printf("Parsing ELF... 
"); +#endif + + phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); + if (!phdrs) + error("Failed to allocate space for phdrs"); + + memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); + + for (i = 0; i < ehdr.e_phnum; i++) { + phdr = &phdrs[i]; + + switch (phdr->p_type) { + case PT_LOAD: + dest = (void *)((unsigned long) phdr->p_paddr & + (__PAGE_OFFSET_DEFAULT-1)); + memmove(dest, output + phdr->p_offset, phdr->p_filesz); + break; + default: + break; + } + } + + free(phdrs); +} + unsigned long decompress_kernel(unsigned int started_wide, unsigned int command_line, const unsigned int rd_start, const unsigned int rd_end) { char *output; - unsigned long len, len_all; + unsigned long vmlinux_addr, vmlinux_len; + unsigned long kernel_addr, kernel_len; #ifdef CONFIG_64BIT parisc_narrow_firmware = 0; @@ -241,27 +291,29 @@ unsigned long decompress_kernel(unsigned int started_wide, set_firmware_width_unlocked(); - putchar('U'); /* if you get this p and no more, string storage */ + putchar('D'); /* if you get this D and no more, string storage */ /* in $GLOBAL$ is wrong or %dp is wrong */ - puts("ncompressing ...\n"); + puts("ecompressing Linux... "); - output = (char *) KERNEL_BINARY_TEXT_START; - len_all = __pa(SZ_end) - __pa(SZparisc_kernel_start); - - if ((unsigned long) &_startcode_end > (unsigned long) output) + /* where the final bits are stored */ + kernel_addr = KERNEL_BINARY_TEXT_START; + kernel_len = __pa(SZ_end) - __pa(SZparisc_kernel_start); + if ((unsigned long) &_startcode_end > kernel_addr) error("Bootcode overlaps kernel code"); - len = get_unaligned_le32(&output_len); - if (len > len_all) - error("Output len too big."); - else - memset(&output[len], 0, len_all - len); + /* + * Calculate addr to where the vmlinux ELF file shall be decompressed. + * Assembly code in head.S positioned the stack directly behind bss, so + * leave 2 MB for the stack. + */ + vmlinux_addr = (unsigned long) &_ebss + 2*1024*1024; + vmlinux_len = get_unaligned_le32(&output_len); + output = (char *) vmlinux_addr; /* * Initialize free_mem_ptr and free_mem_end_ptr. 
*/ - free_mem_ptr = (unsigned long) &_ebss; - free_mem_ptr += 2*1024*1024; /* leave 2 MB for stack */ + free_mem_ptr = vmlinux_addr + vmlinux_len; /* Limit memory for bootoader to 1GB */ #define ARTIFICIAL_LIMIT (1*1024*1024*1024) @@ -275,7 +327,11 @@ unsigned long decompress_kernel(unsigned int started_wide, free_mem_end_ptr = rd_start; #endif + if (free_mem_ptr >= free_mem_end_ptr) + error("Kernel too big for machine."); + #ifdef DEBUG + printf("\n"); printf("startcode_end = %x\n", &_startcode_end); printf("commandline = %x\n", command_line); printf("rd_start = %x\n", rd_start); @@ -287,16 +343,19 @@ unsigned long decompress_kernel(unsigned int started_wide, printf("input_data = %x\n", input_data); printf("input_len = %x\n", input_len); printf("output = %x\n", output); - printf("output_len = %x\n", len); - printf("output_max = %x\n", len_all); + printf("output_len = %x\n", vmlinux_len); + printf("kernel_addr = %x\n", kernel_addr); + printf("kernel_len = %x\n", kernel_len); #endif __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); + parse_elf(output); - flush_data_cache(output, len); + output = (char *) kernel_addr; + flush_data_cache(output, kernel_len); - printf("Booting kernel ...\n\n"); + printf("done.\nBooting the kernel.\n"); return (unsigned long) output; } diff --git a/arch/parisc/boot/compressed/vmlinux.lds.S b/arch/parisc/boot/compressed/vmlinux.lds.S index 4ebd4e65524c..bfd7872739a3 100644 --- a/arch/parisc/boot/compressed/vmlinux.lds.S +++ b/arch/parisc/boot/compressed/vmlinux.lds.S @@ -42,6 +42,12 @@ SECTIONS #endif _startcode_end = .; + /* vmlinux.bin.gz is here */ + . = ALIGN(8); + .rodata.compressed : { + *(.rodata.compressed) + } + /* bootloader code and data starts behind area of extracted kernel */ . = (SZ_end - SZparisc_kernel_start + KERNEL_BINARY_TEXT_START); @@ -68,10 +74,6 @@ SECTIONS _erodata = . ; } . = ALIGN(8); - .rodata.compressed : { - *(.rodata.compressed) - } - . = ALIGN(8); .bss : { _bss = . ; *(.bss) diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index af00fe9bf846..b77f49ce6220 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -117,14 +117,16 @@ extern int npmem_ranges; /* This governs the relationship between virtual and physical addresses. * If you alter it, make sure to take care of our various fixed mapping * segments in fixmap.h */ -#if defined(BOOTLOADER) -#define __PAGE_OFFSET (0) /* bootloader uses physical addresses */ -#else #ifdef CONFIG_64BIT -#define __PAGE_OFFSET (0x40000000) /* 1GB */ +#define __PAGE_OFFSET_DEFAULT (0x40000000) /* 1GB */ #else -#define __PAGE_OFFSET (0x10000000) /* 256MB */ +#define __PAGE_OFFSET_DEFAULT (0x10000000) /* 256MB */ #endif + +#if defined(BOOTLOADER) +#define __PAGE_OFFSET (0) /* bootloader uses physical addresses */ +#else +#define __PAGE_OFFSET __PAGE_OFFSET_DEFAULT #endif /* BOOTLOADER */ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) From 3847dab77421867fbc77faacb2f377d44e729e1b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 16 Oct 2018 22:38:22 +0200 Subject: [PATCH 17/23] parisc: Add alternative coding infrastructure This patch adds the necessary code to patch a running kernel at runtime to improve performance. The current implementation offers a few optimizations variants: - When running a SMP kernel on a single UP processor, unwanted assembler statements like locking functions are overwritten with NOPs. 
When multiple instructions shall be skipped, one branch instruction is used instead of multiple nop instructions. - In the UP case, some pdtlb and pitlb instructions are patched to become pdtlb,l and pitlb,l which only flushes the CPU-local tlb entries instead of broadcasting the flush to other CPUs in the system and thus may improve performance. - fic and fdc instructions are skipped if no I- or D-caches are installed. This should speed up qemu emulation and cacheless systems. - If no cache coherence is needed for IO operations, the relevant fdc and sync instructions in the sba and ccio drivers are replaced by nops. - On systems which share I- and D-TLBs and thus don't have a seperate instruction TLB, the pitlb instruction is replaced by a nop. Live-patching is done early in the boot process, just after having run the system inventory. No drivers are running and thus no external interrupts should arrive. So the hope is that no TLB exceptions will occur during the patching. If this turns out to be wrong we will probably need to do the patching in real-mode. Signed-off-by: Helge Deller --- arch/parisc/include/asm/alternative.h | 47 ++++++++++++++++ arch/parisc/include/asm/cache.h | 22 +++++++- arch/parisc/include/asm/pgtable.h | 3 +- arch/parisc/include/asm/sections.h | 2 + arch/parisc/include/asm/tlbflush.h | 3 +- arch/parisc/kernel/cache.c | 12 ---- arch/parisc/kernel/entry.S | 10 +++- arch/parisc/kernel/pacache.S | 64 ++++++++++++++------- arch/parisc/kernel/setup.c | 81 +++++++++++++++++++++++++++ arch/parisc/kernel/signal.c | 1 - arch/parisc/kernel/vmlinux.lds.S | 6 ++ arch/parisc/mm/init.c | 15 +++++ drivers/parisc/ccio-dma.c | 12 ++-- drivers/parisc/sba_iommu.c | 17 ++---- 14 files changed, 233 insertions(+), 62 deletions(-) create mode 100644 arch/parisc/include/asm/alternative.h diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h new file mode 100644 index 000000000000..bf485a94d0b4 --- /dev/null +++ b/arch/parisc/include/asm/alternative.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_PARISC_ALTERNATIVE_H +#define __ASM_PARISC_ALTERNATIVE_H + +#define ALT_COND_NO_SMP 0x01 /* when running UP instead of SMP */ +#define ALT_COND_NO_DCACHE 0x02 /* if system has no d-cache */ +#define ALT_COND_NO_ICACHE 0x04 /* if system has no i-cache */ +#define ALT_COND_NO_SPLIT_TLB 0x08 /* if split_tlb == 0 */ +#define ALT_COND_NO_IOC_FDC 0x10 /* if I/O cache does not need flushes */ + +#define INSN_PxTLB 0x02 /* modify pdtlb, pitlb */ +#define INSN_NOP 0x08000240 /* nop */ + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +struct alt_instr { + s32 orig_offset; /* offset to original instructions */ + u32 len; /* end of original instructions */ + u32 cond; /* see ALT_COND_XXX */ + u32 replacement; /* replacement instruction or code */ +}; + +void set_kernel_text_rw(int enable_read_write); + +/* Alternative SMP implementation. */ +#define ALTERNATIVE(cond, replacement) "!0:" \ + ".section .altinstructions, \"aw\" !" \ + ".word (0b-4-.), 1, " __stringify(cond) "," \ + __stringify(replacement) " !" \ + ".previous" + +#else + +#define ALTERNATIVE(from, to, cond, replacement)\ + .section .altinstructions, "aw" ! \ + .word (from - .), (to - from)/4 ! \ + .word cond, replacement ! 
\ + .previous + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_PARISC_ALTERNATIVE_H */ diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 150b7f30ea90..006fb939cac8 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -6,6 +6,7 @@ #ifndef __ARCH_PARISC_CACHE_H #define __ARCH_PARISC_CACHE_H +#include /* * PA 2.0 processors have 64 and 128-byte L2 cachelines; PA 1.1 processors @@ -41,9 +42,24 @@ extern int icache_stride; extern struct pdc_cache_info cache_info; void parisc_setup_cache_timing(void); -#define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" : : "r" (addr)); -#define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" : : "r" (addr)); -#define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" : : "r" (addr)); +#define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" \ + ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ + : : "r" (addr)) +#define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" \ + ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ + ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \ + : : "r" (addr)) +#define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" \ + ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ + : : "r" (addr)) + +#define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \ + ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ + ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) \ + : : "r" (addr)) +#define asm_io_sync() asm volatile("sync" \ + ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ + ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :: ) #endif /* ! __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index b86c31291f0a..94c0ef7a9e03 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -43,8 +43,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) { mtsp(mm->context, 1); pdtlb(addr); - if (unlikely(split_tlb)) - pitlb(addr); + pitlb(addr); } /* Certain architectures need to do special things when PTEs diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index 5a40b51df80c..bb52aea0cb21 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -5,6 +5,8 @@ /* nothing to see, move along */ #include +extern char __alt_instructions[], __alt_instructions_end[]; + #ifdef CONFIG_64BIT #define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 14668bd52d60..6804374efa66 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -85,8 +85,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, purge_tlb_start(flags); mtsp(sid, 1); pdtlb(addr); - if (unlikely(split_tlb)) - pitlb(addr); + pitlb(addr); purge_tlb_end(flags); } #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 9f1c29d06574..56dc9791ee23 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -479,18 +479,6 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, /* Purge TLB entries for small ranges using the pdtlb and pitlb instructions. These instructions execute locally but cause a purge request to be broadcast to other TLBs. 
*/ - if (likely(!split_tlb)) { - while (start < end) { - purge_tlb_start(flags); - mtsp(sid, 1); - pdtlb(start); - purge_tlb_end(flags); - start += PAGE_SIZE; - } - return 0; - } - - /* split TLB case */ while (start < end) { purge_tlb_start(flags); mtsp(sid, 1); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 7c85a91b4710..d3e2633cd688 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -464,7 +465,7 @@ /* Acquire pa_tlb_lock lock and check page is present. */ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP - cmpib,COND(=),n 0,\spc,2f +98: cmpib,COND(=),n 0,\spc,2f load_pa_tlb_lock \tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b @@ -473,6 +474,7 @@ bb,<,n \pte,_PAGE_PRESENT_BIT,3f b \fault stw,ma \spc,0(\tmp) +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif 2: LDREG 0(\ptp),\pte bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault @@ -482,15 +484,17 @@ /* Release pa_tlb_lock lock without reloading lock address. */ .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 +98: or,COND(=) %r0,\spc,%r0 stw,ma \spc,0(\tmp) +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm /* Release pa_tlb_lock lock. */ .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load_pa_tlb_lock \tmp +98: load_pa_tlb_lock \tmp +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) tlb_unlock0 \spc,\tmp #endif .endm diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index f33bf2d306d6..b41c0136a05f 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -190,7 +191,7 @@ ENDPROC_CFI(flush_tlb_all_local) .import cache_info,data ENTRY_CFI(flush_instruction_cache_local) - load32 cache_info, %r1 +88: load32 cache_info, %r1 /* Flush Instruction Cache */ @@ -243,6 +244,7 @@ fioneloop2: fisync: sync mtsm %r22 /* restore I-bit */ +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) bv %r0(%r2) nop ENDPROC_CFI(flush_instruction_cache_local) @@ -250,7 +252,7 @@ ENDPROC_CFI(flush_instruction_cache_local) .import cache_info, data ENTRY_CFI(flush_data_cache_local) - load32 cache_info, %r1 +88: load32 cache_info, %r1 /* Flush Data Cache */ @@ -304,6 +306,7 @@ fdsync: syncdma sync mtsm %r22 /* restore I-bit */ +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) bv %r0(%r2) nop ENDPROC_CFI(flush_data_cache_local) @@ -312,6 +315,7 @@ ENDPROC_CFI(flush_data_cache_local) .macro tlb_lock la,flags,tmp #ifdef CONFIG_SMP +98: #if __PA_LDCW_ALIGNMENT > 4 load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la depi 0,31,__PA_LDCW_ALIGN_ORDER, \la @@ -326,15 +330,17 @@ ENDPROC_CFI(flush_data_cache_local) nop b,n 2b 3: +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm .macro tlb_unlock la,flags,tmp #ifdef CONFIG_SMP - ldi 1,\tmp +98: ldi 1,\tmp sync stw \tmp,0(\la) mtsm \flags +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm @@ -596,9 +602,11 @@ ENTRY_CFI(copy_user_page_asm) pdtlb,l %r0(%r29) #else tlb_lock %r20,%r21,%r22 - pdtlb %r0(%r28) - pdtlb %r0(%r29) +0: pdtlb %r0(%r28) +1: pdtlb %r0(%r29) tlb_unlock %r20,%r21,%r22 + ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) + ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB) #endif #ifdef CONFIG_64BIT @@ -736,8 +744,9 @@ ENTRY_CFI(clear_user_page_asm) pdtlb,l %r0(%r28) #else tlb_lock %r20,%r21,%r22 - pdtlb %r0(%r28) +0: pdtlb %r0(%r28) tlb_unlock %r20,%r21,%r22 + ALTERNATIVE(0b, 0b+4, 
ALT_COND_NO_SMP, INSN_PxTLB) #endif #ifdef CONFIG_64BIT @@ -813,11 +822,12 @@ ENTRY_CFI(flush_dcache_page_asm) pdtlb,l %r0(%r28) #else tlb_lock %r20,%r21,%r22 - pdtlb %r0(%r28) +0: pdtlb %r0(%r28) tlb_unlock %r20,%r21,%r22 + ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) #endif - ldil L%dcache_stride, %r1 +88: ldil L%dcache_stride, %r1 ldw R%dcache_stride(%r1), r31 #ifdef CONFIG_64BIT @@ -847,6 +857,7 @@ ENTRY_CFI(flush_dcache_page_asm) cmpb,COND(<<) %r28, %r25,1b fdc,m r31(%r28) +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) sync bv %r0(%r2) nop @@ -874,15 +885,19 @@ ENTRY_CFI(flush_icache_page_asm) #ifdef CONFIG_PA20 pdtlb,l %r0(%r28) - pitlb,l %r0(%sr4,%r28) +1: pitlb,l %r0(%sr4,%r28) + ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP) #else tlb_lock %r20,%r21,%r22 - pdtlb %r0(%r28) - pitlb %r0(%sr4,%r28) +0: pdtlb %r0(%r28) +1: pitlb %r0(%sr4,%r28) tlb_unlock %r20,%r21,%r22 + ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) + ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB) + ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP) #endif - ldil L%icache_stride, %r1 +88: ldil L%icache_stride, %r1 ldw R%icache_stride(%r1), %r31 #ifdef CONFIG_64BIT @@ -914,13 +929,14 @@ ENTRY_CFI(flush_icache_page_asm) cmpb,COND(<<) %r28, %r25,1b fic,m %r31(%sr4,%r28) +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) sync bv %r0(%r2) nop ENDPROC_CFI(flush_icache_page_asm) ENTRY_CFI(flush_kernel_dcache_page_asm) - ldil L%dcache_stride, %r1 +88: ldil L%dcache_stride, %r1 ldw R%dcache_stride(%r1), %r23 #ifdef CONFIG_64BIT @@ -950,13 +966,14 @@ ENTRY_CFI(flush_kernel_dcache_page_asm) cmpb,COND(<<) %r26, %r25,1b fdc,m %r23(%r26) +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) sync bv %r0(%r2) nop ENDPROC_CFI(flush_kernel_dcache_page_asm) ENTRY_CFI(purge_kernel_dcache_page_asm) - ldil L%dcache_stride, %r1 +88: ldil L%dcache_stride, %r1 ldw R%dcache_stride(%r1), %r23 #ifdef CONFIG_64BIT @@ -985,13 +1002,14 @@ ENTRY_CFI(purge_kernel_dcache_page_asm) cmpb,COND(<<) %r26, %r25, 1b pdc,m %r23(%r26) +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) sync bv %r0(%r2) nop ENDPROC_CFI(purge_kernel_dcache_page_asm) ENTRY_CFI(flush_user_dcache_range_asm) - ldil L%dcache_stride, %r1 +88: ldil L%dcache_stride, %r1 ldw R%dcache_stride(%r1), %r23 ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 @@ -999,13 +1017,14 @@ ENTRY_CFI(flush_user_dcache_range_asm) 1: cmpb,COND(<<),n %r26, %r25, 1b fdc,m %r23(%sr3, %r26) +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) sync bv %r0(%r2) nop ENDPROC_CFI(flush_user_dcache_range_asm) ENTRY_CFI(flush_kernel_dcache_range_asm) - ldil L%dcache_stride, %r1 +88: ldil L%dcache_stride, %r1 ldw R%dcache_stride(%r1), %r23 ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 @@ -1014,13 +1033,14 @@ ENTRY_CFI(flush_kernel_dcache_range_asm) fdc,m %r23(%r26) sync +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) syncdma bv %r0(%r2) nop ENDPROC_CFI(flush_kernel_dcache_range_asm) ENTRY_CFI(purge_kernel_dcache_range_asm) - ldil L%dcache_stride, %r1 +88: ldil L%dcache_stride, %r1 ldw R%dcache_stride(%r1), %r23 ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 @@ -1029,13 +1049,14 @@ ENTRY_CFI(purge_kernel_dcache_range_asm) pdc,m %r23(%r26) sync +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) syncdma bv %r0(%r2) nop ENDPROC_CFI(purge_kernel_dcache_range_asm) ENTRY_CFI(flush_user_icache_range_asm) - ldil L%icache_stride, %r1 +88: ldil L%icache_stride, %r1 ldw R%icache_stride(%r1), %r23 ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 @@ -1043,13 +1064,14 @@ 
ENTRY_CFI(flush_user_icache_range_asm) 1: cmpb,COND(<<),n %r26, %r25,1b fic,m %r23(%sr3, %r26) +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) sync bv %r0(%r2) nop ENDPROC_CFI(flush_user_icache_range_asm) ENTRY_CFI(flush_kernel_icache_page) - ldil L%icache_stride, %r1 +88: ldil L%icache_stride, %r1 ldw R%icache_stride(%r1), %r23 #ifdef CONFIG_64BIT @@ -1079,13 +1101,14 @@ ENTRY_CFI(flush_kernel_icache_page) cmpb,COND(<<) %r26, %r25, 1b fic,m %r23(%sr4, %r26) +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) sync bv %r0(%r2) nop ENDPROC_CFI(flush_kernel_icache_page) ENTRY_CFI(flush_kernel_icache_range_asm) - ldil L%icache_stride, %r1 +88: ldil L%icache_stride, %r1 ldw R%icache_stride(%r1), %r23 ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 @@ -1093,6 +1116,7 @@ ENTRY_CFI(flush_kernel_icache_range_asm) 1: cmpb,COND(<<),n %r26, %r25, 1b fic,m %r23(%sr4, %r26) +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) sync bv %r0(%r2) nop diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 4e87c35c22b7..db6e7957f9a3 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -305,6 +305,86 @@ static int __init parisc_init_resources(void) return 0; } +static int no_alternatives __initdata; +static int __init setup_no_alternatives(char *str) +{ + no_alternatives = 1; + return 1; +} +__setup("no-alternatives", setup_no_alternatives); + +static void __init apply_alternatives_all(void) +{ + struct alt_instr *entry; + int index = 0, applied = 0; + + + pr_info("alternatives: %spatching kernel code\n", + no_alternatives ? "NOT " : ""); + if (no_alternatives) + return; + + set_kernel_text_rw(1); + + for (entry = (struct alt_instr *) &__alt_instructions; + entry < (struct alt_instr *) &__alt_instructions_end; + entry++, index++) { + + u32 *from, len, cond, replacement; + + from = (u32 *)((ulong)&entry->orig_offset + entry->orig_offset); + len = entry->len; + cond = entry->cond; + replacement = entry->replacement; + + WARN_ON(!cond); + pr_debug("Check %d: Cond 0x%x, Replace %02d instructions @ 0x%px with 0x%08x\n", + index, cond, len, from, replacement); + + if ((cond & ALT_COND_NO_SMP) && (num_online_cpus() != 1)) + continue; + if ((cond & ALT_COND_NO_DCACHE) && (cache_info.dc_size != 0)) + continue; + if ((cond & ALT_COND_NO_ICACHE) && (cache_info.ic_size != 0)) + continue; + + /* + * If the PDC_MODEL capabilities has Non-coherent IO-PDIR bit + * set (bit #61, big endian), we have to flush and sync every + * time IO-PDIR is changed in Ike/Astro. + */ + if ((cond & ALT_COND_NO_IOC_FDC) && + (boot_cpu_data.pdc.capabilities & PDC_MODEL_IOPDIR_FDC)) + continue; + + /* Want to replace pdtlb by a pdtlb,l instruction? */ + if (replacement == INSN_PxTLB) { + replacement = *from; + if (boot_cpu_data.cpu_type >= pcxu) /* >= pa2.0 ? */ + replacement |= (1 << 10); /* set el bit */ + } + + /* + * Replace instruction with NOPs? + * For long distance insert a branch instruction instead. 
+ */ + if (replacement == INSN_NOP && len > 1) + replacement = 0xe8000002 + (len-2)*8; /* "b,n .+8" */ + + pr_debug("Do %d: Cond 0x%x, Replace %02d instructions @ 0x%px with 0x%08x\n", + index, cond, len, from, replacement); + + /* Replace instruction */ + *from = replacement; + applied++; + } + + pr_info("alternatives: applied %d out of %d patches\n", applied, index); + + set_kernel_text_rw(0); +} + + extern void gsc_init(void); extern void processor_init(void); extern void ccio_init(void); @@ -346,6 +426,7 @@ static int __init parisc_init(void) boot_cpu_data.cpu_hz / 1000000, boot_cpu_data.cpu_hz % 1000000 ); + apply_alternatives_all(); parisc_setup_cache_timing(); /* These are in a non-obvious order, will fix when we have an iotree */ diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 342073f44d3f..848c1934680b 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -65,7 +65,6 @@ #define INSN_LDI_R25_1 0x34190002 /* ldi 1,%r25 (in_syscall=1) */ #define INSN_LDI_R20 0x3414015a /* ldi __NR_rt_sigreturn,%r20 */ #define INSN_BLE_SR2_R0 0xe4008200 /* be,l 0x100(%sr2,%r0),%sr0,%r31 */ -#define INSN_NOP 0x08000240 /* nop */ /* For debugging */ #define INSN_DIE_HORRIBLY 0x68000ccc /* stw %r0,0x666(%sr0,%r0) */ diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index da2e31190efa..c3b1b9c24ede 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -61,6 +61,12 @@ SECTIONS EXIT_DATA } PERCPU_SECTION(8) + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } . = ALIGN(HUGEPAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index aae9b0d71c1e..e7e626bcd0be 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -511,6 +511,21 @@ static void __init map_pages(unsigned long start_vaddr, } } +void __init set_kernel_text_rw(int enable_read_write) +{ + unsigned long start = (unsigned long)_stext; + unsigned long end = (unsigned long)_etext; + + map_pages(start, __pa(start), end-start, + PAGE_KERNEL_RWX, enable_read_write ? 1:0); + + /* force the kernel to see the new TLB entries */ + __flush_tlb_range(0, start, end); + + /* dump old cached instructions */ + flush_icache_range(start, end); +} + void __ref free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 614823617b8b..701a7d6a74d5 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -609,14 +609,13 @@ ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba, ** PCX-T'? Don't know. (eg C110 or similar K-class) ** ** See PDC_MODEL/option 0/SW_CAP word for "Non-coherent IO-PDIR bit". - ** Hopefully we can patch (NOP) these out at boot time somehow. ** ** "Since PCX-U employs an offset hash that is incompatible with ** the real mode coherence index generation of U2, the PDIR entry ** must be flushed to memory to retain coherence." */ - asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr)); - asm volatile("sync"); + asm_io_fdc(pdir_ptr); + asm_io_sync(); } /** @@ -682,17 +681,14 @@ ccio_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) ** FIXME: PCX_W platforms don't need FDC/SYNC. (eg C360) ** PCX-U/U+ do. (eg C200/C240) ** See PDC_MODEL/option 0/SW_CAP for "Non-coherent IO-PDIR bit". 
- ** - ** Hopefully someone figures out how to patch (NOP) the - ** FDC/SYNC out at boot time. */ - asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr[7])); + asm_io_fdc(pdir_ptr); iovp += IOVP_SIZE; byte_cnt -= IOVP_SIZE; } - asm volatile("sync"); + asm_io_sync(); ccio_clear_io_tlb(ioc, CCIO_IOVP(iova), saved_byte_cnt); } diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 11de0eccf968..c1e599a429af 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -587,8 +587,7 @@ sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba, * (bit #61, big endian), we have to flush and sync every time * IO-PDIR is changed in Ike/Astro. */ - if (ioc_needs_fdc) - asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr)); + asm_io_fdc(pdir_ptr); } @@ -641,8 +640,8 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) do { /* clear I/O Pdir entry "valid" bit first */ ((u8 *) pdir_ptr)[7] = 0; + asm_io_fdc(pdir_ptr); if (ioc_needs_fdc) { - asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr)); #if 0 entries_per_cacheline = L1_CACHE_SHIFT - 3; #endif @@ -661,8 +660,7 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) ** could dump core on HPMC. */ ((u8 *) pdir_ptr)[7] = 0; - if (ioc_needs_fdc) - asm volatile("fdc %%r0(%0)" : : "r" (pdir_ptr)); + asm_io_fdc(pdir_ptr); WRITE_REG( SBA_IOVA(ioc, iovp, 0, 0), ioc->ioc_hpa+IOC_PCOM); } @@ -773,8 +771,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, } /* force FDC ops in io_pdir_entry() to be visible to IOMMU */ - if (ioc_needs_fdc) - asm volatile("sync" : : ); + asm_io_sync(); #ifdef ASSERT_PDIR_SANITY sba_check_pdir(ioc,"Check after sba_map_single()"); @@ -858,8 +855,7 @@ sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, sba_free_range(ioc, iova, size); /* If fdc's were issued, force fdc's to be visible now */ - if (ioc_needs_fdc) - asm volatile("sync" : : ); + asm_io_sync(); READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ #endif /* DELAYED_RESOURCE_CNT == 0 */ @@ -1008,8 +1004,7 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, filled = iommu_fill_pdir(ioc, sglist, nents, 0, sba_io_pdir_entry); /* force FDC ops in io_pdir_entry() to be visible to IOMMU */ - if (ioc_needs_fdc) - asm volatile("sync" : : ); + asm_io_sync(); #ifdef ASSERT_PDIR_SANITY if (sba_check_pdir(ioc,"Check after sba_map_sg()")) From a45a01160f1d46a3021755a2cf2f433aec008e38 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Oct 2018 20:31:20 +0200 Subject: [PATCH 18/23] parisc: Use zdep for shlw macro on PA1.1 and PA2.0 The zdep and depw,z mnemonics generate the same code. The assembler will accept the depw,z mnemonic when generating PA 1.x code. The zdep mnemonic is okay when generating PA 2.0 code. This patch changes depw,z to zdep in the current shlw macro, while the binary code will be the same. Signed-off-by: Helge Deller Signed-off-by: John David Anglin --- arch/parisc/include/asm/assembly.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index e9c6385ef0d1..c17ec0ee6e7c 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -129,15 +129,8 @@ .macro debug value .endm - - /* Shift Left - note the r and t can NOT be the same! 
*/
-	.macro shl r, sa, t
-	dep,z	\r, 31-(\sa), 32-(\sa), \t
-	.endm
-
-	/* The PA 2.0 shift left */
 	.macro shlw r, sa, t
-	depw,z	\r, 31-(\sa), 32-(\sa), \t
+	zdep	\r, 31-(\sa), 32-(\sa), \t
 	.endm
 
 	/* And the PA 2.0W shift left */

From 32c1ceeabda3e37dd94129f915823e7cf0441fc9 Mon Sep 17 00:00:00 2001
From: Helge Deller
Date: Fri, 19 Oct 2018 20:36:15 +0200
Subject: [PATCH 19/23] parisc: Drop two instructions from pte lookup code

Remove two instructions from the hot path. The temporary move to %r9 is
unnecessary, and the zero-initialization of pte happens twice.

Signed-off-by: Helge Deller
---
 arch/parisc/kernel/entry.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index d3e2633cd688..1c60408a64ad 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -427,8 +427,7 @@
 	ldw,s		\index(\pmd),\pmd
 	bb,>=,n		\pmd,_PxD_PRESENT_BIT,\fault
 	dep		%r0,31,PxD_FLAG_SHIFT,\pmd	/* clear flags */
-	copy		\pmd,%r9
-	SHLREG		%r9,PxD_VALUE_SHIFT,\pmd
+	SHLREG		\pmd,PxD_VALUE_SHIFT,\pmd
 	extru		\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
 	dep		%r0,31,PAGE_SHIFT,\pmd	/* clear offset */
 	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd	/* pmd is now pte */
@@ -447,7 +446,6 @@
 	.macro		L3_ptep pgd,pte,index,va,fault
 #if CONFIG_PGTABLE_LEVELS == 3 /* we might have a 2-Level scheme, e.g. with 16kb page size */
 	extrd,u		\va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-	copy		%r0,\pte
 	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0
 	ldw,s		\index(\pgd),\pgd
 	extrd,u,*=	\va,63-ASM_PGDIR_SHIFT,64-ASM_PGDIR_SHIFT,%r0

From fe8376dbbd6ac1febb0fd6389e3ec4f349e70c71 Mon Sep 17 00:00:00 2001
From: Helge Deller
Date: Fri, 19 Oct 2018 22:06:36 +0200
Subject: [PATCH 20/23] parisc: Add PDC PAT cell_info() and
 pd_get_pdc_revisions() functions

Add wrappers for the PDC_PAT_CELL_GET_INFO and
PDC_PAT_PD_GET_PDC_INTERF_REV PAT PDC subfunctions. Both provide access
to the PAT capability bitfield, which tells us whether simultaneous
PTLBs are allowed on the bus and whether firmware will rendezvous all
processors within PDCE_Check in case of an HPMC.
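
A usage sketch (mirroring what the final patch in this series does in
inventory.c; the PDC_OK check and the message wording are illustrative
only, not part of the patch):

	unsigned long legacy_rev, pat_rev, pdc_cap;

	/* Ask PAT firmware for its interface revisions and capability bits. */
	if (pdc_pat_pd_get_pdc_revisions(&legacy_rev, &pat_rev, &pdc_cap) == PDC_OK) {
		if (pdc_cap & PDC_PAT_CAPABILITY_BIT_SIMULTANEOUS_PTLB)
			pr_info("PAT: simultaneous PTLB broadcasts allowed\n");
		if (pdc_cap & PDC_PAT_CAPABILITY_BIT_PDC_HPMC_RENDEZ)
			pr_info("PAT: firmware rendezvous in PDCE_Check on HPMC\n");
	}
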
Signed-off-by: Helge Deller --- arch/parisc/include/asm/pdcpat.h | 54 ++++++++++++++++++++++-------- arch/parisc/kernel/firmware.c | 57 ++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h index 7bf63fb87455..bce9ee1c1c99 100644 --- a/arch/parisc/include/asm/pdcpat.h +++ b/arch/parisc/include/asm/pdcpat.h @@ -173,6 +173,16 @@ /* PDC PAT PD */ #define PDC_PAT_PD 74L /* Protection Domain Info */ #define PDC_PAT_PD_GET_ADDR_MAP 0L /* Get Address Map */ +#define PDC_PAT_PD_GET_PDC_INTERF_REV 1L /* Get PDC Interface Revisions */ + +#define PDC_PAT_CAPABILITY_BIT_PDC_SERIALIZE (1UL << 0) +#define PDC_PAT_CAPABILITY_BIT_PDC_POLLING (1UL << 1) +#define PDC_PAT_CAPABILITY_BIT_PDC_NBC (1UL << 2) /* non-blocking calls */ +#define PDC_PAT_CAPABILITY_BIT_PDC_UFO (1UL << 3) +#define PDC_PAT_CAPABILITY_BIT_PDC_IODC_32 (1UL << 4) +#define PDC_PAT_CAPABILITY_BIT_PDC_IODC_64 (1UL << 5) +#define PDC_PAT_CAPABILITY_BIT_PDC_HPMC_RENDEZ (1UL << 6) +#define PDC_PAT_CAPABILITY_BIT_SIMULTANEOUS_PTLB (1UL << 7) /* PDC_PAT_PD_GET_ADDR_MAP entry types */ #define PAT_MEMORY_DESCRIPTOR 1 @@ -305,18 +315,29 @@ struct pdc_pat_pd_addr_map_entry { ** PDC_PAT_CELL_GET_INFO return block */ typedef struct pdc_pat_cell_info_rtn_block { - unsigned long cpu_info; - unsigned long cell_info; - unsigned long cell_location; - unsigned long reo_location; - unsigned long mem_size; - unsigned long dimm_status; unsigned long pdc_rev; - unsigned long fabric_info0; - unsigned long fabric_info1; - unsigned long fabric_info2; - unsigned long fabric_info3; - unsigned long reserved[21]; + unsigned long capabilities; /* see PDC_PAT_CAPABILITY_BIT_* */ + unsigned long reserved0[2]; + unsigned long cell_info; /* 0x20 */ + unsigned long cell_phys_location; + unsigned long cpu_info; + unsigned long cpu_speed; + unsigned long io_chassis_phys_location; + unsigned long cell_io_information; + unsigned long reserved1[2]; + unsigned long io_slot_info_size; /* 0x60 */ + struct { + unsigned long header, info0, info1; + unsigned long phys_loc, hw_path; + } io_slot[16]; + unsigned long cell_mem_size; /* 0x2e8 */ + unsigned long cell_dimm_info_size; + unsigned long dimm_info[16]; + unsigned long fabric_info_size; /* 0x3f8 */ + struct { /* 0x380 */ + unsigned long fabric_info_xbc_port; + unsigned long rc_attached_to_xbc; + } xbc[8*4]; } pdc_pat_cell_info_rtn_block_t; @@ -334,12 +355,19 @@ typedef struct pdc_pat_cell_mod_maddr_block pdc_pat_cell_mod_maddr_block_t; extern int pdc_pat_chassis_send_log(unsigned long status, unsigned long data); extern int pdc_pat_cell_get_number(struct pdc_pat_cell_num *cell_info); -extern int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long mod, unsigned long view_type, void *mem_addr); +extern int pdc_pat_cell_info(struct pdc_pat_cell_info_rtn_block *info, + unsigned long *actcnt, unsigned long offset, + unsigned long cell_number); +extern int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, + unsigned long mod, unsigned long view_type, void *mem_addr); extern int pdc_pat_cell_num_to_loc(void *, unsigned long); extern int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, unsigned long hpa); -extern int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, unsigned long count, unsigned long offset); +extern int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, + unsigned long count, unsigned long offset); +extern int 
pdc_pat_pd_get_pdc_revisions(unsigned long *legacy_rev,
+		unsigned long *pat_rev, unsigned long *pdc_cap);
 extern int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *val);
 extern int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val);
 
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 6d471c00c71a..e6f3b49f2fd7 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -1325,6 +1325,36 @@ int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long
 	return retval;
 }
 
+/**
+ * pdc_pat_cell_info - Retrieve the cell's information.
+ * @info: The pointer to a struct pdc_pat_cell_info_rtn_block.
+ * @actcnt: The number of bytes which should be written to info.
+ * @offset: offset of the structure.
+ * @cell_number: The cell number which should be asked, or -1 for current cell.
+ *
+ * This PDC call returns information about the given cell (or all cells).
+ */
+int pdc_pat_cell_info(struct pdc_pat_cell_info_rtn_block *info,
+		unsigned long *actcnt, unsigned long offset,
+		unsigned long cell_number)
+{
+	int retval;
+	unsigned long flags;
+	struct pdc_pat_cell_info_rtn_block result;
+
+	spin_lock_irqsave(&pdc_lock, flags);
+	retval = mem_pdc_call(PDC_PAT_CELL, PDC_PAT_CELL_GET_INFO,
+			__pa(pdc_result), __pa(&result), *actcnt,
+			offset, cell_number);
+	if (!retval) {
+		*actcnt = pdc_result[0];
+		memcpy(info, &result, *actcnt);
+	}
+	spin_unlock_irqrestore(&pdc_lock, flags);
+
+	return retval;
+}
+
 /**
  * pdc_pat_cpu_get_number - Retrieve the cpu number.
  * @cpu_info: The return buffer.
@@ -1412,6 +1442,33 @@ int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr,
 	return retval;
 }
 
+/**
+ * pdc_pat_pd_get_pdc_revisions - Retrieve PDC interface revisions.
+ * @legacy_rev: The legacy revision.
+ * @pat_rev: The PAT revision.
+ * @pdc_cap: The PDC capabilities.
+ *
+ */
+int pdc_pat_pd_get_pdc_revisions(unsigned long *legacy_rev,
+		unsigned long *pat_rev, unsigned long *pdc_cap)
+{
+	int retval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pdc_lock, flags);
+	retval = mem_pdc_call(PDC_PAT_PD, PDC_PAT_PD_GET_PDC_INTERF_REV,
+			__pa(pdc_result));
+	if (retval == PDC_OK) {
+		*legacy_rev = pdc_result[0];
+		*pat_rev = pdc_result[1];
+		*pdc_cap = pdc_result[2];
+	}
+	spin_unlock_irqrestore(&pdc_lock, flags);
+
+	return retval;
+}
+
+
 /**
  * pdc_pat_io_pci_cfg_read - Read PCI configuration space.
  * @pci_addr: PCI configuration space address for which the read request is being made.

From 5a23237f14c5f35afbbf9d4a5bdfb2421a7af665 Mon Sep 17 00:00:00 2001
From: John David Anglin
Date: Fri, 19 Oct 2018 20:33:29 -0400
Subject: [PATCH 21/23] parisc: Remove pte_inserted define

The attached change removes the pte_inserted define from pgtable.h. As
a result, we always flush the TLB entry when the associated page table
entry is changed. This change doesn't impact performance significantly,
and it may catch some cases where the TLB needed flushing but wasn't
flushed.
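
For reference, ptep_get_and_clear() after this change reduces to the
following (condensed from the hunk below; illustration only):

	static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
					       pte_t *ptep)
	{
		pte_t old_pte;
		unsigned long flags;

		spin_lock_irqsave(&pa_tlb_lock, flags);
		old_pte = *ptep;
		set_pte(ptep, __pte(0));
		purge_tlb_entries(mm, addr);	/* now unconditional, no pte_inserted() test */
		spin_unlock_irqrestore(&pa_tlb_lock, flags);

		return old_pte;
	}
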
Signed-off-by: John David Anglin
Signed-off-by: Helge Deller
---
 arch/parisc/include/asm/pgtable.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 94c0ef7a9e03..b941ac7d4e70 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -55,10 +55,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 		*(pteptr) = (pteval);			\
 	} while(0)
 
-#define pte_inserted(x) \
-	((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED)) \
-	 == (_PAGE_PRESENT|_PAGE_ACCESSED))
-
 #define set_pte_at(mm, addr, ptep, pteval)	\
 	do {					\
 		pte_t old_pte;			\
@@ -66,8 +62,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 		spin_lock_irqsave(&pa_tlb_lock, flags);	\
 		old_pte = *ptep;		\
 		set_pte(ptep, pteval);		\
-		if (pte_inserted(old_pte))	\
-			purge_tlb_entries(mm, addr); \
+		purge_tlb_entries(mm, addr);	\
 		spin_unlock_irqrestore(&pa_tlb_lock, flags); \
 	} while (0)
 
@@ -493,8 +488,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	spin_lock_irqsave(&pa_tlb_lock, flags);
 	old_pte = *ptep;
 	set_pte(ptep, __pte(0));
-	if (pte_inserted(old_pte))
-		purge_tlb_entries(mm, addr);
+	purge_tlb_entries(mm, addr);
 	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 
 	return old_pte;

From 4c5fe5db1ac6420ec9c0a62ab764572ef79472b3 Mon Sep 17 00:00:00 2001
From: John David Anglin
Date: Fri, 19 Oct 2018 20:48:12 -0400
Subject: [PATCH 22/23] parisc: Optimize cache flush algorithms

The attached patch implements three optimizations:

1) Loops in flush_user_dcache_range_asm, flush_kernel_dcache_range_asm,
purge_kernel_dcache_range_asm, flush_user_icache_range_asm, and
flush_kernel_icache_range_asm are unrolled to reduce branch overhead.

2) The static branch prediction for cmpb instructions in pacache.S has
been reviewed and the operand order adjusted where necessary.

3) For flush routines in cache.c, we purge rather than flush when we
have no context. The pdc instruction at level 0 is not required to
write back dirty lines to memory. This provides a performance
improvement over the fdc instruction if the feature is implemented.

Version 2 adds alternative patching.

The patch provides an average improvement of about 2%.
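
Two details, spelled out as a sketch only: each unrolled range loop now
issues sixteen fdc/fic/pdc operations per iteration, so the depd,z/depw,z
before the loop computes %r21 = 16 * stride for the end-of-block test;
and the C-level callers in cache.c pick purge vs. flush depending on
whether the mm still owns a context (condensed from the hunks below):

	pfn = pte_pfn(*ptep);
	if (!pfn_valid(pfn))
		continue;
	if (mm->context) {
		/* Live address space: purge the TLB entry and flush
		 * (write back + invalidate) with fdc/fic. */
		flush_tlb_page(vma, addr);
		__flush_cache_page(vma, addr, PFN_PHYS(pfn));
	} else {
		/* No context: pdc is not required to write back dirty
		 * lines, so purge instead of flush. */
		__purge_cache_page(vma, addr, PFN_PHYS(pfn));
	}
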
Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 33 +++++- arch/parisc/kernel/pacache.S | 216 ++++++++++++++++++++++++++++++++--- 2 files changed, 229 insertions(+), 20 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 56dc9791ee23..804880efa11e 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -36,6 +36,7 @@ EXPORT_SYMBOL(dcache_stride); void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); EXPORT_SYMBOL(flush_dcache_page_asm); +void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); @@ -303,6 +304,17 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, preempt_enable(); } +static inline void +__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, + unsigned long physaddr) +{ + preempt_disable(); + purge_dcache_page_asm(physaddr, vmaddr); + if (vma->vm_flags & VM_EXEC) + flush_icache_page_asm(physaddr, vmaddr); + preempt_enable(); +} + void flush_dcache_page(struct page *page) { struct address_space *mapping = page_mapping_file(page); @@ -563,9 +575,12 @@ void flush_cache_mm(struct mm_struct *mm) pfn = pte_pfn(*ptep); if (!pfn_valid(pfn)) continue; - if (unlikely(mm->context)) + if (unlikely(mm->context)) { flush_tlb_page(vma, addr); - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + } else { + __purge_cache_page(vma, addr, PFN_PHYS(pfn)); + } } } } @@ -600,9 +615,12 @@ void flush_cache_range(struct vm_area_struct *vma, continue; pfn = pte_pfn(*ptep); if (pfn_valid(pfn)) { - if (unlikely(vma->vm_mm->context)) + if (unlikely(vma->vm_mm->context)) { flush_tlb_page(vma, addr); - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + } else { + __purge_cache_page(vma, addr, PFN_PHYS(pfn)); + } } } } @@ -611,9 +629,12 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { if (pfn_valid(pfn)) { - if (likely(vma->vm_mm->context)) + if (likely(vma->vm_mm->context)) { flush_tlb_page(vma, vmaddr); - __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + } else { + __purge_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + } } } diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index b41c0136a05f..187f032c9dd8 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -838,8 +838,7 @@ ENTRY_CFI(flush_dcache_page_asm) add %r28, %r25, %r25 sub %r25, r31, %r25 - -1: fdc,m r31(%r28) +1: fdc,m r31(%r28) fdc,m r31(%r28) fdc,m r31(%r28) fdc,m r31(%r28) @@ -854,7 +853,7 @@ ENTRY_CFI(flush_dcache_page_asm) fdc,m r31(%r28) fdc,m r31(%r28) fdc,m r31(%r28) - cmpb,COND(<<) %r28, %r25,1b + cmpb,COND(>>) %r25, %r28, 1b /* predict taken */ fdc,m r31(%r28) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) @@ -863,6 +862,67 @@ ENTRY_CFI(flush_dcache_page_asm) nop ENDPROC_CFI(flush_dcache_page_asm) +ENTRY_CFI(purge_dcache_page_asm) + ldil L%(TMPALIAS_MAP_START), %r28 +#ifdef CONFIG_64BIT +#if (TMPALIAS_MAP_START >= 0x80000000) + depdi 0, 31,32, %r28 /* clear any sign extension */ +#endif + convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ + depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ + depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ +#else + extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert 
format */ + depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ + depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ +#endif + + /* Purge any old translation */ + +#ifdef CONFIG_PA20 + pdtlb,l %r0(%r28) +#else + tlb_lock %r20,%r21,%r22 +0: pdtlb %r0(%r28) + tlb_unlock %r20,%r21,%r22 + ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) +#endif + +88: ldil L%dcache_stride, %r1 + ldw R%dcache_stride(%r1), r31 + +#ifdef CONFIG_64BIT + depdi,z 1, 63-PAGE_SHIFT,1, %r25 +#else + depwi,z 1, 31-PAGE_SHIFT,1, %r25 +#endif + add %r28, %r25, %r25 + sub %r25, r31, %r25 + +1: pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + cmpb,COND(>>) %r25, %r28, 1b /* predict taken */ + pdc,m r31(%r28) + +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) + sync + bv %r0(%r2) + nop +ENDPROC_CFI(purge_dcache_page_asm) + ENTRY_CFI(flush_icache_page_asm) ldil L%(TMPALIAS_MAP_START), %r28 #ifdef CONFIG_64BIT @@ -908,7 +968,6 @@ ENTRY_CFI(flush_icache_page_asm) add %r28, %r25, %r25 sub %r25, %r31, %r25 - /* fic only has the type 26 form on PA1.1, requiring an * explicit space specification, so use %sr4 */ 1: fic,m %r31(%sr4,%r28) @@ -926,7 +985,7 @@ ENTRY_CFI(flush_icache_page_asm) fic,m %r31(%sr4,%r28) fic,m %r31(%sr4,%r28) fic,m %r31(%sr4,%r28) - cmpb,COND(<<) %r28, %r25,1b + cmpb,COND(>>) %r25, %r28, 1b /* predict taken */ fic,m %r31(%sr4,%r28) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) @@ -947,7 +1006,6 @@ ENTRY_CFI(flush_kernel_dcache_page_asm) add %r26, %r25, %r25 sub %r25, %r23, %r25 - 1: fdc,m %r23(%r26) fdc,m %r23(%r26) fdc,m %r23(%r26) @@ -963,7 +1021,7 @@ ENTRY_CFI(flush_kernel_dcache_page_asm) fdc,m %r23(%r26) fdc,m %r23(%r26) fdc,m %r23(%r26) - cmpb,COND(<<) %r26, %r25,1b + cmpb,COND(>>) %r25, %r26, 1b /* predict taken */ fdc,m %r23(%r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) @@ -999,7 +1057,7 @@ ENTRY_CFI(purge_kernel_dcache_page_asm) pdc,m %r23(%r26) pdc,m %r23(%r26) pdc,m %r23(%r26) - cmpb,COND(<<) %r26, %r25, 1b + cmpb,COND(>>) %r25, %r26, 1b /* predict taken */ pdc,m %r23(%r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) @@ -1014,7 +1072,33 @@ ENTRY_CFI(flush_user_dcache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25, 1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + fdc,m %r23(%sr3, %r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b fdc,m %r23(%sr3, %r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) @@ -1029,7 +1113,33 @@ ENTRY_CFI(flush_kernel_dcache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25,1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add 
%r22, %r21, %r22 + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + fdc,m %r23(%r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */ fdc,m %r23(%r26) sync @@ -1045,7 +1155,33 @@ ENTRY_CFI(purge_kernel_dcache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25,1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + pdc,m %r23(%r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */ pdc,m %r23(%r26) sync @@ -1061,7 +1197,33 @@ ENTRY_CFI(flush_user_icache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25,1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + fic,m %r23(%sr3, %r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b fic,m %r23(%sr3, %r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) @@ -1098,7 +1260,7 @@ ENTRY_CFI(flush_kernel_icache_page) fic,m %r23(%sr4, %r26) fic,m %r23(%sr4, %r26) fic,m %r23(%sr4, %r26) - cmpb,COND(<<) %r26, %r25, 1b + cmpb,COND(>>) %r25, %r26, 1b /* predict taken */ fic,m %r23(%sr4, %r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) @@ -1113,7 +1275,33 @@ ENTRY_CFI(flush_kernel_icache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25, 1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + fic,m %r23(%sr4, %r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */ fic,m %r23(%sr4, %r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) From e543b3a620296a18aaf0d66475b68d6a85e8fcd4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Oct 2018 22:13:49 +0200 Subject: [PATCH 23/23] parisc: Retrieve and display the PDC PAT capabilities 
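
As an illustration only (a hypothetical consumer, assumed rather than
taken from the patch), other code can key off the capability word
cached by inventory.c:

	#include <linux/types.h>
	#include <asm/pdc.h>
	#include <asm/pdcpat.h>

	static bool hpmc_rendezvous_by_firmware(void)
	{
		/* parisc_pat_pdc_cap stays 0 on non-PAT machines, so the
		 * test is safe everywhere. */
		return !!(parisc_pat_pdc_cap & PDC_PAT_CAPABILITY_BIT_PDC_HPMC_RENDEZ);
	}
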
Signed-off-by: Helge Deller --- arch/parisc/include/asm/pdc.h | 1 + arch/parisc/kernel/inventory.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index 339e83ddb39e..5b187d40d604 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -11,6 +11,7 @@ extern int parisc_narrow_firmware; extern int pdc_type; extern unsigned long parisc_cell_num; /* cell number the CPU runs on (PAT) */ extern unsigned long parisc_cell_loc; /* cell location of CPU (PAT) */ +extern unsigned long parisc_pat_pdc_cap; /* PDC capabilities (PAT) */ /* Values for pdc_type */ #define PDC_TYPE_ILLEGAL -1 diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c index b0fe19ac4d78..35d05fdd7483 100644 --- a/arch/parisc/kernel/inventory.c +++ b/arch/parisc/kernel/inventory.c @@ -43,6 +43,7 @@ int pdc_type __read_mostly = PDC_TYPE_ILLEGAL; /* cell number and location (PAT firmware only) */ unsigned long parisc_cell_num __read_mostly; unsigned long parisc_cell_loc __read_mostly; +unsigned long parisc_pat_pdc_cap __read_mostly; void __init setup_pdc(void) @@ -81,12 +82,21 @@ void __init setup_pdc(void) #ifdef CONFIG_64BIT status = pdc_pat_cell_get_number(&cell_info); if (status == PDC_OK) { + unsigned long legacy_rev, pat_rev; pdc_type = PDC_TYPE_PAT; pr_cont("64 bit PAT.\n"); parisc_cell_num = cell_info.cell_num; parisc_cell_loc = cell_info.cell_loc; pr_info("PAT: Running on cell %lu and location %lu.\n", parisc_cell_num, parisc_cell_loc); + status = pdc_pat_pd_get_pdc_revisions(&legacy_rev, + &pat_rev, &parisc_pat_pdc_cap); + pr_info("PAT: legacy revision 0x%lx, pat_rev 0x%lx, pdc_cap 0x%lx, S-PTLB %d, HPMC_RENDEZ %d.\n", + legacy_rev, pat_rev, parisc_pat_pdc_cap, + parisc_pat_pdc_cap + & PDC_PAT_CAPABILITY_BIT_SIMULTANEOUS_PTLB ? 1:0, + parisc_pat_pdc_cap + & PDC_PAT_CAPABILITY_BIT_PDC_HPMC_RENDEZ ? 1:0); return; } #endif