From a2b05b7aa60e1e9b60faf01dfb1cca35638d1ab1 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 11 May 2017 21:24:41 +1000
Subject: [PATCH 01/25] powerpc/64s: Add dt_cpu_ftrs boot time setup option

Provide a dt_cpu_ftrs= cmdline option to disable the dt_cpu_ftrs CPU
feature discovery, and fall back to the "cputable" based version.

Also allow control of advertising unknown features to userspace and
with this parameter, and remove the clunky CONFIG option.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Add explicit early check of bootargs in dt_cpu_ftrs_init()]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../admin-guide/kernel-parameters.txt         |  9 +++
 arch/powerpc/Kconfig                          |  5 --
 arch/powerpc/kernel/dt_cpu_ftrs.c             | 57 ++++++++++++++++---
 3 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 15f79c27748d..0f5c3b4347c6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -866,6 +866,15 @@
 
 	dscc4.setup=	[NET]
 
+	dt_cpu_ftrs=	[PPC]
+			Format: {"off" | "known"}
+			Control how the dt_cpu_ftrs device-tree binding is
+			used for CPU feature discovery and setup (if it
+			exists).
+			off: Do not use it, fall back to legacy cpu table.
+			known: Do not pass through unknown features to guests
+			or userspace, only those that the kernel is aware of.
+
 	dump_apple_properties	[X86]
 			Dump name and content of EFI device properties on
 			x86 Macs.  Useful for driver authors to determine
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f7c8f9972f61..4a4a05afcaf7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -391,11 +391,6 @@ config PPC_DT_CPU_FTRS
 	  firmware provides this binding.
 	  If you're not sure say Y.
 
-config PPC_CPUFEATURES_ENABLE_UNKNOWN
-	bool "cpufeatures pass through unknown features to guest/userspace"
-	depends on PPC_DT_CPU_FTRS
-	default y
-
 config HIGHMEM
 	bool "High memory support"
 	depends on PPC32
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index fcc7588a96d6..76eebba13a1d 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -8,6 +8,7 @@
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/jump_label.h>
+#include <linux/libfdt.h>
 #include <linux/memblock.h>
 #include <linux/printk.h>
 #include <linux/sched.h>
@@ -671,12 +672,24 @@ static struct dt_cpu_feature_match __initdata
 	{"wait-v3", feat_enable, 0},
 };
 
-/* XXX: how to configure this? Default + boot time? */
-#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN
-#define CPU_FEATURE_ENABLE_UNKNOWN 1
-#else
-#define CPU_FEATURE_ENABLE_UNKNOWN 0
-#endif
+static bool __initdata using_dt_cpu_ftrs;
+static bool __initdata enable_unknown = true;
+
+static int __init dt_cpu_ftrs_parse(char *str)
+{
+	if (!str)
+		return 0;
+
+	if (!strcmp(str, "off"))
+		using_dt_cpu_ftrs = false;
+	else if (!strcmp(str, "known"))
+		enable_unknown = false;
+	else
+		return 1;
+
+	return 0;
+}
+early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse);
 
 static void __init cpufeatures_setup_start(u32 isa)
 {
@@ -707,7 +720,7 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
 		}
 	}
 
-	if (!known && CPU_FEATURE_ENABLE_UNKNOWN) {
+	if (!known && enable_unknown) {
 		if (!feat_try_enable_unknown(f)) {
 			pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
 				f->name);
@@ -756,6 +769,26 @@ static void __init cpufeatures_setup_finished(void)
 		cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
 }
 
+static int __init disabled_on_cmdline(void)
+{
+	unsigned long root, chosen;
+	const char *p;
+
+	root = of_get_flat_dt_root();
+	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	if (chosen == -FDT_ERR_NOTFOUND)
+		return false;
+
+	p = of_get_flat_dt_prop(chosen, "bootargs", NULL);
+	if (!p)
+		return false;
+
+	if (strstr(p, "dt_cpu_ftrs=off"))
+		return true;
+
+	return false;
+}
+
 static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
 					int depth, void *data)
 {
@@ -766,8 +799,6 @@ static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
 	return 0;
 }
 
-static bool __initdata using_dt_cpu_ftrs = false;
-
 bool __init dt_cpu_ftrs_in_use(void)
 {
 	return using_dt_cpu_ftrs;
@@ -775,6 +806,8 @@ bool __init dt_cpu_ftrs_in_use(void)
 
 bool __init dt_cpu_ftrs_init(void *fdt)
 {
+	using_dt_cpu_ftrs = false;
+
 	/* Setup and verify the FDT, if it fails we just bail */
 	if (!early_init_dt_verify(fdt))
 		return false;
@@ -782,6 +815,9 @@ bool __init dt_cpu_ftrs_init(void *fdt)
 	if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
 		return false;
 
+	if (disabled_on_cmdline())
+		return false;
+
 	cpufeatures_setup_cpu();
 
 	using_dt_cpu_ftrs = true;
@@ -1027,5 +1063,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
 
 void __init dt_cpu_ftrs_scan(void)
 {
+	if (!using_dt_cpu_ftrs)
+		return;
+
 	of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
 }

From 99acc9bede06bbb2662aafff51f5b9e529fa845e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 29 May 2017 20:26:07 +1000
Subject: [PATCH 02/25] powerpc/spufs: Fix coredump of SPU contexts

If a process dumps core while it has SPU contexts active then we have
code to also dump information about the SPU contexts.

Unfortunately it's been broken for 3 1/2 years, and we didn't notice. In
commit 7b1f4020d0d1 ("spufs: get rid of dump_emit() wrappers") the nread
variable was removed and rc used instead. That means when the loop exits
successfully, rc has the number of bytes read, but it's then used as the
return value for the function, which should return 0 on success.

So fix it by setting rc = 0 before returning in the success case.

Fixes: 7b1f4020d0d1 ("spufs: get rid of dump_emit() wrappers")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/cell/spufs/coredump.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index e5a891ae80ee..84b7ac926ce6 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -175,6 +175,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
 	if (!dump_skip(cprm, skip))
 		goto Eio;
+
+	rc = 0;
 out:
 	free_page((unsigned long)buf);
 	return rc;

From 6f553912eedafae13ff20b322a65e471fe7f5236 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 24 May 2017 10:01:55 +0200
Subject: [PATCH 03/25] powerpc/sysdev/simple_gpio: Fix oops in gpio save_regs
 function

of_mm_gpiochip_add_data() generates an oops for NULL pointer dereference.

of_mm_gpiochip_add_data() calls mm_gc->save_regs() before
setting the data, therefore ->save_regs() cannot use gpiochip_get_data()

Fixes: 937daafca774 ("powerpc: simple-gpio: use gpiochip data pointer")
Cc: stable@vger.kernel.org # v4.7+
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/simple_gpio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index ef470b470b04..6afddae2fb47 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -75,7 +75,8 @@ static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 
 static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-	struct u8_gpio_chip *u8_gc = gpiochip_get_data(&mm_gc->gc);
+	struct u8_gpio_chip *u8_gc =
+		container_of(mm_gc, struct u8_gpio_chip, mm_gc);
 
 	u8_gc->data = in_8(mm_gc->regs);
 }

From dc421b200f91930c9c6a9586810ff8c232cf10fc Mon Sep 17 00:00:00 2001
From: Michael Bringmann <mwb@linux.vnet.ibm.com>
Date: Mon, 22 May 2017 15:44:37 -0500
Subject: [PATCH 04/25] powerpc/hotplug-mem: Fix missing endian conversion of
 aa_index

When adding or removing memory, the aa_index (affinity value) for the
memblock must also be converted to match the endianness of the rest
of the 'ibm,dynamic-memory' property.  Otherwise, subsequent retrieval
of the attribute will likely lead to non-existent nodes, followed by
using the default node in the code inappropriately.

Fixes: 5f97b2a0d176 ("powerpc/pseries: Implement memory hotplug add in the kernel")
Cc: stable@vger.kernel.org # v4.1+
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index e104c71ea44a..1fb162ba9d1c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -124,6 +124,7 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn)
 	for (i = 0; i < num_lmbs; i++) {
 		lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
 		lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
+		lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index);
 		lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
 	}
 
@@ -147,6 +148,7 @@ static void dlpar_update_drconf_property(struct device_node *dn,
 	for (i = 0; i < num_lmbs; i++) {
 		lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
 		lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
+		lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index);
 		lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
 	}
 

From 0e5e7f5e9700661c3ddd95501743fb52fec1ab07 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 25 May 2017 16:33:52 +1000
Subject: [PATCH 05/25] powerpc/64: Reclaim CPU_FTR_SUBCORE

We are running low on CPU feature bits, so we only want to use them when
it's really necessary.

CPU_FTR_SUBCORE is only used in one place, and only in C, so we don't
need it in order to make asm patching work. It can only be set on
"Power8" CPUs, which in practice means POWER8, POWER8E and POWER8NVL.
There are no plans to implement it on future CPUs, but if there ever
were we could retrofit it then.

Although KVM uses subcores, it never looks at the CPU feature, it either
looks at the ISA level or the threads_per_subcore value.

So drop the CPU feature and do a PVR check instead. Drop the device tree
"subcore" feature as we no longer support doing anything with it, and we
will drop it from skiboot too.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cputable.h      | 3 +--
 arch/powerpc/kernel/dt_cpu_ftrs.c        | 1 -
 arch/powerpc/platforms/powernv/subcore.c | 8 +++++++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index c2d509584a98..d02ad93bf708 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -214,7 +214,6 @@ enum {
 #define CPU_FTR_DAWR			LONG_ASM_CONST(0x0400000000000000)
 #define CPU_FTR_DABRX			LONG_ASM_CONST(0x0800000000000000)
 #define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_SUBCORE			LONG_ASM_CONST(0x2000000000000000)
 #define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x4000000000000000)
 
 #ifndef __ASSEMBLY__
@@ -463,7 +462,7 @@ enum {
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE)
+	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
 #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
 #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 76eebba13a1d..4c7656dc4e04 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -643,7 +643,6 @@ static struct dt_cpu_feature_match __initdata
 	{"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
 	{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
 	{"processor-utilization-of-resources-register", feat_enable_purr, 0},
-	{"subcore", feat_enable, CPU_FTR_SUBCORE},
 	{"no-execute", feat_enable, 0},
 	{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
 	{"cache-inhibited-large-page", feat_enable_large_ci, 0},
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 0babef11136f..8c6119280c13 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -407,7 +407,13 @@ static DEVICE_ATTR(subcores_per_core, 0644,
 
 static int subcore_init(void)
 {
-	if (!cpu_has_feature(CPU_FTR_SUBCORE))
+	unsigned pvr_ver;
+
+	pvr_ver = PVR_VER(mfspr(SPRN_PVR));
+
+	if (pvr_ver != PVR_POWER8 &&
+	    pvr_ver != PVR_POWER8E &&
+	    pvr_ver != PVR_POWER8NVL)
 		return 0;
 
 	/*

From 1195892c091a15cc862f4e202482a36adc924e12 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Fri, 2 Jun 2017 18:43:30 -0300
Subject: [PATCH 06/25] powerpc/kernel: Fix FP and vector register restoration

Currently tsk->thread->load_vec and load_fp are not initialized during
task creation, which can lead to garbage values in these variables (non-zero
values).

These variables will be checked later in restore_math() to validate if the
FP and vector registers are being utilized. Since these values might be
non-zero, the restore_math() will continue to save the FP and vectors even if
they were never utilized by the userspace application. load_fp and load_vec
counters will then overflow (they wrap at 255) and the FP and Altivec will be
finally disabled, but before that condition is reached (counter overflow)
several context switches will have restored FP and vector registers without
need, causing a performance degradation.

Fixes: 70fe3d980f5f ("powerpc: Restore FPU/VEC/VSX if previously used")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Gustavo Romero <gusbromero@gmail.com>
Acked-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index baae104b16c7..a9435397eab8 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1666,6 +1666,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 #ifdef CONFIG_VSX
 	current->thread.used_vsr = 0;
 #endif
+	current->thread.load_fp = 0;
 	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
 	current->thread.fp_save_area = NULL;
 #ifdef CONFIG_ALTIVEC
@@ -1674,6 +1675,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.vr_save_area = NULL;
 	current->thread.vrsave = 0;
 	current->thread.used_vr = 0;
+	current->thread.load_vec = 0;
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_SPE
 	memset(current->thread.evr, 0, sizeof(current->thread.evr));

From 7f22ced4377628074e2ac25f41a88f98eb3b03f1 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Mon, 5 Jun 2017 11:40:59 -0300
Subject: [PATCH 07/25] powerpc/kernel: Initialize load_tm on task creation

Currently tsk->thread.load_tm is not initialized in the task creation
and can contain garbage on a new task.

This is an undesired behaviour, since it affects the timing to enable
and disable the transactional memory laziness (disabling and enabling
the MSR TM bit, which affects TM reclaim and recheckpoint in the
scheduling process).

Fixes: 5d176f751ee3 ("powerpc: tm: Enable transactional memory (TM) lazily for userspace")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a9435397eab8..2ad725ef4368 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1687,6 +1687,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.tm_tfhar = 0;
 	current->thread.tm_texasr = 0;
 	current->thread.tm_tfiar = 0;
+	current->thread.load_tm = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 }
 EXPORT_SYMBOL(start_thread);

From b3aa20ba2ba8072b73bd799605b8c98927b7056c Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Date: Fri, 2 Jun 2017 22:26:48 +0530
Subject: [PATCH 08/25] cxl: Avoid double free_irq() for psl,slice interrupts

During an eeh call to cxl_remove can result in double free_irq of
psl,slice interrupts. This can happen if perst_reloads_same_image == 1
and call to cxl_configure_adapter() fails during slot_reset
callback. In such a case we see a kernel oops with following back-trace:

Oops: Kernel access of bad area, sig: 11 [#1]
Call Trace:
  free_irq+0x88/0xd0 (unreliable)
  cxl_unmap_irq+0x20/0x40 [cxl]
  cxl_native_release_psl_irq+0x78/0xd8 [cxl]
  pci_deconfigure_afu+0xac/0x110 [cxl]
  cxl_remove+0x104/0x210 [cxl]
  pci_device_remove+0x6c/0x110
  device_release_driver_internal+0x204/0x2e0
  pci_stop_bus_device+0xa0/0xd0
  pci_stop_and_remove_bus_device+0x28/0x40
  pci_hp_remove_devices+0xb0/0x150
  pci_hp_remove_devices+0x68/0x150
  eeh_handle_normal_event+0x140/0x580
  eeh_handle_event+0x174/0x360
  eeh_event_handler+0x1e8/0x1f0

This patch fixes the issue of double free_irq by checking that
variables that hold the virqs (err_hwirq, serr_hwirq, psl_virq) are
not '0' before un-mapping and resetting these variables to '0' when
they are un-mapped.

Cc: stable@vger.kernel.org
Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/native.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 871a2f09c718..8d6ea9712dbd 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -1302,13 +1302,16 @@ int cxl_native_register_psl_err_irq(struct cxl *adapter)
 
 void cxl_native_release_psl_err_irq(struct cxl *adapter)
 {
-	if (adapter->native->err_virq != irq_find_mapping(NULL, adapter->native->err_hwirq))
+	if (adapter->native->err_virq == 0 ||
+	    adapter->native->err_virq !=
+	    irq_find_mapping(NULL, adapter->native->err_hwirq))
 		return;
 
 	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
 	cxl_unmap_irq(adapter->native->err_virq, adapter);
 	cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq);
 	kfree(adapter->irq_name);
+	adapter->native->err_virq = 0;
 }
 
 int cxl_native_register_serr_irq(struct cxl_afu *afu)
@@ -1346,13 +1349,15 @@ int cxl_native_register_serr_irq(struct cxl_afu *afu)
 
 void cxl_native_release_serr_irq(struct cxl_afu *afu)
 {
-	if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
+	if (afu->serr_virq == 0 ||
+	    afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
 		return;
 
 	cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
 	cxl_unmap_irq(afu->serr_virq, afu);
 	cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
 	kfree(afu->err_irq_name);
+	afu->serr_virq = 0;
 }
 
 int cxl_native_register_psl_irq(struct cxl_afu *afu)
@@ -1375,12 +1380,15 @@ int cxl_native_register_psl_irq(struct cxl_afu *afu)
 
 void cxl_native_release_psl_irq(struct cxl_afu *afu)
 {
-	if (afu->native->psl_virq != irq_find_mapping(NULL, afu->native->psl_hwirq))
+	if (afu->native->psl_virq == 0 ||
+	    afu->native->psl_virq !=
+	    irq_find_mapping(NULL, afu->native->psl_hwirq))
 		return;
 
 	cxl_unmap_irq(afu->native->psl_virq, afu);
 	cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq);
 	kfree(afu->psl_irq_name);
+	afu->native->psl_virq = 0;
 }
 
 static void recover_psl_err(struct cxl_afu *afu, u64 errstat)

From ba4a648f12f4cd0a8003dd229b6ca8a53348ee4b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 6 Jun 2017 20:23:57 +1000
Subject: [PATCH 09/25] powerpc/numa: Fix percpu allocations to be NUMA aware

In commit 8c272261194d ("powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID"), we
switched to the generic implementation of cpu_to_node(), which uses a percpu
variable to hold the NUMA node for each CPU.

Unfortunately we neglected to notice that we use cpu_to_node() in the allocation
of our percpu areas, leading to a chicken and egg problem. In practice what
happens is when we are setting up the percpu areas, cpu_to_node() reports that
all CPUs are on node 0, so we allocate all percpu areas on node 0.

This is visible in the dmesg output, as all pcpu allocs being in group 0:

  pcpu-alloc: [0] 00 01 02 03 [0] 04 05 06 07
  pcpu-alloc: [0] 08 09 10 11 [0] 12 13 14 15
  pcpu-alloc: [0] 16 17 18 19 [0] 20 21 22 23
  pcpu-alloc: [0] 24 25 26 27 [0] 28 29 30 31
  pcpu-alloc: [0] 32 33 34 35 [0] 36 37 38 39
  pcpu-alloc: [0] 40 41 42 43 [0] 44 45 46 47

To fix it we need an early_cpu_to_node() which can run prior to percpu being
setup. We already have the numa_cpu_lookup_table we can use, so just plumb it
in. With the patch dmesg output shows two groups, 0 and 1:

  pcpu-alloc: [0] 00 01 02 03 [0] 04 05 06 07
  pcpu-alloc: [0] 08 09 10 11 [0] 12 13 14 15
  pcpu-alloc: [0] 16 17 18 19 [0] 20 21 22 23
  pcpu-alloc: [1] 24 25 26 27 [1] 28 29 30 31
  pcpu-alloc: [1] 32 33 34 35 [1] 36 37 38 39
  pcpu-alloc: [1] 40 41 42 43 [1] 44 45 46 47

We can also check the data_offset in the paca of various CPUs, with the fix we
see:

  CPU 0:  data_offset = 0x0ffe8b0000
  CPU 24: data_offset = 0x1ffe5b0000

And we can see from dmesg that CPU 24 has an allocation on node 1:

  node   0: [mem 0x0000000000000000-0x0000000fffffffff]
  node   1: [mem 0x0000001000000000-0x0000001fffffffff]

Cc: stable@vger.kernel.org # v3.16+
Fixes: 8c272261194d ("powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/topology.h | 14 ++++++++++++++
 arch/powerpc/kernel/setup_64.c      |  4 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 8b3b46b7b0f2..329771559cbb 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,8 +44,22 @@ extern void __init dump_numa_cpu_topology(void);
 extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 
+static inline int early_cpu_to_node(int cpu)
+{
+	int nid;
+
+	nid = numa_cpu_lookup_table[cpu];
+
+	/*
+	 * Fall back to node 0 if nid is unset (it should be, except bugs).
+	 * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
+	 */
+	return (nid < 0) ? 0 : nid;
+}
 #else
 
+static inline int early_cpu_to_node(int cpu) { return 0; }
+
 static inline void dump_numa_cpu_topology(void) {}
 
 static inline int sysfs_add_device_to_node(struct device *dev, int nid)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index f35ff9dea4fb..a8c1f99e9607 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -661,7 +661,7 @@ void __init emergency_stack_init(void)
 
 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 {
-	return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
+	return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
 				    __pa(MAX_DMA_ADDRESS));
 }
 
@@ -672,7 +672,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
-	if (cpu_to_node(from) == cpu_to_node(to))
+	if (early_cpu_to_node(from) == early_cpu_to_node(to))
 		return LOCAL_DISTANCE;
 	else
 		return REMOTE_DISTANCE;

From 8c218578fcbbbdb10416c8614658bf32e3bf1655 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Fri, 26 May 2017 13:38:27 +0530
Subject: [PATCH 10/25] powerpc/perf: Fix Power9 test_adder fields

Commit 8d911904f3ce4 ('powerpc/perf: Add restrictions to PMC5 in power9 DD1')
was added to restrict the use of PMC5 in Power9 DD1. Intention was to disable
the use of PMC5 using raw event code. But instead of updating the
power9_isa207_pmu structure (used on DD1), the commit incorrectly updated the
power9_pmu structure. Fix it.

Fixes: 8d911904f3ce ("powerpc/perf: Add restrictions to PMC5 in power9 DD1")
Reported-by: Shriya <shriyak@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Tested-by: Shriya <shriyak@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/power9-pmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 018f8e90ac35..bb28e1a41257 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -402,7 +402,7 @@ static struct power_pmu power9_isa207_pmu = {
 	.name			= "POWER9",
 	.n_counter		= MAX_PMU_COUNTERS,
 	.add_fields		= ISA207_ADD_FIELDS,
-	.test_adder		= ISA207_TEST_ADDER,
+	.test_adder		= P9_DD1_TEST_ADDER,
 	.compute_mmcr		= isa207_compute_mmcr,
 	.config_bhrb		= power9_config_bhrb,
 	.bhrb_filter_map	= power9_bhrb_filter_map,
@@ -421,7 +421,7 @@ static struct power_pmu power9_pmu = {
 	.name			= "POWER9",
 	.n_counter		= MAX_PMU_COUNTERS,
 	.add_fields		= ISA207_ADD_FIELDS,
-	.test_adder		= P9_DD1_TEST_ADDER,
+	.test_adder		= ISA207_TEST_ADDER,
 	.compute_mmcr		= isa207_compute_mmcr,
 	.config_bhrb		= power9_config_bhrb,
 	.bhrb_filter_map	= power9_bhrb_filter_map,

From cec422c11caeeccae709e9942058b6b644ce434c Mon Sep 17 00:00:00 2001
From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Date: Tue, 6 Jun 2017 11:43:41 +0200
Subject: [PATCH 11/25] cxl: Fix error path on bad ioctl

Fix error path if we can't copy user structure on CXL_IOCTL_START_WORK
ioctl. We shouldn't unlock the context status mutex as it was not
locked (yet).

Fixes: 0712dc7e73e5 ("cxl: Fix issues when unmapping contexts")
Cc: stable@vger.kernel.org # v3.19+
Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Reviewed-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/file.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 17b433f1ce23..0761271d68c5 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -159,11 +159,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 
 	/* Do this outside the status_mutex to avoid a circular dependency with
 	 * the locking in cxl_mmap_fault() */
-	if (copy_from_user(&work, uwork,
-			   sizeof(struct cxl_ioctl_start_work))) {
-		rc = -EFAULT;
-		goto out;
-	}
+	if (copy_from_user(&work, uwork, sizeof(work)))
+		return -EFAULT;
 
 	mutex_lock(&ctx->status_mutex);
 	if (ctx->status != OPENED) {

From 92d9dfda8b547cc292af27e11e11c9eff3bb574f Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 1 Jun 2017 20:05:04 +0530
Subject: [PATCH 12/25] powerpc/mm/4k: Limit 4k page size config to 64TB
 virtual address space

Supporting 512TB requires us to do a order 3 allocation for level 1 page
table (pgd). This results in page allocation failures with certain workloads.
For now limit 4k linux page size config to 64TB.

Fixes: f6eedbba7a26 ("powerpc/mm/hash: Increase VA range to 128TB")
Reported-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h |  2 +-
 arch/powerpc/include/asm/processor.h         | 25 ++++++++++----------
 arch/powerpc/kernel/setup-common.c           |  2 +-
 arch/powerpc/mm/mmu_context_book3s64.c       |  2 +-
 4 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index b4b5e6b671ca..0c4e470571ca 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -8,7 +8,7 @@
 #define H_PTE_INDEX_SIZE  9
 #define H_PMD_INDEX_SIZE  7
 #define H_PUD_INDEX_SIZE  9
-#define H_PGD_INDEX_SIZE  12
+#define H_PGD_INDEX_SIZE  9
 
 #ifndef __ASSEMBLY__
 #define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a2123f291ab0..bb99b651085a 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -110,13 +110,18 @@ void release_thread(struct task_struct *);
 #define TASK_SIZE_128TB (0x0000800000000000UL)
 #define TASK_SIZE_512TB (0x0002000000000000UL)
 
-#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * For now 512TB is only supported with book3s and 64K linux page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
 /*
  * Max value currently used:
  */
-#define TASK_SIZE_USER64	TASK_SIZE_512TB
+#define TASK_SIZE_USER64		TASK_SIZE_512TB
+#define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_128TB
 #else
-#define TASK_SIZE_USER64	TASK_SIZE_64TB
+#define TASK_SIZE_USER64		TASK_SIZE_64TB
+#define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_64TB
 #endif
 
 /*
@@ -132,7 +137,7 @@ void release_thread(struct task_struct *);
  * space during mmap's.
  */
 #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
 
 #define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
 		TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
@@ -143,21 +148,15 @@ void release_thread(struct task_struct *);
  * with 128TB and conditionally enable upto 512TB
  */
 #ifdef CONFIG_PPC_BOOK3S_64
-#define DEFAULT_MAP_WINDOW	((is_32bit_task()) ? \
-				 TASK_SIZE_USER32 : TASK_SIZE_128TB)
+#define DEFAULT_MAP_WINDOW	((is_32bit_task()) ?			\
+				 TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
 #else
 #define DEFAULT_MAP_WINDOW	TASK_SIZE
 #endif
 
 #ifdef __powerpc64__
 
-#ifdef CONFIG_PPC_BOOK3S_64
-/* Limit stack to 128TB */
-#define STACK_TOP_USER64 TASK_SIZE_128TB
-#else
-#define STACK_TOP_USER64 TASK_SIZE_USER64
-#endif
-
+#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
 #define STACK_TOP_USER32 TASK_SIZE_USER32
 
 #define STACK_TOP (is_32bit_task() ? \
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 71dcda91755d..857129acf960 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -928,7 +928,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_PPC_MM_SLICES
 #ifdef CONFIG_PPC64
-	init_mm.context.addr_limit = TASK_SIZE_128TB;
+	init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
 #else
 #error	"context.addr_limit not initialized."
 #endif
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index c6dca2ae78ef..a3edf813d455 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm)
 	 * mm->context.addr_limit. Default to max task size so that we copy the
 	 * default values to paca which will help us to handle slb miss early.
 	 */
-	mm->context.addr_limit = TASK_SIZE_128TB;
+	mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
 
 	/*
 	 * The old code would re-promote on fork, we don't do that when using

From c6ee9619e2edd9912316f7e2eaf9ffa14fafe9f9 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 8 Jun 2017 16:29:59 +1000
Subject: [PATCH 13/25] powerpc/book3s64: Move PPC_DT_CPU_FTRs and enable it by
 default

The PPC_DT_CPU_FTRs is a bit misplaced in menuconfig, it shows up with
other general kernel options. It's really more at home in the "Platform
Support" section, so move it there.

Also enable it by default, for Book3s 64. It does mostly nothing unless
the device tree properties are found, and we will want it enabled
eventually in distro kernels, so turn it on to start getting more
testing.

Fixes: 5a61ef74f269 ("powerpc/64s: Support new device tree binding for discovering CPU features")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig           | 11 -----------
 arch/powerpc/platforms/Kconfig | 11 +++++++++++
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4a4a05afcaf7..964da1891ea9 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -380,17 +380,6 @@ source "arch/powerpc/platforms/Kconfig"
 
 menu "Kernel options"
 
-config PPC_DT_CPU_FTRS
-	bool "Device-tree based CPU feature discovery & setup"
-	depends on PPC_BOOK3S_64
-	default n
-	help
-	  This enables code to use a new device tree binding for describing CPU
-	  compatibility and features. Saying Y here will attempt to use the new
-	  binding if the firmware provides it. Currently only the skiboot
-	  firmware provides this binding.
-	  If you're not sure say Y.
-
 config HIGHMEM
 	bool "High memory support"
 	depends on PPC32
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 33244e3d9375..4fd64d3f5c44 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -59,6 +59,17 @@ config PPC_OF_BOOT_TRAMPOLINE
 
 	  In case of doubt, say Y
 
+config PPC_DT_CPU_FTRS
+	bool "Device-tree based CPU feature discovery & setup"
+	depends on PPC_BOOK3S_64
+	default y
+	help
+	  This enables code to use a new device tree binding for describing CPU
+	  compatibility and features. Saying Y here will attempt to use the new
+	  binding if the firmware provides it. Currently only the skiboot
+	  firmware provides this binding.
+	  If you're not sure say Y.
+
 config UDBG_RTAS_CONSOLE
 	bool "RTAS based debug console"
 	depends on PPC_RTAS

From 377aa6b0efbaa29cfeecd8b9244641217f9544ca Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Wed, 14 Jun 2017 14:47:50 +1000
Subject: [PATCH 14/25] powerpc/npu-dma: Remove spurious WARN_ON when a PCI
 device has no of_node

Commit 4c3b89effc28 ("powerpc/powernv: Add sanity checks to
pnv_pci_get_{gpu|npu}_dev") introduced explicit warnings in
pnv_pci_get_npu_dev() when a PCIe device has no associated device-tree
node. However not all PCIe devices have an of_node and
pnv_pci_get_npu_dev() gets indirectly called at least once for every
PCIe device in the system. This results in spurious WARN_ON()'s so
remove it.

The same situation should not exist for pnv_pci_get_gpu_dev() as any
NPU based PCIe device requires a device-tree node.

Fixes: 4c3b89effc28 ("powerpc/powernv: Add sanity checks to pnv_pci_get_{gpu|npu}_dev")
Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 78fa9395b8c5..e6f444b46207 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -75,7 +75,8 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
 	if (WARN_ON(!gpdev))
 		return NULL;
 
-	if (WARN_ON(!gpdev->dev.of_node))
+	/* Not all PCI devices have device-tree nodes */
+	if (!gpdev->dev.of_node)
 		return NULL;
 
 	/* Get assoicated PCI device */

From 25642705b2359a705784bbbf1655c25a8f8efde2 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 14 Jun 2017 10:19:25 +1000
Subject: [PATCH 15/25] powerpc/xive: Fix offset for store EOI MMIOs

Architecturally we should apply a 0x400 offset for these. Not doing
it will break future HW implementations.

The offset of 0 is supposed to remain for "triggers" though not all
sources support both trigger and store EOI, and in P9 specifically,
some sources will treat 0 as a store EOI. But future chips will not.
So this makes us use the properly architected offset which should work
always.

Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE interrupt controller")
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/xive.h         | 12 +++++++-----
 arch/powerpc/kvm/book3s_xive_template.c |  4 ++--
 arch/powerpc/sysdev/xive/common.c       |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index c8a822acf962..c23ff4389ca2 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -94,11 +94,13 @@ struct xive_q {
  * store at 0 and some ESBs support doing a trigger via a
  * separate trigger page.
  */
-#define XIVE_ESB_GET		0x800
-#define XIVE_ESB_SET_PQ_00	0xc00
-#define XIVE_ESB_SET_PQ_01	0xd00
-#define XIVE_ESB_SET_PQ_10	0xe00
-#define XIVE_ESB_SET_PQ_11	0xf00
+#define XIVE_ESB_STORE_EOI	0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI	0x000 /* Load */
+#define XIVE_ESB_GET		0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00	0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01	0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10	0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11	0xf00 /* Load */
 
 #define XIVE_ESB_VAL_P		0x2
 #define XIVE_ESB_VAL_Q		0x1
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 023a31133c37..4636ca6e7d38 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -69,7 +69,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
 {
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-		__x_writeq(0, __x_eoi_page(xd));
+		__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
 	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
 		opal_int_eoi(hw_irq);
 	} else {
@@ -89,7 +89,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
 		 * properly.
 		 */
 		if (xd->flags & XIVE_IRQ_FLAG_LSI)
-			__x_readq(__x_eoi_page(xd));
+			__x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
 		else {
 			eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
 
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 913825086b8d..8f5e3035483b 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -297,7 +297,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 {
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-		out_be64(xd->eoi_mmio, 0);
+		out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0);
 	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
 		/*
 		 * The FW told us to call it. This happens for some

From a093c92dc7f96a15de98ec8cfe38e6f7610a5969 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 14 Jun 2017 13:01:25 +1000
Subject: [PATCH 16/25] powerpc/debug: Add missing warn flag to WARN_ON's
 non-builtin path

When trapped on WARN_ON(), report_bug() is expected to return
BUG_TRAP_TYPE_WARN so the caller will increment NIP by 4 and continue.
The __builtin_constant_p() path of the PPC's WARN_ON()
calls (indirectly) __WARN_FLAGS() which has BUGFLAG_WARNING set,
however the other branch does not which makes report_bug() report a
bug rather than a warning.

Fixes: f26dee15103f ("debug: Avoid setting BUGFLAG_WARNING twice")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/bug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index f2c562a0a427..0151af6c2a50 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -104,7 +104,7 @@
 		"1:	"PPC_TLNEI"	%4,0\n"			\
 		_EMIT_BUG_ENTRY					\
 		: : "i" (__FILE__), "i" (__LINE__),		\
-		  "i" (BUGFLAG_TAINT(TAINT_WARN)),		\
+		  "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
 		  "i" (sizeof(struct bug_entry)),		\
 		  "r" (__ret_warn_on));				\
 	}							\

From a9f8553e935f26cb5447f67e280946b0923cd2dc Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 1 Jun 2017 16:18:15 +0530
Subject: [PATCH 17/25] powerpc/kprobes: Pause function_graph tracing during
 jprobes handling

This fixes a crash when function_graph and jprobes are used together.
This is essentially commit 237d28db036e ("ftrace/jprobes/x86: Fix
conflict between jprobes and function graph tracing"), but for powerpc.

Jprobes breaks function_graph tracing since the jprobe hook needs to use
jprobe_return(), which never returns back to the hook, but instead to
the original jprobe'd function. The solution is to momentarily pause
function_graph tracing before invoking the jprobe hook and re-enable it
when returning back to the original jprobe'd function.

Fixes: 6794c78243bf ("powerpc64: port of the function graph tracer")
Cc: stable@vger.kernel.org # v2.6.30+
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/kprobes.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..5075a4d6f1d7 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -617,6 +617,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
 #endif
 
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
+
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +651,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	 * saved regs...
 	 */
 	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* It's OK to start function graph tracing again */
+	unpause_graph_tracing();
 	preempt_enable_no_resched();
 	return 1;
 }

From a4979a7e71eb8da976cbe4a0a1fa50636e76b04f Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 1 Jun 2017 16:18:16 +0530
Subject: [PATCH 18/25] powerpc/ftrace: Pass the correct stack pointer for
 DYNAMIC_FTRACE_WITH_REGS

For DYNAMIC_FTRACE_WITH_REGS, we should be passing-in the original set
of registers in pt_regs, to capture the state _before_ ftrace_caller.
However, we are instead passing the stack pointer *after* allocating a
stack frame in ftrace_caller. Fix this by saving the proper value of r1
in pt_regs. Also, use SAVE_10GPRS() to simplify the code.

Fixes: 153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../powerpc/kernel/trace/ftrace_64_mprofile.S | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..fa0921410fa4 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
 	stdu	r1,-SWITCH_FRAME_SIZE(r1)
 
 	/* Save all gprs to pt_regs */
-	SAVE_8GPRS(0,r1)
-	SAVE_8GPRS(8,r1)
-	SAVE_8GPRS(16,r1)
-	SAVE_8GPRS(24,r1)
+	SAVE_GPR(0, r1)
+	SAVE_10GPRS(2, r1)
+	SAVE_10GPRS(12, r1)
+	SAVE_10GPRS(22, r1)
+
+	/* Save previous stack pointer (r1) */
+	addi	r8, r1, SWITCH_FRAME_SIZE
+	std	r8, GPR1(r1)
 
 	/* Load special regs for save below */
 	mfmsr   r8
@@ -103,10 +107,10 @@ ftrace_call:
 #endif
 
 	/* Restore gprs */
-	REST_8GPRS(0,r1)
-	REST_8GPRS(8,r1)
-	REST_8GPRS(16,r1)
-	REST_8GPRS(24,r1)
+	REST_GPR(0,r1)
+	REST_10GPRS(2,r1)
+	REST_10GPRS(12,r1)
+	REST_10GPRS(22,r1)
 
 	/* Restore possibly modified LR */
 	ld	r0, _LINK(r1)

From c05b8c4474c03026aaa7f8872e78369f69f1bb08 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 1 Jun 2017 16:18:17 +0530
Subject: [PATCH 19/25] powerpc/kprobes: Skip livepatch_handler() for jprobes

ftrace_caller() depends on a modified regs->nip to detect if a certain
function has been livepatched. However, with KPROBES_ON_FTRACE, it is
possible for regs->nip to have been modified by the kprobes pre_handler
(jprobes, for instance). In this case, we do not want to invoke the
livepatch_handler so as not to consume the livepatch stack.

To distinguish between the two (kprobes and livepatch), we check if
there is an active kprobe on the current function. If there is, then we
know for sure that it must have modified the NIP as we don't support
livepatching a kprobe'd function. In this case, we simply skip the
livepatch_handler and branch to the new NIP. Otherwise, the
livepatch_handler is invoked.

Fixes: ead514d5fb30 ("powerpc/kprobes: Add support for KPROBES_ON_FTRACE")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/kprobes.h            |  1 +
 arch/powerpc/kernel/kprobes.c                 |  6 +++
 .../powerpc/kernel/trace/ftrace_64_mprofile.S | 39 ++++++++++++++++---
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
 			   struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 5075a4d6f1d7..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
 
+int is_current_kprobe_addr(unsigned long addr)
+{
+	struct kprobe *p = kprobe_running();
+	return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
 	return  (addr >= (unsigned long)__kprobes_text_start &&
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index fa0921410fa4..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -99,13 +99,39 @@ ftrace_call:
 	bl	ftrace_stub
 	nop
 
-	/* Load ctr with the possibly modified NIP */
-	ld	r3, _NIP(r1)
-	mtctr	r3
+	/* Load the possibly modified NIP */
+	ld	r15, _NIP(r1)
+
 #ifdef CONFIG_LIVEPATCH
-	cmpd	r14,r3		/* has NIP been altered? */
+	cmpd	r14, r15	/* has NIP been altered? */
 #endif
 
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+	/* NIP has not been altered, skip over further checks */
+	beq	1f
+
+	/* Check if there is an active kprobe on us */
+	subi	r3, r14, 4
+	bl	is_current_kprobe_addr
+	nop
+
+	/*
+	 * If r3 == 1, then this is a kprobe/jprobe.
+	 * else, this is livepatched function.
+	 *
+	 * The conditional branch for livepatch_handler below will use the
+	 * result of this comparison. For kprobe/jprobe, we just need to branch to
+	 * the new NIP, not call livepatch_handler. The branch below is bne, so we
+	 * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+	 * CR0[EQ] = (r3 == 1).
+	 */
+	cmpdi	r3, 1
+1:
+#endif
+
+	/* Load CTR with the possibly modified NIP */
+	mtctr	r15
+
 	/* Restore gprs */
 	REST_GPR(0,r1)
 	REST_10GPRS(2,r1)
@@ -123,7 +149,10 @@ ftrace_call:
 	addi r1, r1, SWITCH_FRAME_SIZE
 
 #ifdef CONFIG_LIVEPATCH
-        /* Based on the cmpd above, if the NIP was altered handle livepatch */
+        /*
+	 * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+	 * not on a kprobe/jprobe, then handle livepatch.
+	 */
 	bne-	livepatch_handler
 #endif
 

From d89ba5353f301971dd7d2f9fdf25c4432728f38e Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Wed, 14 Jun 2017 00:12:00 +0530
Subject: [PATCH 20/25] powerpc/64s: Handle data breakpoints in Radix mode

On Power9, trying to use data breakpoints throws the splat shown
below. This is because the check for a data breakpoint in DSISR is in
do_hash_page(), which is not called when in Radix mode.

  Unable to handle kernel paging request for data at address 0xc000000000e19218
  Faulting instruction address: 0xc0000000001155e8
  cpu 0x0: Vector: 300 (Data Access) at [c0000000ef1e7b20]
  pc: c0000000001155e8: find_pid_ns+0x48/0xe0
  lr: c000000000116ac4: find_task_by_vpid+0x44/0x90
  sp: c0000000ef1e7da0
  msr: 9000000000009033
  dar: c000000000e19218
  dsisr: 400000

Move the check to handle_page_fault() so as to catch data breakpoints
in both Hash and Radix MMU modes.

We have to change the check in do_hash_page() against 0xa410 to use
0xa450, so as to include the value of (DSISR_DABRMATCH << 16).

There are two sites that call handle_page_fault() when in Radix, both
already pass DSISR in r4.

Fixes: caca285e5ab4 ("powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code")
Cc: stable@vger.kernel.org # v4.7+
Reported-by: Shriya R. Kulkarni <shriykul@in.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
[mpe: Fix the fall-through case on hash, we need to reload DSISR]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/exceptions-64s.S | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
 	.balign	IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
-	andis.	r0,r4,0xa410		/* weird error? */
+	andis.	r0,r4,0xa450		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
-	andis.  r0,r4,DSISR_DABRMATCH@h
-	bne-    handle_dabr_fault
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
 
 	/* Error */
 	blt-	13f
+
+	/* Reload DSISR into r4 for the DABR check below */
+	ld      r4,_DSISR(r1)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
-11:	ld	r4,_DAR(r1)
+11:	andis.  r0,r4,DSISR_DABRMATCH@h
+	bne-    handle_dabr_fault
+	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_page_fault

From bf05fc25f268cd62f147f368fe65ad3e5b04fe9f Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Thu, 15 Jun 2017 19:16:48 +0530
Subject: [PATCH 21/25] powerpc/perf: Fix oops when kthread execs user process

When a kthread calls call_usermodehelper() the steps are:
  1. allocate current->mm
  2. load_elf_binary()
  3. populate current->thread.regs

While doing this, interrupts are not disabled. If there is a perf
interrupt in the middle of this process (i.e. step 1 has completed
but not yet reached to step 3) and if perf tries to read userspace
regs, kernel oops with following log:

  Unable to handle kernel paging request for data at address 0x00000000
  Faulting instruction address: 0xc0000000000da0fc
  ...
  Call Trace:
  perf_output_sample_regs+0x6c/0xd0
  perf_output_sample+0x4e4/0x830
  perf_event_output_forward+0x64/0x90
  __perf_event_overflow+0x8c/0x1e0
  record_and_restart+0x220/0x5c0
  perf_event_interrupt+0x2d8/0x4d0
  performance_monitor_exception+0x54/0x70
  performance_monitor_common+0x158/0x160
  --- interrupt: f01 at avtab_search_node+0x150/0x1a0
      LR = avtab_search_node+0x100/0x1a0
  ...
  load_elf_binary+0x6e8/0x15a0
  search_binary_handler+0xe8/0x290
  do_execveat_common.isra.14+0x5f4/0x840
  call_usermodehelper_exec_async+0x170/0x210
  ret_from_kernel_thread+0x5c/0x7c

Fix it by setting abi to PERF_SAMPLE_REGS_ABI_NONE when userspace
pt_regs are not set.

Fixes: ed4a4ef85cf5 ("powerpc/perf: Add support for sampling interrupt register state")
Cc: stable@vger.kernel.org # v4.7+
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/perf_regs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 			struct pt_regs *regs_user_copy)
 {
 	regs_user->regs = task_pt_regs(current);
-	regs_user->abi  = perf_reg_abi(current);
+	regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+			 PERF_SAMPLE_REGS_ABI_NONE;
 }

From bbd5ff50afffcf4a01d05367524736c57607a478 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Tue, 20 Jun 2017 18:37:28 +1000
Subject: [PATCH 22/25] powerpc/powernv/npu-dma: Add explicit flush when
 sending an ATSD

NPU2 requires an extra explicit flush to an active GPU PID when
sending address translation shoot downs (ATSDs) to reliably flush the
GPU TLB. This patch adds just such a flush at the end of each sequence
of ATSDs.

We can safely use PID 0 which is always reserved and active on the
GPU. PID 0 is only used for init_mm which will never be a user mm on
the GPU. To enforce this we add a check in pnv_npu2_init_context()
just in case someone tries to use PID 0 on the GPU.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
[mpe: Use true/false for bool literals]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 94 ++++++++++++++++--------
 1 file changed, 65 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
 	return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	/* Invalidating the entire process doesn't use a va */
 	return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid)
+			unsigned long pid, bool flush)
 {
 	unsigned long launch;
 
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
 	/* PID */
 	launch |= pid << PPC_BITLSHIFT(38);
 
+	/* No flush */
+	launch |= !flush << PPC_BITLSHIFT(39);
+
 	return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
+static void mmio_invalidate_wait(
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+	struct npu *npu;
+	int i, reg;
+
+	/* Wait for all invalidations to complete */
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* Wait for completion */
+		npu = mmio_atsd_reg[i].npu;
+		reg = mmio_atsd_reg[i].reg;
+		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+			cpu_relax();
+
+		put_mmio_atsd_reg(npu, reg);
+
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		if (flush)
+			mmio_invalidate_pid(npu, 0, true);
+	}
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-			unsigned long address)
+			unsigned long address, bool flush)
 {
-	int i, j, reg;
+	int i, j;
 	struct npu *npu;
 	struct pnv_phb *nphb;
 	struct pci_dev *npdev;
-	struct {
-		struct npu *npu;
-		int reg;
-	} mmio_atsd_reg[NV_MAX_NPUS];
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
 	/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
 			if (va)
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_va(npu, address, pid);
+					mmio_invalidate_va(npu, address, pid,
+							flush);
 			else
 				mmio_atsd_reg[i].reg =
-					mmio_invalidate_pid(npu, pid);
+					mmio_invalidate_pid(npu, pid, flush);
 
 			/*
 			 * The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 */
 	flush_tlb_mm(npu_context->mm);
 
-	/* Wait for all invalidations to complete */
-	for (i = 0; i <= max_npu2_index; i++) {
-		if (mmio_atsd_reg[i].reg < 0)
-			continue;
-
-		/* Wait for completion */
-		npu = mmio_atsd_reg[i].npu;
-		reg = mmio_atsd_reg[i].reg;
-		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-			cpu_relax();
-		put_mmio_atsd_reg(npu, reg);
-	}
+	mmio_invalidate_wait(mmio_atsd_reg, flush);
+	if (flush)
+		/* Wait for the flush to complete */
+		mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
 	 * There should be no more translation requests for this PID, but we
 	 * need to ensure any entries for it are removed from the TLB.
 	 */
-	mmio_invalidate(npu_context, 0, 0);
+	mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 
-	mmio_invalidate(npu_context, 1, address);
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address <= end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address);
+	for (address = start; address < end; address += PAGE_SIZE)
+		mmio_invalidate(npu_context, 1, address, false);
+
+	/* Do the flush only on the final addess == end */
+	mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
-	if (!mm) {
-		/* kernel thread contexts are not supported */
+	if (!mm || mm->context.id == 0) {
+		/*
+		 * Kernel thread contexts are not supported and context id 0 is
+		 * reserved on the GPU.
+		 */
 		return ERR_PTR(-EINVAL);
 	}
 

From 34f19ff1b5a0d11e46df479623d6936460105c9f Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Jun 2017 15:58:29 +1000
Subject: [PATCH 23/25] powerpc/64: Initialise thread_info for emergency stacks

Emergency stacks have their thread_info mostly uninitialised, which in
particular means garbage preempt_count values.

Emergency stack code runs with interrupts disabled entirely, and is
used very rarely, so this has been unnoticed so far. It was found by a
proposed new powerpc watchdog that takes a soft-NMI directly from the
masked_interrupt handler and using the emergency stack. That crashed
at BUG_ON(in_nmi()) in nmi_enter(). preempt_count()s were found to be
garbage.

To fix this, zero the entire THREAD_SIZE allocation, and initialize
the thread_info.

Cc: stable@vger.kernel.org
Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Move it all into setup_64.c, use a function not a macro. Fix
      crashes on Cell by setting preempt_count to 0 not HARDIRQ_OFFSET]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup_64.c | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a8c1f99e9607..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -615,6 +615,24 @@ void __init exc_lvl_early_init(void)
 }
 #endif
 
+/*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+	ti->task = NULL;
+	ti->cpu = cpu;
+	ti->preempt_count = 0;
+	ti->local_flags = 0;
+	ti->flags = 0;
+	klp_init_thread_info(ti);
+}
+
 /*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
 	 * Since we use these as temporary stacks during secondary CPU
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
+	 *
+	 * The IRQ stacks allocated elsewhere in this file are zeroed and
+	 * initialized in kernel/irq.c. These are initialized here in order
+	 * to have emergency stacks available as early as possible.
 	 */
 	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		struct thread_info *ti;
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
 		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-		klp_init_thread_info(ti);
+		memset(ti, 0, THREAD_SIZE);
+		emerg_stack_init_thread_info(ti, i);
 		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
 	}

From 797625deaedd9a0621376817db2813244b3246e3 Mon Sep 17 00:00:00 2001
From: Christophe Lombard <clombard@linux.vnet.ibm.com>
Date: Tue, 13 Jun 2017 17:41:05 +0200
Subject: [PATCH 24/25] cxl: Fixes for Coherent Accelerator Interface
 Architecture 2.0

A previous set of patches "cxl: Add support for Coherent Accelerator
Interface Architecture 2.0" has introduced a new support for the CAPI
cards. These patches have been tested on Simulation environment and
quite a bit of them have been tested on real hardware.

This patch brings new fixes after a series of tests carried out on new
equipment:
  - Add POWER9 definition.
  - Re-enable any masked interrupts when the AFU is not activated
    after resetting the AFU.
  - Remove the api cxl_is_psl8/9 which is no longer useful.
  - Do not dump CAPI1 registers.
  - Rewrite cxl_is_page_fault() function.
  - Do not register slb callack on P9.

Fixes: f24be42aab37 ("cxl: Add psl9 specific code")
Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/context.c |  6 +++---
 drivers/misc/cxl/cxl.h     | 18 +++++-------------
 drivers/misc/cxl/fault.c   | 23 +++++++++++++++--------
 drivers/misc/cxl/main.c    | 17 +++++++++++++----
 drivers/misc/cxl/native.c  | 29 +++++++++++++++++------------
 drivers/misc/cxl/pci.c     | 11 ++++-------
 6 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 4472ce11f98d..8c32040b9c09 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,7 +45,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	mutex_init(&ctx->mapping_lock);
 	ctx->mapping = NULL;
 
-	if (cxl_is_psl8(afu)) {
+	if (cxl_is_power8()) {
 		spin_lock_init(&ctx->sste_lock);
 
 		/*
@@ -189,7 +189,7 @@ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
 		if (start + len > ctx->afu->adapter->ps_size)
 			return -EINVAL;
 
-		if (cxl_is_psl9(ctx->afu)) {
+		if (cxl_is_power9()) {
 			/*
 			 * Make sure there is a valid problem state
 			 * area space for this AFU.
@@ -324,7 +324,7 @@ static void reclaim_ctx(struct rcu_head *rcu)
 {
 	struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
 
-	if (cxl_is_psl8(ctx->afu))
+	if (cxl_is_power8())
 		free_page((u64)ctx->sstp);
 	if (ctx->ff_page)
 		__free_page(ctx->ff_page);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index c8568ea7c518..a03f8e7535e5 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -357,6 +357,7 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_PSL9_DSISR_An_PF_RGP  0x0000000000000090ULL  /* PTE not found (Radix Guest (parent)) 0b10010000 */
 #define CXL_PSL9_DSISR_An_PF_HRH  0x0000000000000094ULL  /* PTE not found (HPT/Radix Host)       0b10010100 */
 #define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL  /* PTE not found (STEG VA)              0b10011100 */
+#define CXL_PSL9_DSISR_An_URTCH   0x00000000000000B4ULL  /* Unsupported Radix Tree Configuration 0b10110100 */
 
 /****** CXL_PSL_TFC_An ******************************************************/
 #define CXL_PSL_TFC_An_A  (1ull << (63-28)) /* Acknowledge non-translation fault */
@@ -844,24 +845,15 @@ static inline bool cxl_is_power8(void)
 
 static inline bool cxl_is_power9(void)
 {
-	/* intermediate solution */
-	if (!cxl_is_power8() &&
-	   (cpu_has_feature(CPU_FTRS_POWER9) ||
-	    cpu_has_feature(CPU_FTR_POWER9_DD1)))
+	if (pvr_version_is(PVR_POWER9))
 		return true;
 	return false;
 }
 
-static inline bool cxl_is_psl8(struct cxl_afu *afu)
+static inline bool cxl_is_power9_dd1(void)
 {
-	if (afu->adapter->caia_major == 1)
-		return true;
-	return false;
-}
-
-static inline bool cxl_is_psl9(struct cxl_afu *afu)
-{
-	if (afu->adapter->caia_major == 2)
+	if ((pvr_version_is(PVR_POWER9)) &&
+	    cpu_has_feature(CPU_FTR_POWER9_DD1))
 		return true;
 	return false;
 }
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 5344448f514e..c79e39bad7a4 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -187,7 +187,7 @@ static struct mm_struct *get_mem_context(struct cxl_context *ctx)
 
 static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 {
-	if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DS))
+	if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS)))
 		return true;
 
 	return false;
@@ -195,15 +195,22 @@ static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 
 static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
 {
-	if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DM))
+	u64 crs; /* Translation Checkout Response Status */
+
+	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
 		return true;
 
-	if ((cxl_is_psl9(ctx->afu)) &&
-	   ((dsisr & CXL_PSL9_DSISR_An_CO_MASK) &
-		(CXL_PSL9_DSISR_An_PF_SLR | CXL_PSL9_DSISR_An_PF_RGC |
-		 CXL_PSL9_DSISR_An_PF_RGP | CXL_PSL9_DSISR_An_PF_HRH |
-		 CXL_PSL9_DSISR_An_PF_STEG)))
-		return true;
+	if (cxl_is_power9()) {
+		crs = (dsisr & CXL_PSL9_DSISR_An_CO_MASK);
+		if ((crs == CXL_PSL9_DSISR_An_PF_SLR) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_RGC) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_RGP) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_HRH) ||
+		    (crs == CXL_PSL9_DSISR_An_PF_STEG) ||
+		    (crs == CXL_PSL9_DSISR_An_URTCH)) {
+			return true;
+		}
+	}
 
 	return false;
 }
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 1703655072b1..c1ba0d42cbc8 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -329,8 +329,15 @@ static int __init init_cxl(void)
 
 	cxl_debugfs_init();
 
-	if ((rc = register_cxl_calls(&cxl_calls)))
-		goto err;
+	/*
+	 * we don't register the callback on P9. slb callack is only
+	 * used for the PSL8 MMU and CX4.
+	 */
+	if (cxl_is_power8()) {
+		rc = register_cxl_calls(&cxl_calls);
+		if (rc)
+			goto err;
+	}
 
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		cxl_ops = &cxl_native_ops;
@@ -347,7 +354,8 @@ static int __init init_cxl(void)
 
 	return 0;
 err1:
-	unregister_cxl_calls(&cxl_calls);
+	if (cxl_is_power8())
+		unregister_cxl_calls(&cxl_calls);
 err:
 	cxl_debugfs_exit();
 	cxl_file_exit();
@@ -366,7 +374,8 @@ static void exit_cxl(void)
 
 	cxl_debugfs_exit();
 	cxl_file_exit();
-	unregister_cxl_calls(&cxl_calls);
+	if (cxl_is_power8())
+		unregister_cxl_calls(&cxl_calls);
 	idr_destroy(&cxl_adapter_idr);
 }
 
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 8d6ea9712dbd..2b2f8894149d 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -105,11 +105,16 @@ static int native_afu_reset(struct cxl_afu *afu)
 			   CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
 			   false);
 
-	/* Re-enable any masked interrupts */
-	serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
-	serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
-	cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
-
+	/*
+	 * Re-enable any masked interrupts when the AFU is not
+	 * activated to avoid side effects after attaching a process
+	 * in dedicated mode.
+	 */
+	if (afu->current_mode == 0) {
+		serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+		serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
+		cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+	}
 
 	return rc;
 }
@@ -139,9 +144,9 @@ int cxl_psl_purge(struct cxl_afu *afu)
 
 	pr_devel("PSL purge request\n");
 
-	if (cxl_is_psl8(afu))
+	if (cxl_is_power8())
 		trans_fault = CXL_PSL_DSISR_TRANS;
-	if (cxl_is_psl9(afu))
+	if (cxl_is_power9())
 		trans_fault = CXL_PSL9_DSISR_An_TF;
 
 	if (!cxl_ops->link_ok(afu->adapter, afu)) {
@@ -603,7 +608,7 @@ static u64 calculate_sr(struct cxl_context *ctx)
 		if (!test_tsk_thread_flag(current, TIF_32BIT))
 			sr |= CXL_PSL_SR_An_SF;
 	}
-	if (cxl_is_psl9(ctx->afu)) {
+	if (cxl_is_power9()) {
 		if (radix_enabled())
 			sr |= CXL_PSL_SR_An_XLAT_ror;
 		else
@@ -1117,10 +1122,10 @@ static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx,
 
 static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr)
 {
-	if ((cxl_is_psl8(afu)) && (dsisr & CXL_PSL_DSISR_TRANS))
+	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS))
 		return true;
 
-	if ((cxl_is_psl9(afu)) && (dsisr & CXL_PSL9_DSISR_An_TF))
+	if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF))
 		return true;
 
 	return false;
@@ -1194,10 +1199,10 @@ static void native_irq_wait(struct cxl_context *ctx)
 		if (ph != ctx->pe)
 			return;
 		dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An);
-		if (cxl_is_psl8(ctx->afu) &&
+		if (cxl_is_power8() &&
 		   ((dsisr & CXL_PSL_DSISR_PENDING) == 0))
 			return;
-		if (cxl_is_psl9(ctx->afu) &&
+		if (cxl_is_power9() &&
 		   ((dsisr & CXL_PSL9_DSISR_PENDING) == 0))
 			return;
 		/*
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 6dc1ee5b92c9..1eb9859809bf 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -436,7 +436,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
 	/* nMMU_ID Defaults to: b’000001001’*/
 	xsl_dsnctl |= ((u64)0x09 << (63-28));
 
-	if (cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+	if (!(cxl_is_power9_dd1())) {
 		/*
 		 * Used to identify CAPI packets which should be sorted into
 		 * the Non-Blocking queues by the PHB. This field should match
@@ -491,7 +491,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
 	cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000003FFFF0000ULL);
 
 	/* Disable vc dd1 fix */
-	if ((cxl_is_power9() && cpu_has_feature(CPU_FTR_POWER9_DD1)))
+	if (cxl_is_power9_dd1())
 		cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0400000000000001ULL);
 
 	return 0;
@@ -1439,8 +1439,7 @@ int cxl_pci_reset(struct cxl *adapter)
 	 * The adapter is about to be reset, so ignore errors.
 	 * Not supported on P9 DD1
 	 */
-	if ((cxl_is_power8()) ||
-	    ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+	if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
 		cxl_data_cache_flush(adapter);
 
 	/* pcie_warm_reset requests a fundamental pci reset which includes a
@@ -1750,7 +1749,6 @@ static const struct cxl_service_layer_ops psl9_ops = {
 	.debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9,
 	.debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9,
 	.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
-	.err_irq_dump_registers = cxl_native_err_irq_dump_regs,
 	.debugfs_stop_trace = cxl_stop_trace_psl9,
 	.write_timebase_ctrl = write_timebase_ctrl_psl9,
 	.timebase_read = timebase_read_psl9,
@@ -1889,8 +1887,7 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
 	 * Flush adapter datacache as its about to be removed.
 	 * Not supported on P9 DD1.
 	 */
-	if ((cxl_is_power8()) ||
-	    ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+	if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
 		cxl_data_cache_flush(adapter);
 
 	cxl_deconfigure_adapter(adapter);

From d6bd8194e2867e85ac2de63486d3b83ccfae4e62 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 26 Jun 2017 11:30:57 +1000
Subject: [PATCH 25/25] powerpc/32: Avoid miscompilation w/GCC 4.6.3 - don't
 inline copy_to/from_user()

Larry Finger reported that his Powerbook G4 was no longer booting with v4.12-rc,
userspace was up but giving weird errors such as:

  udevd[64]: starting version 175
  udevd[64]: Unable to receive ctrl message: Bad address.
  modprobe: chdir(4.12-rc1): No such file or directory

He bisected the problem to commit 3448890c32c3 ("powerpc: get rid of zeroing,
switch to RAW_COPY_USER").

Al identified that the problem is actually a miscompilation by GCC 4.6.3, which
is exposed by the above commit.

Al also pointed out that inlining copy_to/from_user() is probably of little or
no benefit, which is correct. Using Anton's copy_to_user benchmark, with a
pathological single byte copy, we see a small increase in performance
by *removing* inlining:

  Before (inlined):
  # time ./copy_to_user -w -l 1 -i 10000000	( x 3 )
  real	0m22.063s
  real	0m22.059s
  real	0m22.076s

  After:
  # time ./copy_to_user -w -l 1 -i 10000000	( x 3 )
  real	0m21.325s
  real	0m21.299s
  real	0m21.364s

So as a small performance improvement and to avoid the miscompilation, drop
inlining copy_to/from_user() on 32-bit.

Fixes: 3448890c32c3 ("powerpc: get rid of zeroing, switch to RAW_COPY_USER")
Reported-by: Larry Finger <Larry.Finger@lwfinger.net>
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/uaccess.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 5c0d8a8cdae5..41e88d3ce36b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -267,13 +267,7 @@ do {								\
 extern unsigned long __copy_tofrom_user(void __user *to,
 		const void __user *from, unsigned long size);
 
-#ifndef __powerpc64__
-
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#else /* __powerpc64__ */
-
+#ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {