From 6d592c557e2d7131585ea7f6a3f214aba71e8776 Mon Sep 17 00:00:00 2001
From: Matthieu Bucchianeri <matthieu.bucchianeri@leostella.com>
Date: Mon, 27 Jul 2020 10:21:14 -0700
Subject: [PATCH 01/40] target/ppc: Fix TCG leak with the evmwsmiaa instruction

Fix double-call to tcg_temp_new_i64(), where a temp is allocated both at
declaration time and further down the implementation of gen_evmwsmiaa().

Note that gen_evmwsmia() and gen_evmwsmiaa() are still not implemented
correctly, as they invoke gen_evmwsmi() which may return early, but the
return is not propagated. This will be fixed in my patch for bug #1888918.

Signed-off-by: Matthieu Bucchianeri <matthieu.bucchianeri@leostella.com>
Message-Id: <20200727172114.31415-1-matthieu.bucchianeri@leostella.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/translate/spe-impl.inc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/translate/spe-impl.inc.c b/target/ppc/translate/spe-impl.inc.c
index 36b4d5654d..42a0d1cffb 100644
--- a/target/ppc/translate/spe-impl.inc.c
+++ b/target/ppc/translate/spe-impl.inc.c
@@ -531,8 +531,8 @@ static inline void gen_evmwsmia(DisasContext *ctx)
 
 static inline void gen_evmwsmiaa(DisasContext *ctx)
 {
-    TCGv_i64 acc = tcg_temp_new_i64();
-    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGv_i64 acc;
+    TCGv_i64 tmp;
 
     gen_evmwsmi(ctx);           /* rD := rA * rB */
 

From ca7a2fdaa19ebbe91598d13fedbced43d47fff99 Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Wed, 1 Jul 2020 18:43:36 -0500
Subject: [PATCH 02/40] target/ppc: Introduce Power ISA 3.1 flag

This flag will be used for Power10 instructions.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200701234344.91843-2-ljp@linux.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/cpu.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index e7d382ac10..7bfee8211f 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2191,6 +2191,8 @@ enum {
     PPC2_PM_ISA206     = 0x0000000000040000ULL,
     /* POWER ISA 3.0                                                         */
     PPC2_ISA300        = 0x0000000000080000ULL,
+    /* POWER ISA 3.1                                                         */
+    PPC2_ISA310        = 0x0000000000100000ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
                         PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \

From 9495edb08d9d7cd5674a237d95dbabfa7b355340 Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Wed, 1 Jul 2020 18:43:37 -0500
Subject: [PATCH 03/40] target/ppc: Enable Power ISA 3.1

This patch enables the Power ISA 3.1 in QEMU.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200701234344.91843-3-ljp@linux.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/cpu.h                | 2 +-
 target/ppc/translate_init.inc.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 7bfee8211f..3c4e1b3475 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2201,7 +2201,7 @@ enum {
                         PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
                         PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
                         PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \
-                        PPC2_ISA300)
+                        PPC2_ISA300 | PPC2_ISA310)
 };
 
 /*****************************************************************************/
diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index 7e66822b5d..5134123dd6 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -9201,7 +9201,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
                         PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
                         PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
                         PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-                        PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL;
+                        PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310;
     pcc->msr_mask = (1ull << MSR_SF) |
                     (1ull << MSR_HV) |
                     (1ull << MSR_TM) |

From 9d69cfa2faa7782ec91b9e42de3abb0a442afca8 Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Wed, 1 Jul 2020 18:43:38 -0500
Subject: [PATCH 04/40] target/ppc: add byte-reverse br[dwh] instructions

POWER ISA 3.1 introduces following byte-reverse instructions:
brd: Byte-Reverse Doubleword X-form
brw: Byte-Reverse Word X-form
brh: Byte-Reverse Halfword X-form

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200701234344.91843-4-ljp@linux.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/translate.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 4ce3d664b5..590c3e3bc7 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6971,7 +6971,47 @@ static void gen_dform3D(DisasContext *ctx)
     return gen_invalid(ctx);
 }
 
+#if defined(TARGET_PPC64)
+/* brd */
+static void gen_brd(DisasContext *ctx)
+{
+    tcg_gen_bswap64_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+}
+
+/* brw */
+static void gen_brw(DisasContext *ctx)
+{
+    tcg_gen_bswap64_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+    tcg_gen_rotli_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 32);
+
+}
+
+/* brh */
+static void gen_brh(DisasContext *ctx)
+{
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+
+    tcg_gen_movi_i64(t0, 0x00ff00ff00ff00ffull);
+    tcg_gen_shri_i64(t1, cpu_gpr[rS(ctx->opcode)], 8);
+    tcg_gen_and_i64(t2, t1, t0);
+    tcg_gen_and_i64(t1, cpu_gpr[rS(ctx->opcode)], t0);
+    tcg_gen_shli_i64(t1, t1, 8);
+    tcg_gen_or_i64(cpu_gpr[rA(ctx->opcode)], t1, t2);
+
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+}
+#endif
+
 static opcode_t opcodes[] = {
+#if defined(TARGET_PPC64)
+GEN_HANDLER_E(brd, 0x1F, 0x1B, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA310),
+GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA310),
+GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA310),
+#endif
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE),
 GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER),
 GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER),

From a285ffa680d14cdd7a88e3d551024e6f66684cba Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Wed, 1 Jul 2020 18:43:39 -0500
Subject: [PATCH 05/40] target/ppc: convert vmuluwm to tcg_gen_gvec_mul

Convert the original implementation of vmuluwm to the more generic
tcg_gen_gvec_mul.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200701234344.91843-5-ljp@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/helper.h                 |  1 -
 target/ppc/int_helper.c             | 13 -------------
 target/ppc/translate/vmx-impl.inc.c |  2 +-
 3 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 90166cbabd..032da717f7 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -184,7 +184,6 @@ DEF_HELPER_3(vmulosw, void, avr, avr, avr)
 DEF_HELPER_3(vmuloub, void, avr, avr, avr)
 DEF_HELPER_3(vmulouh, void, avr, avr, avr)
 DEF_HELPER_3(vmulouw, void, avr, avr, avr)
-DEF_HELPER_3(vmuluwm, void, avr, avr, avr)
 DEF_HELPER_3(vslo, void, avr, avr, avr)
 DEF_HELPER_3(vsro, void, avr, avr, avr)
 DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index d8bd3c234a..263e899fe0 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -523,19 +523,6 @@ void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
     r->VsrD(0) = 0;
 }
 
-#define VARITH_DO(name, op, element)                                    \
-    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
-    {                                                                   \
-        int i;                                                          \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
-            r->element[i] = a->element[i] op b->element[i];             \
-        }                                                               \
-    }
-VARITH_DO(muluwm, *, u32)
-#undef VARITH_DO
-#undef VARITH
-
 #define VARITHFP(suffix, func)                                          \
     void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                           ppc_avr_t *b)                                 \
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index de2fd136ff..b6c9290707 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -801,7 +801,7 @@ static void trans_vclzd(DisasContext *ctx)
 GEN_VXFORM(vmuloub, 4, 0);
 GEN_VXFORM(vmulouh, 4, 1);
 GEN_VXFORM(vmulouw, 4, 2);
-GEN_VXFORM(vmuluwm, 4, 2);
+GEN_VXFORM_V(vmuluwm, MO_32, tcg_gen_gvec_mul, 4, 2);
 GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE,
                 vmuluwm, PPC_NONE, PPC2_ALTIVEC_207)
 GEN_VXFORM(vmulosb, 4, 4);

From adcced87842d01229993605321761a41869b9128 Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Wed, 1 Jul 2020 18:43:40 -0500
Subject: [PATCH 06/40] target/ppc: add vmulld instruction

vmulld: Vector Multiply Low Doubleword.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200701234344.91843-6-ljp@linux.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/translate/vmx-impl.inc.c | 1 +
 target/ppc/translate/vmx-ops.inc.c  | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index b6c9290707..f8e8b978ec 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -807,6 +807,7 @@ GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE,
 GEN_VXFORM(vmulosb, 4, 4);
 GEN_VXFORM(vmulosh, 4, 5);
 GEN_VXFORM(vmulosw, 4, 6);
+GEN_VXFORM_V(vmulld, MO_64, tcg_gen_gvec_mul, 4, 7);
 GEN_VXFORM(vmuleub, 4, 8);
 GEN_VXFORM(vmuleuh, 4, 9);
 GEN_VXFORM(vmuleuw, 4, 10);
diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c
index 84e05fb827..b49787ac97 100644
--- a/target/ppc/translate/vmx-ops.inc.c
+++ b/target/ppc/translate/vmx-ops.inc.c
@@ -48,6 +48,9 @@ GEN_HANDLER_E(name, 0x04, opc2, opc3, inval, PPC_NONE, PPC2_ISA300)
 GEN_HANDLER_E_2(name, 0x04, opc2, opc3, opc4, 0x00000000, PPC_NONE,     \
                                                        PPC2_ISA300)
 
+#define GEN_VXFORM_310(name, opc2, opc3)                                \
+GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA310)
+
 #define GEN_VXFORM_DUAL(name0, name1, opc2, opc3, type0, type1) \
 GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1)
 
@@ -104,6 +107,7 @@ GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM(vmulosb, 4, 4),
 GEN_VXFORM(vmulosh, 4, 5),
 GEN_VXFORM_207(vmulosw, 4, 6),
+GEN_VXFORM_310(vmulld, 4, 7),
 GEN_VXFORM(vmuleub, 4, 8),
 GEN_VXFORM(vmuleuh, 4, 9),
 GEN_VXFORM_207(vmuleuw, 4, 10),

From 7abf97975088595682f52c7cfb030b45ea38d8c3 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@ozlabs.org>
Date: Wed, 15 Jul 2020 10:42:28 +1000
Subject: [PATCH 07/40] ppc/spapr: Fix 32 bit logical memory block size
 assumptions

When testing large LMB sizes (eg 4GB), I found a couple of places
that assume they are 32bit in size.

Signed-off-by: Anton Blanchard <anton@ozlabs.org>
Message-Id: <20200715004228.1262681-1-anton@ozlabs.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0ae293ec94..a5bb0736e2 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -558,7 +558,8 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
     int nb_numa_nodes = machine->numa_state->num_nodes;
     int ret, i, offset;
     uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
-    uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
+    uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32),
+                                cpu_to_be32(lmb_size & 0xffffffff)};
     uint32_t *int_buf, *cur_index, buf_len;
     int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
     MemoryDeviceInfoList *dimms = NULL;
@@ -905,7 +906,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
     uint32_t lrdr_capacity[] = {
         cpu_to_be32(max_device_addr >> 32),
         cpu_to_be32(max_device_addr & 0xffffffff),
-        0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE),
+        cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32),
+        cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff),
         cpu_to_be32(ms->smp.max_cpus / ms->smp.threads),
     };
     uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);

From d9c5b5fa86ce37125dcc64c6274a1bc48cf11903 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Tue, 28 Jul 2020 15:29:34 +0200
Subject: [PATCH 08/40] spapr: Use error_append_hint() in spapr_caps.c

We have a dedicated error API for hints. Use it instead of embedding
the hint in the error message, as recommanded in the "qapi/error.h"
header file.

While here, have cap_fwnmi_apply(), which already uses
error_append_hint(), to call ERRP_GUARD() as well.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <159594297421.8262.14314530897345809924.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_caps.c | 89 +++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 39 deletions(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 3225fc5a2e..275f5bd034 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -180,24 +180,24 @@ static void spapr_cap_set_pagesize(Object *obj, Visitor *v, const char *name,
 
 static void cap_htm_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
 {
+    ERRP_GUARD();
     if (!val) {
         /* TODO: We don't support disabling htm yet */
         return;
     }
     if (tcg_enabled()) {
-        error_setg(errp,
-                   "No Transactional Memory support in TCG,"
-                   " try appending -machine cap-htm=off");
+        error_setg(errp, "No Transactional Memory support in TCG");
+        error_append_hint(errp, "Try appending -machine cap-htm=off\n");
     } else if (kvm_enabled() && !kvmppc_has_cap_htm()) {
         error_setg(errp,
-"KVM implementation does not support Transactional Memory,"
-                   " try appending -machine cap-htm=off"
-            );
+                   "KVM implementation does not support Transactional Memory");
+        error_append_hint(errp, "Try appending -machine cap-htm=off\n");
     }
 }
 
 static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
 {
+    ERRP_GUARD();
     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
     CPUPPCState *env = &cpu->env;
 
@@ -209,13 +209,14 @@ static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
      * rid of anything that doesn't do VMX */
     g_assert(env->insns_flags & PPC_ALTIVEC);
     if (!(env->insns_flags2 & PPC2_VSX)) {
-        error_setg(errp, "VSX support not available,"
-                   " try appending -machine cap-vsx=off");
+        error_setg(errp, "VSX support not available");
+        error_append_hint(errp, "Try appending -machine cap-vsx=off\n");
     }
 }
 
 static void cap_dfp_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
 {
+    ERRP_GUARD();
     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
     CPUPPCState *env = &cpu->env;
 
@@ -224,8 +225,8 @@ static void cap_dfp_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
         return;
     }
     if (!(env->insns_flags2 & PPC2_DFP)) {
-        error_setg(errp, "DFP support not available,"
-                   " try appending -machine cap-dfp=off");
+        error_setg(errp, "DFP support not available");
+        error_append_hint(errp, "Try appending -machine cap-dfp=off\n");
     }
 }
 
@@ -239,6 +240,7 @@ SpaprCapPossible cap_cfpc_possible = {
 static void cap_safe_cache_apply(SpaprMachineState *spapr, uint8_t val,
                                  Error **errp)
 {
+    ERRP_GUARD();
     uint8_t kvm_val =  kvmppc_get_cap_safe_cache();
 
     if (tcg_enabled() && val) {
@@ -247,9 +249,9 @@ static void cap_safe_cache_apply(SpaprMachineState *spapr, uint8_t val,
                     cap_cfpc_possible.vals[val]);
     } else if (kvm_enabled() && (val > kvm_val)) {
         error_setg(errp,
-                   "Requested safe cache capability level not supported by kvm,"
-                   " try appending -machine cap-cfpc=%s",
-                   cap_cfpc_possible.vals[kvm_val]);
+                   "Requested safe cache capability level not supported by KVM");
+        error_append_hint(errp, "Try appending -machine cap-cfpc=%s\n",
+                          cap_cfpc_possible.vals[kvm_val]);
     }
 }
 
@@ -263,6 +265,7 @@ SpaprCapPossible cap_sbbc_possible = {
 static void cap_safe_bounds_check_apply(SpaprMachineState *spapr, uint8_t val,
                                         Error **errp)
 {
+    ERRP_GUARD();
     uint8_t kvm_val =  kvmppc_get_cap_safe_bounds_check();
 
     if (tcg_enabled() && val) {
@@ -271,9 +274,9 @@ static void cap_safe_bounds_check_apply(SpaprMachineState *spapr, uint8_t val,
                     cap_sbbc_possible.vals[val]);
     } else if (kvm_enabled() && (val > kvm_val)) {
         error_setg(errp,
-"Requested safe bounds check capability level not supported by kvm,"
-                   " try appending -machine cap-sbbc=%s",
-                   cap_sbbc_possible.vals[kvm_val]);
+"Requested safe bounds check capability level not supported by KVM");
+        error_append_hint(errp, "Try appending -machine cap-sbbc=%s\n",
+                          cap_sbbc_possible.vals[kvm_val]);
     }
 }
 
@@ -290,6 +293,7 @@ SpaprCapPossible cap_ibs_possible = {
 static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr,
                                            uint8_t val, Error **errp)
 {
+    ERRP_GUARD();
     uint8_t kvm_val = kvmppc_get_cap_safe_indirect_branch();
 
     if (tcg_enabled() && val) {
@@ -298,9 +302,9 @@ static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr,
                     cap_ibs_possible.vals[val]);
     } else if (kvm_enabled() && (val > kvm_val)) {
         error_setg(errp,
-"Requested safe indirect branch capability level not supported by kvm,"
-                   " try appending -machine cap-ibs=%s",
-                   cap_ibs_possible.vals[kvm_val]);
+"Requested safe indirect branch capability level not supported by KVM");
+        error_append_hint(errp, "Try appending -machine cap-ibs=%s\n",
+                          cap_ibs_possible.vals[kvm_val]);
     }
 }
 
@@ -377,23 +381,25 @@ static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr,
 static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
                                     uint8_t val, Error **errp)
 {
+    ERRP_GUARD();
     if (!val) {
         /* capability disabled by default */
         return;
     }
 
     if (tcg_enabled()) {
-        error_setg(errp,
-                   "No Nested KVM-HV support in tcg,"
-                   " try appending -machine cap-nested-hv=off");
+        error_setg(errp, "No Nested KVM-HV support in TCG");
+        error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n");
     } else if (kvm_enabled()) {
         if (!kvmppc_has_cap_nested_kvm_hv()) {
             error_setg(errp,
-"KVM implementation does not support Nested KVM-HV,"
-                       " try appending -machine cap-nested-hv=off");
+                       "KVM implementation does not support Nested KVM-HV");
+            error_append_hint(errp,
+                              "Try appending -machine cap-nested-hv=off\n");
         } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
-                error_setg(errp,
-"Error enabling cap-nested-hv with KVM, try cap-nested-hv=off");
+                error_setg(errp, "Error enabling cap-nested-hv with KVM");
+                error_append_hint(errp,
+                                  "Try appending -machine cap-nested-hv=off\n");
         }
     }
 }
@@ -401,6 +407,7 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
 static void cap_large_decr_apply(SpaprMachineState *spapr,
                                  uint8_t val, Error **errp)
 {
+    ERRP_GUARD();
     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 
@@ -411,22 +418,23 @@ static void cap_large_decr_apply(SpaprMachineState *spapr,
     if (tcg_enabled()) {
         if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
                               spapr->max_compat_pvr)) {
-            error_setg(errp,
-                "Large decrementer only supported on POWER9, try -cpu POWER9");
+            error_setg(errp, "Large decrementer only supported on POWER9");
+            error_append_hint(errp, "Try -cpu POWER9\n");
             return;
         }
     } else if (kvm_enabled()) {
         int kvm_nr_bits = kvmppc_get_cap_large_decr();
 
         if (!kvm_nr_bits) {
-            error_setg(errp,
-                       "No large decrementer support,"
-                        " try appending -machine cap-large-decr=off");
+            error_setg(errp, "No large decrementer support");
+            error_append_hint(errp,
+                              "Try appending -machine cap-large-decr=off\n");
         } else if (pcc->lrg_decr_bits != kvm_nr_bits) {
             error_setg(errp,
-"KVM large decrementer size (%d) differs to model (%d),"
-                " try appending -machine cap-large-decr=off",
-                kvm_nr_bits, pcc->lrg_decr_bits);
+                       "KVM large decrementer size (%d) differs to model (%d)",
+                       kvm_nr_bits, pcc->lrg_decr_bits);
+            error_append_hint(errp,
+                              "Try appending -machine cap-large-decr=off\n");
         }
     }
 }
@@ -435,14 +443,15 @@ static void cap_large_decr_cpu_apply(SpaprMachineState *spapr,
                                      PowerPCCPU *cpu,
                                      uint8_t val, Error **errp)
 {
+    ERRP_GUARD();
     CPUPPCState *env = &cpu->env;
     target_ulong lpcr = env->spr[SPR_LPCR];
 
     if (kvm_enabled()) {
         if (kvmppc_enable_cap_large_decr(cpu, val)) {
-            error_setg(errp,
-                       "No large decrementer support,"
-                       " try appending -machine cap-large-decr=off");
+            error_setg(errp, "No large decrementer support");
+            error_append_hint(errp,
+                              "Try appending -machine cap-large-decr=off\n");
         }
     }
 
@@ -457,6 +466,7 @@ static void cap_large_decr_cpu_apply(SpaprMachineState *spapr,
 static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
                                  Error **errp)
 {
+    ERRP_GUARD();
     uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist();
 
     if (tcg_enabled() && val) {
@@ -479,14 +489,15 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
             return;
         }
         error_setg(errp,
-"Requested count cache flush assist capability level not supported by kvm,"
-                   " try appending -machine cap-ccf-assist=off");
+                   "Requested count cache flush assist capability level not supported by KVM");
+        error_append_hint(errp, "Try appending -machine cap-ccf-assist=off\n");
     }
 }
 
 static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
                                 Error **errp)
 {
+    ERRP_GUARD();
     if (!val) {
         return; /* Disabled by default */
     }

From 19d55e2031f2473a65ffc11aff5b1059e7c4173b Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Thu, 16 Jul 2020 19:11:21 +0200
Subject: [PATCH 09/40] spapr: Forbid nested KVM-HV in pre-power9 compat mode

Nested KVM HV only works if the kernel is using the radix MMU mode, ie.
the CPU is POWER9 and it is not running in some pre-power9 compat mode.
Otherwise, the KVM HV module fails to load in the guest with -ENODEV.
It might be painful for a user to discover this late that nested cannot
work with their setup. Erroring out at machine init instead seems to be
the best we can do.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <159491948127.188975.9621435875869177751.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_caps.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 275f5bd034..10a80a8159 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -382,6 +382,8 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
                                     uint8_t val, Error **errp)
 {
     ERRP_GUARD();
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+
     if (!val) {
         /* capability disabled by default */
         return;
@@ -391,6 +393,14 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
         error_setg(errp, "No Nested KVM-HV support in TCG");
         error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n");
     } else if (kvm_enabled()) {
+        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
+                              spapr->max_compat_pvr)) {
+            error_setg(errp, "Nested KVM-HV only supported on POWER9");
+            error_append_hint(errp,
+                              "Try appending -machine max-cpu-compat=power9\n");
+            return;
+        }
+
         if (!kvmppc_has_cap_nested_kvm_hv()) {
             error_setg(errp,
                        "KVM implementation does not support Nested KVM-HV");

From 4b160fad4f6179fa7f8385f87f124cfce1f809d3 Mon Sep 17 00:00:00 2001
From: Gustavo Romero <gromero@linux.ibm.com>
Date: Wed, 22 Jul 2020 19:43:54 -0400
Subject: [PATCH 10/40] ppc/xive: Fix some typos in comments

Fix some typos in comments about code modeling coalescing points in the
XIVE routing engine (IVRE).

Signed-off-by: Gustavo Romero <gromero@linux.ibm.com>
Message-Id: <1595461434-27725-1-git-send-email-gromero@linux.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/xive.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 9a162431e0..9b55e0356c 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1502,7 +1502,7 @@ static bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
 
 /*
  * Notification using the END ESe/ESn bit (Event State Buffer for
- * escalation and notification). Profide futher coalescing in the
+ * escalation and notification). Provide further coalescing in the
  * Router.
  */
 static bool xive_router_end_es_notify(XiveRouter *xrtr, uint8_t end_blk,
@@ -1581,7 +1581,7 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
 
     /*
      * Check the END ESn (Event State Buffer for notification) for
-     * even futher coalescing in the Router
+     * even further coalescing in the Router
      */
     if (!xive_end_is_notify(&end)) {
         /* ESn[Q]=1 : end of notification */
@@ -1660,7 +1660,7 @@ do_escalation:
 
     /*
      * Check the END ESe (Event State Buffer for escalation) for even
-     * futher coalescing in the Router
+     * further coalescing in the Router
      */
     if (!xive_end_is_uncond_escalation(&end)) {
         /* ESe[Q]=1 : end of notification */

From de55d3b3815430c27b2e8fe36f85d8e3f2026c95 Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Thu, 23 Jul 2020 23:58:40 -0500
Subject: [PATCH 11/40] Update PowerPC AT_HWCAP2 definition

Add PPC2_FEATURE2_ARCH_3_10 to the PowerPC AT_HWCAP2 definitions.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200724045845.89976-2-ljp@linux.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/elf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/elf.h b/include/elf.h
index 5b06b55f28..c117a4d1ab 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -558,6 +558,7 @@ typedef struct {
 #define PPC_FEATURE2_HTM_NOSC           0x01000000
 #define PPC_FEATURE2_ARCH_3_00          0x00800000
 #define PPC_FEATURE2_HAS_IEEE128        0x00400000
+#define PPC_FEATURE2_ARCH_3_10          0x00040000
 
 /* Bits present in AT_HWCAP for Sparc.  */
 

From 73ebe95e8e593567dce9477cb4adf86560c7d377 Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Thu, 23 Jul 2020 23:58:41 -0500
Subject: [PATCH 12/40] target/ppc: add vmulld to INDEX_op_mul_vec case

Group vmuluwm and vmulld. Make vmulld-specific
changes since it belongs to new ISA 3.1.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200724045845.89976-3-ljp@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcg/ppc/tcg-target.h     |  2 ++
 tcg/ppc/tcg-target.inc.c | 12 ++++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index be5b2901c3..aee38157a2 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -63,6 +63,7 @@ typedef enum {
     tcg_isa_2_06,
     tcg_isa_2_07,
     tcg_isa_3_00,
+    tcg_isa_3_10,
 } TCGPowerISA;
 
 extern TCGPowerISA have_isa;
@@ -72,6 +73,7 @@ extern bool have_vsx;
 #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
 #define have_isa_2_07  (have_isa >= tcg_isa_2_07)
 #define have_isa_3_00  (have_isa >= tcg_isa_3_00)
+#define have_isa_3_10  (have_isa >= tcg_isa_3_10)
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index c8d1e765d9..0e78260e60 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -564,6 +564,7 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define VMULOUH    VX4(72)
 #define VMULOUW    VX4(136)       /* v2.07 */
 #define VMULUWM    VX4(137)       /* v2.07 */
+#define VMULLD     VX4(457)       /* v3.10 */
 #define VMSUMUHM   VX4(38)
 
 #define VMRGHB     VX4(12)
@@ -3022,6 +3023,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
             return -1;
         case MO_32:
             return have_isa_2_07 ? 1 : -1;
+        case MO_64:
+            return have_isa_3_10;
         }
         return 0;
     case INDEX_op_bitsel_vec:
@@ -3158,6 +3161,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     static const uint32_t
         add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
         sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
+        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
         neg_op[4] = { 0, 0, VNEGW, VNEGD },
         eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
         ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
@@ -3208,8 +3212,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         a1 = 0;
         break;
     case INDEX_op_mul_vec:
-        tcg_debug_assert(vece == MO_32 && have_isa_2_07);
-        insn = VMULUWM;
+        insn = mul_op[vece];
         break;
     case INDEX_op_ssadd_vec:
         insn = ssadd_op[vece];
@@ -3729,6 +3732,11 @@ static void tcg_target_init(TCGContext *s)
         have_isa = tcg_isa_3_00;
     }
 #endif
+#ifdef PPC_FEATURE2_ARCH_3_10
+    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
+        have_isa = tcg_isa_3_10;
+    }
+#endif
 
 #ifdef PPC_FEATURE2_HAS_ISEL
     /* Prefer explicit instruction from the kernel. */

From f3e0d864abb4de0a324cf7e77cb242b57ccb45c3 Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Thu, 23 Jul 2020 23:58:42 -0500
Subject: [PATCH 13/40] target/ppc: add vmulh{su}w instructions

vmulhsw: Vector Multiply High Signed Word
vmulhuw: Vector Multiply High Unsigned Word

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200724045845.89976-4-ljp@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/helper.h                 |  2 ++
 target/ppc/int_helper.c             | 19 +++++++++++++++++++
 target/ppc/translate/vmx-impl.inc.c |  6 ++++++
 target/ppc/translate/vmx-ops.inc.c  |  4 ++--
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 032da717f7..c218bb13ec 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -184,6 +184,8 @@ DEF_HELPER_3(vmulosw, void, avr, avr, avr)
 DEF_HELPER_3(vmuloub, void, avr, avr, avr)
 DEF_HELPER_3(vmulouh, void, avr, avr, avr)
 DEF_HELPER_3(vmulouw, void, avr, avr, avr)
+DEF_HELPER_3(vmulhsw, void, avr, avr, avr)
+DEF_HELPER_3(vmulhuw, void, avr, avr, avr)
 DEF_HELPER_3(vslo, void, avr, avr, avr)
 DEF_HELPER_3(vsro, void, avr, avr, avr)
 DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 263e899fe0..1e866b7d3b 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1086,6 +1086,25 @@ VMUL(uw, u32, VsrW, VsrD, uint64_t)
 #undef VMUL_DO_ODD
 #undef VMUL
 
+void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
+    }
+}
+
+void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] *
+                               (uint64_t)b->u32[i]) >> 32);
+    }
+}
+
 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
 {
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index f8e8b978ec..79631e56b4 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -811,9 +811,15 @@ GEN_VXFORM_V(vmulld, MO_64, tcg_gen_gvec_mul, 4, 7);
 GEN_VXFORM(vmuleub, 4, 8);
 GEN_VXFORM(vmuleuh, 4, 9);
 GEN_VXFORM(vmuleuw, 4, 10);
+GEN_VXFORM(vmulhuw, 4, 10);
+GEN_VXFORM_DUAL(vmuleuw, PPC_ALTIVEC, PPC_NONE,
+                vmulhuw, PPC_NONE, PPC2_ISA310);
 GEN_VXFORM(vmulesb, 4, 12);
 GEN_VXFORM(vmulesh, 4, 13);
 GEN_VXFORM(vmulesw, 4, 14);
+GEN_VXFORM(vmulhsw, 4, 14);
+GEN_VXFORM_DUAL(vmulesw, PPC_ALTIVEC, PPC_NONE,
+                vmulhsw, PPC_NONE, PPC2_ISA310);
 GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4);
 GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5);
 GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6);
diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c
index b49787ac97..29701ad778 100644
--- a/target/ppc/translate/vmx-ops.inc.c
+++ b/target/ppc/translate/vmx-ops.inc.c
@@ -110,10 +110,10 @@ GEN_VXFORM_207(vmulosw, 4, 6),
 GEN_VXFORM_310(vmulld, 4, 7),
 GEN_VXFORM(vmuleub, 4, 8),
 GEN_VXFORM(vmuleuh, 4, 9),
-GEN_VXFORM_207(vmuleuw, 4, 10),
+GEN_VXFORM_DUAL(vmuleuw, vmulhuw, 4, 10, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM(vmulesb, 4, 12),
 GEN_VXFORM(vmulesh, 4, 13),
-GEN_VXFORM_207(vmulesw, 4, 14),
+GEN_VXFORM_DUAL(vmulesw, vmulhsw, 4, 14, PPC_ALTIVEC, PPC_NONE),
 GEN_VXFORM(vslb, 2, 4),
 GEN_VXFORM(vslh, 2, 5),
 GEN_VXFORM_DUAL(vslw, vrlwnm, 2, 6, PPC_ALTIVEC, PPC_NONE),

From c4b8b49d68856ffacacfd792b0ab0d4aa0982e8d Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Thu, 23 Jul 2020 23:58:43 -0500
Subject: [PATCH 14/40] target/ppc: add vmulh{su}d instructions

vmulhsd: Vector Multiply High Signed Doubleword
vmulhud: Vector Multiply High Unsigned Doubleword

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200724045845.89976-5-ljp@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/helper.h                 |  2 ++
 target/ppc/int_helper.c             | 16 ++++++++++++++++
 target/ppc/translate/vmx-impl.inc.c |  2 ++
 target/ppc/translate/vmx-ops.inc.c  |  2 ++
 4 files changed, 22 insertions(+)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index c218bb13ec..6a4dccf70c 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -186,6 +186,8 @@ DEF_HELPER_3(vmulouh, void, avr, avr, avr)
 DEF_HELPER_3(vmulouw, void, avr, avr, avr)
 DEF_HELPER_3(vmulhsw, void, avr, avr, avr)
 DEF_HELPER_3(vmulhuw, void, avr, avr, avr)
+DEF_HELPER_3(vmulhsd, void, avr, avr, avr)
+DEF_HELPER_3(vmulhud, void, avr, avr, avr)
 DEF_HELPER_3(vslo, void, avr, avr, avr)
 DEF_HELPER_3(vsro, void, avr, avr, avr)
 DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 1e866b7d3b..57cda75ed1 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1105,6 +1105,22 @@ void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
     }
 }
 
+void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    uint64_t discard;
+
+    muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]);
+    muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]);
+}
+
+void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    uint64_t discard;
+
+    mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]);
+    mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]);
+}
+
 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
 {
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 79631e56b4..92b9527aff 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -812,6 +812,7 @@ GEN_VXFORM(vmuleub, 4, 8);
 GEN_VXFORM(vmuleuh, 4, 9);
 GEN_VXFORM(vmuleuw, 4, 10);
 GEN_VXFORM(vmulhuw, 4, 10);
+GEN_VXFORM(vmulhud, 4, 11);
 GEN_VXFORM_DUAL(vmuleuw, PPC_ALTIVEC, PPC_NONE,
                 vmulhuw, PPC_NONE, PPC2_ISA310);
 GEN_VXFORM(vmulesb, 4, 12);
@@ -820,6 +821,7 @@ GEN_VXFORM(vmulesw, 4, 14);
 GEN_VXFORM(vmulhsw, 4, 14);
 GEN_VXFORM_DUAL(vmulesw, PPC_ALTIVEC, PPC_NONE,
                 vmulhsw, PPC_NONE, PPC2_ISA310);
+GEN_VXFORM(vmulhsd, 4, 15);
 GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4);
 GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5);
 GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6);
diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c
index 29701ad778..f3f4855111 100644
--- a/target/ppc/translate/vmx-ops.inc.c
+++ b/target/ppc/translate/vmx-ops.inc.c
@@ -111,9 +111,11 @@ GEN_VXFORM_310(vmulld, 4, 7),
 GEN_VXFORM(vmuleub, 4, 8),
 GEN_VXFORM(vmuleuh, 4, 9),
 GEN_VXFORM_DUAL(vmuleuw, vmulhuw, 4, 10, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM_310(vmulhud, 4, 11),
 GEN_VXFORM(vmulesb, 4, 12),
 GEN_VXFORM(vmulesh, 4, 13),
 GEN_VXFORM_DUAL(vmulesw, vmulhsw, 4, 14, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM_310(vmulhsd, 4, 15),
 GEN_VXFORM(vslb, 2, 4),
 GEN_VXFORM(vslh, 2, 5),
 GEN_VXFORM_DUAL(vslw, vrlwnm, 2, 6, PPC_ALTIVEC, PPC_NONE),

From 8dcdb535d7cc4ba6270bb756e12e1d323254ed4e Mon Sep 17 00:00:00 2001
From: Matthieu Bucchianeri <matthieu.bucchianeri@leostella.com>
Date: Mon, 27 Jul 2020 10:55:53 -0700
Subject: [PATCH 15/40] target/ppc: Fix SPE unavailable exception triggering

When emulating certain floating point instructions or vector instructions on
PowerPC machines, QEMU did not properly generate the SPE/Embedded Floating-
Point Unavailable interrupt. See the buglink further below for references to
the relevant NXP documentation.

This patch fixes the behavior of some evfs* instructions that were
incorrectly emitting the interrupt.

More importantly, this patch fixes the behavior of several efd* and ev*
instructions that were not generating the interrupt. Triggering the
interrupt for these instructions fixes lazy FPU/vector context switching on
some operating systems like Linux.

Without this patch, the result of some double-precision arithmetic could be
corrupted due to the lack of proper saving and restoring of the upper
32-bit part of the general-purpose registers.

Buglink: https://bugs.launchpad.net/qemu/+bug/1888918
Buglink: https://bugs.launchpad.net/qemu/+bug/1611394
Signed-off-by: Matthieu Bucchianeri <matthieu.bucchianeri@leostella.com>
Message-Id: <20200727175553.32276-1-matthieu.bucchianeri@leostella.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/translate/spe-impl.inc.c | 97 +++++++++++++++++++----------
 1 file changed, 64 insertions(+), 33 deletions(-)

diff --git a/target/ppc/translate/spe-impl.inc.c b/target/ppc/translate/spe-impl.inc.c
index 42a0d1cffb..2e6e799a25 100644
--- a/target/ppc/translate/spe-impl.inc.c
+++ b/target/ppc/translate/spe-impl.inc.c
@@ -349,14 +349,24 @@ static inline void gen_evmergelohi(DisasContext *ctx)
 }
 static inline void gen_evsplati(DisasContext *ctx)
 {
-    uint64_t imm = ((int32_t)(rA(ctx->opcode) << 27)) >> 27;
+    uint64_t imm;
+    if (unlikely(!ctx->spe_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_SPEU);
+        return;
+    }
+    imm = ((int32_t)(rA(ctx->opcode) << 27)) >> 27;
 
     tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], imm);
     tcg_gen_movi_tl(cpu_gprh[rD(ctx->opcode)], imm);
 }
 static inline void gen_evsplatfi(DisasContext *ctx)
 {
-    uint64_t imm = rA(ctx->opcode) << 27;
+    uint64_t imm;
+    if (unlikely(!ctx->spe_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_SPEU);
+        return;
+    }
+    imm = rA(ctx->opcode) << 27;
 
     tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], imm);
     tcg_gen_movi_tl(cpu_gprh[rD(ctx->opcode)], imm);
@@ -389,21 +399,37 @@ static inline void gen_evsel(DisasContext *ctx)
 
 static void gen_evsel0(DisasContext *ctx)
 {
+    if (unlikely(!ctx->spe_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_SPEU);
+        return;
+    }
     gen_evsel(ctx);
 }
 
 static void gen_evsel1(DisasContext *ctx)
 {
+    if (unlikely(!ctx->spe_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_SPEU);
+        return;
+    }
     gen_evsel(ctx);
 }
 
 static void gen_evsel2(DisasContext *ctx)
 {
+    if (unlikely(!ctx->spe_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_SPEU);
+        return;
+    }
     gen_evsel(ctx);
 }
 
 static void gen_evsel3(DisasContext *ctx)
 {
+    if (unlikely(!ctx->spe_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_SPEU);
+        return;
+    }
     gen_evsel(ctx);
 }
 
@@ -518,6 +544,11 @@ static inline void gen_evmwsmia(DisasContext *ctx)
 {
     TCGv_i64 tmp;
 
+    if (unlikely(!ctx->spe_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_SPEU);
+        return;
+    }
+
     gen_evmwsmi(ctx);            /* rD := rA * rB */
 
     tmp = tcg_temp_new_i64();
@@ -534,6 +565,11 @@ static inline void gen_evmwsmiaa(DisasContext *ctx)
     TCGv_i64 acc;
     TCGv_i64 tmp;
 
+    if (unlikely(!ctx->spe_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_SPEU);
+        return;
+    }
+
     gen_evmwsmi(ctx);           /* rD := rA * rB */
 
     acc = tcg_temp_new_i64();
@@ -892,8 +928,14 @@ static inline void gen_##name(DisasContext *ctx)                              \
 #define GEN_SPEFPUOP_CONV_32_64(name)                                         \
 static inline void gen_##name(DisasContext *ctx)                              \
 {                                                                             \
-    TCGv_i64 t0 = tcg_temp_new_i64();                                         \
-    TCGv_i32 t1 = tcg_temp_new_i32();                                         \
+    TCGv_i64 t0;                                                              \
+    TCGv_i32 t1;                                                              \
+    if (unlikely(!ctx->spe_enabled)) {                                        \
+        gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
+        return;                                                               \
+    }                                                                         \
+    t0 = tcg_temp_new_i64();                                                  \
+    t1 = tcg_temp_new_i32();                                                  \
     gen_load_gpr64(t0, rB(ctx->opcode));                                      \
     gen_helper_##name(t1, cpu_env, t0);                                       \
     tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);                        \
@@ -903,8 +945,14 @@ static inline void gen_##name(DisasContext *ctx)                              \
 #define GEN_SPEFPUOP_CONV_64_32(name)                                         \
 static inline void gen_##name(DisasContext *ctx)                              \
 {                                                                             \
-    TCGv_i64 t0 = tcg_temp_new_i64();                                         \
-    TCGv_i32 t1 = tcg_temp_new_i32();                                         \
+    TCGv_i64 t0;                                                              \
+    TCGv_i32 t1;                                                              \
+    if (unlikely(!ctx->spe_enabled)) {                                        \
+        gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
+        return;                                                               \
+    }                                                                         \
+    t0 = tcg_temp_new_i64();                                                  \
+    t1 = tcg_temp_new_i32();                                                  \
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                       \
     gen_helper_##name(t0, cpu_env, t1);                                       \
     gen_store_gpr64(rD(ctx->opcode), t0);                                     \
@@ -914,7 +962,12 @@ static inline void gen_##name(DisasContext *ctx)                              \
 #define GEN_SPEFPUOP_CONV_64_64(name)                                         \
 static inline void gen_##name(DisasContext *ctx)                              \
 {                                                                             \
-    TCGv_i64 t0 = tcg_temp_new_i64();                                         \
+    TCGv_i64 t0;                                                              \
+    if (unlikely(!ctx->spe_enabled)) {                                        \
+        gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
+        return;                                                               \
+    }                                                                         \
+    t0 = tcg_temp_new_i64();                                                  \
     gen_load_gpr64(t0, rB(ctx->opcode));                                      \
     gen_helper_##name(t0, cpu_env, t0);                                       \
     gen_store_gpr64(rD(ctx->opcode), t0);                                     \
@@ -923,13 +976,8 @@ static inline void gen_##name(DisasContext *ctx)                              \
 #define GEN_SPEFPUOP_ARITH2_32_32(name)                                       \
 static inline void gen_##name(DisasContext *ctx)                              \
 {                                                                             \
-    TCGv_i32 t0, t1;                                                          \
-    if (unlikely(!ctx->spe_enabled)) {                                        \
-        gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
-        return;                                                               \
-    }                                                                         \
-    t0 = tcg_temp_new_i32();                                                  \
-    t1 = tcg_temp_new_i32();                                                  \
+    TCGv_i32 t0 = tcg_temp_new_i32();                                         \
+    TCGv_i32 t1 = tcg_temp_new_i32();                                         \
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                       \
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                       \
     gen_helper_##name(t0, cpu_env, t0, t1);                                   \
@@ -958,13 +1006,8 @@ static inline void gen_##name(DisasContext *ctx)                              \
 #define GEN_SPEFPUOP_COMP_32(name)                                            \
 static inline void gen_##name(DisasContext *ctx)                              \
 {                                                                             \
-    TCGv_i32 t0, t1;                                                          \
-    if (unlikely(!ctx->spe_enabled)) {                                        \
-        gen_exception(ctx, POWERPC_EXCP_SPEU);                                \
-        return;                                                               \
-    }                                                                         \
-    t0 = tcg_temp_new_i32();                                                  \
-    t1 = tcg_temp_new_i32();                                                  \
+    TCGv_i32 t0 = tcg_temp_new_i32();                                         \
+    TCGv_i32 t1 = tcg_temp_new_i32();                                         \
                                                                               \
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                       \
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                       \
@@ -1074,28 +1117,16 @@ GEN_SPEFPUOP_ARITH2_32_32(efsmul);
 GEN_SPEFPUOP_ARITH2_32_32(efsdiv);
 static inline void gen_efsabs(DisasContext *ctx)
 {
-    if (unlikely(!ctx->spe_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_SPEU);
-        return;
-    }
     tcg_gen_andi_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
                     (target_long)~0x80000000LL);
 }
 static inline void gen_efsnabs(DisasContext *ctx)
 {
-    if (unlikely(!ctx->spe_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_SPEU);
-        return;
-    }
     tcg_gen_ori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
                    0x80000000);
 }
 static inline void gen_efsneg(DisasContext *ctx)
 {
-    if (unlikely(!ctx->spe_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_SPEU);
-        return;
-    }
     tcg_gen_xori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
                     0x80000000);
 }

From 61f5e1a34daebc0d92073bfc8159bee4855a1397 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <danielhb413@gmail.com>
Date: Mon, 3 Aug 2020 10:34:40 -0300
Subject: [PATCH 16/40] docs: adding NUMA documentation for pseries

This patch adds a new documentation file, ppc-spapr-numa.rst,
informing what developers and user can expect of the NUMA distance
support for the pseries machine, up to QEMU 5.1.

In the (hopefully soon) future, when we rework the NUMA mechanics
of the pseries machine to at least attempt to contemplate user
choice, this doc will be extended to inform about the new
support.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20200803133440.825276-1-danielhb413@gmail.com>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 docs/specs/index.rst          |   1 +
 docs/specs/ppc-spapr-numa.rst | 191 ++++++++++++++++++++++++++++++++++
 2 files changed, 192 insertions(+)
 create mode 100644 docs/specs/ppc-spapr-numa.rst

diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index 426632a475..1b0eb979d5 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -12,6 +12,7 @@ Contents:
 
    ppc-xive
    ppc-spapr-xive
+   ppc-spapr-numa
    acpi_hw_reduced_hotplug
    tpm
    acpi_hest_ghes
diff --git a/docs/specs/ppc-spapr-numa.rst b/docs/specs/ppc-spapr-numa.rst
new file mode 100644
index 0000000000..e762038022
--- /dev/null
+++ b/docs/specs/ppc-spapr-numa.rst
@@ -0,0 +1,191 @@
+
+NUMA mechanics for sPAPR (pseries machines)
+============================================
+
+NUMA in sPAPR works different than the System Locality Distance
+Information Table (SLIT) in ACPI. The logic is explained in the LOPAPR
+1.1 chapter 15, "Non Uniform Memory Access (NUMA) Option". This
+document aims to complement this specification, providing details
+of the elements that impacts how QEMU views NUMA in pseries.
+
+Associativity and ibm,associativity property
+--------------------------------------------
+
+Associativity is defined as a group of platform resources that has
+similar mean performance (or in our context here, distance) relative to
+everyone else outside of the group.
+
+The format of the ibm,associativity property varies with the value of
+bit 0 of byte 5 of the ibm,architecture-vec-5 property. The format with
+bit 0 equal to zero is deprecated. The current format, with the bit 0
+with the value of one, makes ibm,associativity property represent the
+physical hierarchy of the platform, as one or more lists that starts
+with the highest level grouping up to the smallest. Considering the
+following topology:
+
+::
+
+    Mem M1 ---- Proc P1    |
+    -----------------      | Socket S1  ---|
+          chip C1          |               |
+                                           | HW module 1 (MOD1)
+    Mem M2 ---- Proc P2    |               |
+    -----------------      | Socket S2  ---|
+          chip C2          |
+
+The ibm,associativity property for the processors would be:
+
+* P1: {MOD1, S1, C1, P1}
+* P2: {MOD1, S2, C2, P2}
+
+Each allocable resource has an ibm,associativity property. The LOPAPR
+specification allows multiple lists to be present in this property,
+considering that the same resource can have multiple connections to the
+platform.
+
+Relative Performance Distance and ibm,associativity-reference-points
+--------------------------------------------------------------------
+
+The ibm,associativity-reference-points property is an array that is used
+to define the relevant performance/distance  related boundaries, defining
+the NUMA levels for the platform.
+
+The definition of its elements also varies with the value of bit 0 of byte 5
+of the ibm,architecture-vec-5 property. The format with bit 0 equal to zero
+is also deprecated. With the current format, each integer of the
+ibm,associativity-reference-points represents an 1 based ordinal index (i.e.
+the first element is 1) of the ibm,associativity array. The first
+boundary is the most significant to application performance, followed by
+less significant boundaries. Allocated resources that belongs to the
+same performance boundaries are expected to have relative NUMA distance
+that matches the relevancy of the boundary itself. Resources that belongs
+to the same first boundary will have the shortest distance from each
+other. Subsequent boundaries represents greater distances and degraded
+performance.
+
+Using the previous example, the following setting reference points defines
+three NUMA levels:
+
+* ibm,associativity-reference-points = {0x3, 0x2, 0x1}
+
+The first NUMA level (0x3) is interpreted as the third element of each
+ibm,associativity array, the second level is the second element and
+the third level is the first element. Let's also consider that elements
+belonging to the first NUMA level have distance equal to 10 from each
+other, and each NUMA level doubles the distance from the previous. This
+means that the second would be 20 and the third level 40. For the P1 and
+P2 processors, we would have the following NUMA levels:
+
+::
+
+  * ibm,associativity-reference-points = {0x3, 0x2, 0x1}
+
+  * P1: associativity{MOD1, S1, C1, P1}
+
+  First NUMA level (0x3) => associativity[2] = C1
+  Second NUMA level (0x2) => associativity[1] = S1
+  Third NUMA level (0x1) => associativity[0] = MOD1
+
+  * P2: associativity{MOD1, S2, C2, P2}
+
+  First NUMA level (0x3) => associativity[2] = C2
+  Second NUMA level (0x2) => associativity[1] = S2
+  Third NUMA level (0x1) => associativity[0] = MOD1
+
+  P1 and P2 have the same third NUMA level, MOD1: Distance between them = 40
+
+Changing the ibm,associativity-reference-points array changes the performance
+distance attributes for the same associativity arrays, as the following
+example illustrates:
+
+::
+
+  * ibm,associativity-reference-points = {0x2}
+
+  * P1: associativity{MOD1, S1, C1, P1}
+
+  First NUMA level (0x2) => associativity[1] = S1
+
+  * P2: associativity{MOD1, S2, C2, P2}
+
+  First NUMA level (0x2) => associativity[1] = S2
+
+  P1 and P2 does not have a common performance boundary. Since this is a one level
+  NUMA configuration, distance between them is one boundary above the first
+  level, 20.
+
+
+In a hypothetical platform where all resources inside the same hardware module
+is considered to be on the same performance boundary:
+
+::
+
+  * ibm,associativity-reference-points = {0x1}
+
+  * P1: associativity{MOD1, S1, C1, P1}
+
+  First NUMA level (0x1) => associativity[0] = MOD0
+
+  * P2: associativity{MOD1, S2, C2, P2}
+
+  First NUMA level (0x1) => associativity[0] = MOD0
+
+  P1 and P2 belongs to the same first order boundary. The distance between then
+  is 10.
+
+
+How the pseries Linux guest calculates NUMA distances
+=====================================================
+
+Another key difference between ACPI SLIT and the LOPAPR regarding NUMA is
+how the distances are expressed. The SLIT table provides the NUMA distance
+value between the relevant resources. LOPAPR does not provide a standard
+way to calculate it. We have the ibm,associativity for each resource, which
+provides a common-performance hierarchy,  and the ibm,associativity-reference-points
+array that tells which level of associativity is considered to be relevant
+or not.
+
+The result is that each OS is free to implement and to interpret the distance
+as it sees fit. For the pseries Linux guest, each level of NUMA duplicates
+the distance of the previous level, and the maximum amount of levels is
+limited to MAX_DISTANCE_REF_POINTS = 4 (from arch/powerpc/mm/numa.c in the
+kernel tree). This results in the following distances:
+
+* both resources in the first NUMA level: 10
+* resources one NUMA level apart: 20
+* resources two NUMA levels apart: 40
+* resources three NUMA levels apart: 80
+* resources four NUMA levels apart: 160
+
+
+Consequences for QEMU NUMA tuning
+---------------------------------
+
+The way the pseries Linux guest calculates NUMA distances has a direct effect
+on what QEMU users can expect when doing NUMA tuning. As of QEMU 5.1, this is
+the default ibm,associativity-reference-points being used in the pseries
+machine:
+
+ibm,associativity-reference-points = {0x4, 0x4, 0x2}
+
+The first and second level are equal, 0x4, and a third one was added in
+commit a6030d7e0b35 exclusively for NVLink GPUs support. This means that
+regardless of how the ibm,associativity properties are being created in
+the device tree, the pseries Linux guest will only recognize three scenarios
+as far as NUMA distance goes:
+
+* if the resources belongs to the same first NUMA level = 10
+* second level is skipped since it's equal to the first
+* all resources that aren't a NVLink GPU, it is guaranteed that they will belong
+  to the same third NUMA level, having distance = 40
+* for NVLink GPUs, distance = 80 from everything else
+
+In short, we can summarize the NUMA distances seem in pseries Linux guests, using
+QEMU up to 5.1, as follows:
+
+* local distance, i.e. the distance of the resource to its own NUMA node: 10
+* if it's a NVLink GPU device, distance: 80
+* every other resource, distance: 40
+
+This also means that user input in QEMU command line does not change the
+NUMA distancing inside the guest for the pseries machine.

From 8d14523b1c98e7e64844a997d07dffcc32c83bef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Tue, 4 Aug 2020 15:16:39 +0200
Subject: [PATCH 17/40] docs: Update POWER9 XIVE support for nested guests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is not yet supported.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20200804131639.407049-1-clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 docs/specs/ppc-spapr-xive.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/specs/ppc-spapr-xive.rst b/docs/specs/ppc-spapr-xive.rst
index 6159bc6eed..7199db730b 100644
--- a/docs/specs/ppc-spapr-xive.rst
+++ b/docs/specs/ppc-spapr-xive.rst
@@ -61,6 +61,11 @@ depend on the XIVE KVM capability of the host. On older kernels
 without XIVE KVM support, QEMU will use the emulated XIVE device as a
 fallback and on newer kernels (>=5.2), the KVM XIVE device.
 
+XIVE native exploitation mode is not supported for KVM nested guests,
+VMs running under a L1 hypervisor (KVM on pSeries). In that case, the
+hypervisor will not advertise the KVM capability and QEMU will use the
+emulated XIVE device, same as for older versions of KVM.
+
 As a final refinement, the user can also switch the use of the KVM
 device with the machine option ``kernel_irqchip``.
 

From c55bcb1f47071a134a4b96b4137cccca831ac5cf Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Wed, 5 Aug 2020 17:47:16 +0200
Subject: [PATCH 18/40] spapr: Clarify error and documentation for broken KVM
 XICS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When starting an L2 KVM guest with `ic-mode=dual,kernel-irqchip=on`,
QEMU fails with:

KVM is too old to support ic-mode=dual,kernel-irqchip=on

This error message was introduced to detect older KVM versions that
didn't allow destruction and re-creation of the XICS KVM device that
we do at reboot. But it is actually the same issue that we get with
nested guests : when running under pseries, KVM currently provides
a genuine XICS device (not the XICS-on-XIVE device that we get
under powernv) which doesn't support destruction/re-creation.

This will eventually be fixed in KVM but in the meantime, update
the error message and documentation to mention the nested case.
While here, mention that in "No XIVE support in KVM" section that
this can also happen with "guest OSes supporting XIVE" since
we check this at init time before starting the guest.

Reported-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Buglink: https://bugs.launchpad.net/qemu/+bug/1890290
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159664243614.622889.18307368735989783528.stgit@bahia.lan>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 docs/specs/ppc-spapr-xive.rst |  5 ++++-
 hw/ppc/spapr_irq.c            | 12 +++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/docs/specs/ppc-spapr-xive.rst b/docs/specs/ppc-spapr-xive.rst
index 7199db730b..7144347560 100644
--- a/docs/specs/ppc-spapr-xive.rst
+++ b/docs/specs/ppc-spapr-xive.rst
@@ -126,6 +126,9 @@ xics            XICS KVM       XICS emul.     XICS KVM
 
 (1) QEMU warns with ``warning: kernel_irqchip requested but unavailable:
     IRQ_XIVE capability must be present for KVM``
+    In some cases (old host kernels or KVM nested guests), one may hit a
+    QEMU/KVM incompatibility due to device destruction in reset. QEMU fails
+    with ``KVM is incompatible with ic-mode=dual,kernel-irqchip=on``
 (2) QEMU fails with ``kernel_irqchip requested but unavailable:
     IRQ_XIVE capability must be present for KVM``
 
@@ -148,7 +151,7 @@ xics            XICS KVM       XICS emul.     XICS KVM
     mode (XICS), either don't set the ic-mode machine property or try
     ic-mode=xics or ic-mode=dual``
 (4) QEMU/KVM incompatibility due to device destruction in reset. QEMU fails
-    with ``KVM is too old to support ic-mode=dual,kernel-irqchip=on``
+    with ``KVM is incompatible with ic-mode=dual,kernel-irqchip=on``
 
 
 XIVE Device tree properties
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index 2f8f7d62f8..72bb938375 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -139,6 +139,7 @@ SpaprIrq spapr_irq_dual = {
 
 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
 {
+    ERRP_GUARD();
     MachineState *machine = MACHINE(spapr);
 
     /*
@@ -179,14 +180,19 @@ static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
 
     /*
      * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
-     * re-created. Detect that early to avoid QEMU to exit later when the
-     * guest reboots.
+     * re-created. Same happens with KVM nested guests. Detect that early to
+     * avoid QEMU to exit later when the guest reboots.
      */
     if (kvm_enabled() &&
         spapr->irq == &spapr_irq_dual &&
         kvm_kernel_irqchip_required() &&
         xics_kvm_has_broken_disconnect(spapr)) {
-        error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
+        error_setg(errp,
+            "KVM is incompatible with ic-mode=dual,kernel-irqchip=on");
+        error_append_hint(errp,
+            "This can happen with an old KVM or in a KVM nested guest.\n");
+        error_append_hint(errp,
+            "Try without kernel-irqchip or with kernel-irqchip=off.\n");
         return -1;
     }
 

From 82f086b5e7ec0adff5a3972f74c446325b4fef9a Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Thu, 6 Aug 2020 18:56:05 +0200
Subject: [PATCH 19/40] spapr/xive: Fix xive->fd if kvm_create_device() fails

If the creation of the KVM XIVE device fails for some reasons, the
negative errno ends up in xive->fd, but the rest of the code assumes
that xive->fd either contains an open fd, ie. positive value, or -1.

This doesn't cause any misbehavior except kvmppc_xive_disconnect()
that will try to close(xive->fd) during rollback and likely be
rewarded with an EBADF.

Only set xive->fd with a open fd.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159673296585.766512.15404407281299745442.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index edb7ee0e74..d55ea4670e 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -745,6 +745,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
     size_t tima_len = 4ull << TM_SHIFT;
     CPUState *cs;
+    int fd;
 
     /*
      * The KVM XIVE device already in use. This is the case when
@@ -760,11 +761,12 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     }
 
     /* First, create the KVM XIVE device */
-    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
-    if (xive->fd < 0) {
-        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
+    fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
+    if (fd < 0) {
+        error_setg_errno(errp, -fd, "XIVE: error creating KVM device");
         return -1;
     }
+    xive->fd = fd;
 
     /* Tell KVM about the # of VCPUs we may have */
     if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL,

From e781139539f3d73ac00f2aea17f5a154b10e4302 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Thu, 6 Aug 2020 18:56:13 +0200
Subject: [PATCH 20/40] spapr/xive: Simplify kvmppc_xive_disconnect()

Since this function begins with:

    /* The KVM XIVE device is not in use */
    if (!xive || xive->fd == -1) {
        return;
    }

we obviously don't need to check xive->fd again.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159673297296.766512.14780055521619233656.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index d55ea4670e..893a1ee77e 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -873,10 +873,8 @@ void kvmppc_xive_disconnect(SpaprInterruptController *intc)
      * and removed from the list of devices of the VM. The VCPU
      * presenters are also detached from the device.
      */
-    if (xive->fd != -1) {
-        close(xive->fd);
-        xive->fd = -1;
-    }
+    close(xive->fd);
+    xive->fd = -1;
 
     kvm_kernel_irqchip = false;
     kvm_msi_via_irqfd_allowed = false;

From a72c71b77d73f9888cbe8da8be09d7097ebca64f Mon Sep 17 00:00:00 2001
From: Gustavo Romero <gromero@linux.ibm.com>
Date: Tue, 11 Aug 2020 12:32:35 -0300
Subject: [PATCH 21/40] target/ppc: Integrate icount to purr, vtb, and tbu40

Currently if option '-icount auto' is passed to the QEMU TCG to enable
counting instructions the VM crashes with the following error report when
Linux runs on it:

qemu-system-ppc64: Bad icount read

This happens because read/write access to the SPRs PURR, VTB, and TBU40
is not integrated to the icount framework.

This commit fixes that issue by making the read/write access of these
SPRs aware of icount framework, adding the proper gen_io_start() calls
before calling the helpers to load/store these SPRs in TCG and ensuring
that the associated TBs end immediately after, accordingly to what's in
docs/devel/tcg-icount.rst.

Signed-off-by: Gustavo Romero <gromero@linux.ibm.com>
Message-Id: <20200811153235.4527-1-gromero@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 target/ppc/translate_init.inc.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index 5134123dd6..230a062d29 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -284,12 +284,24 @@ static void spr_write_atbu(DisasContext *ctx, int sprn, int gprn)
 ATTRIBUTE_UNUSED
 static void spr_read_purr(DisasContext *ctx, int gprn, int sprn)
 {
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
     gen_helper_load_purr(cpu_gpr[gprn], cpu_env);
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_stop_exception(ctx);
+    }
 }
 
 static void spr_write_purr(DisasContext *ctx, int sprn, int gprn)
 {
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
     gen_helper_store_purr(cpu_env, cpu_gpr[gprn]);
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_stop_exception(ctx);
+    }
 }
 
 /* HDECR */
@@ -319,17 +331,35 @@ static void spr_write_hdecr(DisasContext *ctx, int sprn, int gprn)
 
 static void spr_read_vtb(DisasContext *ctx, int gprn, int sprn)
 {
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
     gen_helper_load_vtb(cpu_gpr[gprn], cpu_env);
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_stop_exception(ctx);
+    }
 }
 
 static void spr_write_vtb(DisasContext *ctx, int sprn, int gprn)
 {
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
     gen_helper_store_vtb(cpu_env, cpu_gpr[gprn]);
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_stop_exception(ctx);
+    }
 }
 
 static void spr_write_tbu40(DisasContext *ctx, int sprn, int gprn)
 {
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
     gen_helper_store_tbu40(cpu_env, cpu_gpr[gprn]);
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_stop_exception(ctx);
+    }
 }
 
 #endif

From cf36e5b3760df0a1fdd38970294cf7b0968fcc5c Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Fri, 7 Aug 2020 13:32:06 +0200
Subject: [PATCH 22/40] ppc/xive: Rework setup of XiveSource::esb_mmio
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Depending on whether XIVE is emultated or backed with a KVM XIVE device,
the ESB MMIOs of a XIVE source point to an I/O memory region or a mapped
memory region.

This is currently handled by checking kvm_irqchip_in_kernel() returns
false in xive_source_realize(). This is a bit awkward as we usually
need to do extra things when we're using the in-kernel backend, not
less. But most important, we can do better: turn the existing "xive.esb"
memory region into a plain container, introduce an "xive.esb-emulated"
I/O subregion and rename the existing "xive.esb" subregion in the KVM
code to "xive.esb-kvm". Since "xive.esb-kvm" is added with overlap
and a higher priority, it prevails over "xive.esb-emulated" (ie.
a guest using KVM XIVE will interact with "xive.esb-kvm" instead of
the default "xive.esb-emulated" region.

While here, consolidate the computation of the MMIO region size in
a common helper.

Suggested-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159679992680.876294.7520540158586170894.stgit@bahia.lan>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c |  4 ++--
 hw/intc/xive.c           | 11 ++++++-----
 include/hw/ppc/xive.h    |  6 ++++++
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 893a1ee77e..6130882be6 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -742,7 +742,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     SpaprXive *xive = SPAPR_XIVE(intc);
     XiveSource *xsrc = &xive->source;
     Error *local_err = NULL;
-    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+    size_t esb_len = xive_source_esb_len(xsrc);
     size_t tima_len = 4ull << TM_SHIFT;
     CPUState *cs;
     int fd;
@@ -788,7 +788,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     }
 
     memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc),
-                                      "xive.esb", esb_len, xsrc->esb_mmap);
+                                      "xive.esb-kvm", esb_len, xsrc->esb_mmap);
     memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0,
                                         &xsrc->esb_mmio_kvm, 1);
 
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 9b55e0356c..561d746cd1 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1128,6 +1128,7 @@ static void xive_source_reset(void *dev)
 static void xive_source_realize(DeviceState *dev, Error **errp)
 {
     XiveSource *xsrc = XIVE_SOURCE(dev);
+    size_t esb_len = xive_source_esb_len(xsrc);
 
     assert(xsrc->xive);
 
@@ -1147,11 +1148,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp)
     xsrc->status = g_malloc0(xsrc->nr_irqs);
     xsrc->lsi_map = bitmap_new(xsrc->nr_irqs);
 
-    if (!kvm_irqchip_in_kernel()) {
-        memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
-                              &xive_source_esb_ops, xsrc, "xive.esb",
-                              (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
-    }
+    memory_region_init(&xsrc->esb_mmio, OBJECT(xsrc), "xive.esb", esb_len);
+    memory_region_init_io(&xsrc->esb_mmio_emulated, OBJECT(xsrc),
+                          &xive_source_esb_ops, xsrc, "xive.esb-emulated",
+                          esb_len);
+    memory_region_add_subregion(&xsrc->esb_mmio, 0, &xsrc->esb_mmio_emulated);
 
     qemu_register_reset(xive_source_reset, dev);
 }
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 705cf48176..82a61eaca7 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -191,6 +191,7 @@ typedef struct XiveSource {
     uint64_t        esb_flags;
     uint32_t        esb_shift;
     MemoryRegion    esb_mmio;
+    MemoryRegion    esb_mmio_emulated;
 
     /* KVM support */
     void            *esb_mmap;
@@ -215,6 +216,11 @@ static inline bool xive_source_esb_has_2page(XiveSource *xsrc)
         xsrc->esb_shift == XIVE_ESB_4K_2PAGE;
 }
 
+static inline size_t xive_source_esb_len(XiveSource *xsrc)
+{
+    return (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+}
+
 /* The trigger page is always the first/even page */
 static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno)
 {

From e519cdd9bc02990bddd395656bdaec821c94c8fe Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Fri, 7 Aug 2020 13:32:14 +0200
Subject: [PATCH 23/40] ppc/xive: Introduce dedicated kvm_irqchip_in_kernel()
 wrappers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Calls to the KVM XIVE device are guarded by kvm_irqchip_in_kernel(). This
ensures that QEMU won't try to use the device if KVM is disabled or if
an in-kernel irqchip isn't required.

When using ic-mode=dual with the pseries machine, we have two possible
interrupt controllers: XIVE and XICS. The kvm_irqchip_in_kernel() helper
will return true as soon as any of the KVM device is created. It might
lure QEMU to think that the other one is also around, while it is not.
This is exactly what happens with ic-mode=dual at machine init when
claiming IRQ numbers, which must be done on all possible IRQ backends,
eg. RTAS event sources or the PHB0 LSI table : only the KVM XICS device
is active but we end up calling kvmppc_xive_source_reset_one() anyway,
which fails. This doesn't cause any trouble because of another bug :
kvmppc_xive_source_reset_one() lacks an error_setg() and callers don't
see the failure.

Most of the other kvmppc_xive_* functions have similar xive->fd
checks to filter out the case when KVM XIVE isn't active. It
might look safer to have idempotent functions but it doesn't
really help to understand what's going on when debugging.

Since we already have all the kvm_irqchip_in_kernel() in place,
also have the callers to check xive->fd as well before calling
KVM XIVE specific code. This is straight-forward for the spapr
specific XIVE code. Some more care is needed for the platform
agnostic XIVE code since it cannot access xive->fd directly.
Introduce new in_kernel() methods in some base XIVE classes
for this purpose and implement them only in spapr.

In all cases, we still need to call kvm_irqchip_in_kernel() so that
compilers can optimize the kvmppc_xive_* calls away when CONFIG_KVM
isn't defined, thus avoiding the need for stubs.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159679993438.876294.7285654331498605426.stgit@bahia.lan>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive.c  | 45 +++++++++++++++++++++++++++++--------------
 hw/intc/xive.c        | 25 ++++++++++++++++++------
 include/hw/ppc/xive.h |  1 +
 3 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 89c8cd9667..3c84f64dc4 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -148,12 +148,19 @@ static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end,
     xive_end_queue_pic_print_info(end, 6, mon);
 }
 
+/*
+ * kvm_irqchip_in_kernel() will cause the compiler to turn this
+ * info a nop if CONFIG_KVM isn't defined.
+ */
+#define spapr_xive_in_kernel(xive) \
+    (kvm_irqchip_in_kernel() && (xive)->fd != -1)
+
 void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
 {
     XiveSource *xsrc = &xive->source;
     int i;
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         Error *local_err = NULL;
 
         kvmppc_xive_synchronize_state(xive, &local_err);
@@ -507,8 +514,10 @@ static const VMStateDescription vmstate_spapr_xive_eas = {
 
 static int vmstate_spapr_xive_pre_save(void *opaque)
 {
-    if (kvm_irqchip_in_kernel()) {
-        return kvmppc_xive_pre_save(SPAPR_XIVE(opaque));
+    SpaprXive *xive = SPAPR_XIVE(opaque);
+
+    if (spapr_xive_in_kernel(xive)) {
+        return kvmppc_xive_pre_save(xive);
     }
 
     return 0;
@@ -520,8 +529,10 @@ static int vmstate_spapr_xive_pre_save(void *opaque)
  */
 static int spapr_xive_post_load(SpaprInterruptController *intc, int version_id)
 {
-    if (kvm_irqchip_in_kernel()) {
-        return kvmppc_xive_post_load(SPAPR_XIVE(intc), version_id);
+    SpaprXive *xive = SPAPR_XIVE(intc);
+
+    if (spapr_xive_in_kernel(xive)) {
+        return kvmppc_xive_post_load(xive, version_id);
     }
 
     return 0;
@@ -564,7 +575,7 @@ static int spapr_xive_claim_irq(SpaprInterruptController *intc, int lisn,
         xive_source_irq_set_lsi(xsrc, lisn);
     }
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         return kvmppc_xive_source_reset_one(xsrc, lisn, errp);
     }
 
@@ -641,7 +652,7 @@ static void spapr_xive_set_irq(SpaprInterruptController *intc, int irq, int val)
 {
     SpaprXive *xive = SPAPR_XIVE(intc);
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         kvmppc_xive_source_set_irq(&xive->source, irq, val);
     } else {
         xive_source_set_irq(&xive->source, irq, val);
@@ -749,11 +760,16 @@ static void spapr_xive_deactivate(SpaprInterruptController *intc)
 
     spapr_xive_mmio_set_enabled(xive, false);
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         kvmppc_xive_disconnect(intc);
     }
 }
 
+static bool spapr_xive_in_kernel_xptr(const XivePresenter *xptr)
+{
+    return spapr_xive_in_kernel(SPAPR_XIVE(xptr));
+}
+
 static void spapr_xive_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -788,6 +804,7 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data)
     sicc->post_load = spapr_xive_post_load;
 
     xpc->match_nvt  = spapr_xive_match_nvt;
+    xpc->in_kernel  = spapr_xive_in_kernel_xptr;
 }
 
 static const TypeInfo spapr_xive_info = {
@@ -1058,7 +1075,7 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu,
         new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
     }
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         Error *local_err = NULL;
 
         kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err);
@@ -1379,7 +1396,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu,
      */
 
 out:
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         Error *local_err = NULL;
 
         kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err);
@@ -1480,7 +1497,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu,
         args[2] = 0;
     }
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         Error *local_err = NULL;
 
         kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err);
@@ -1642,7 +1659,7 @@ static target_ulong h_int_esb(PowerPCCPU *cpu,
         return H_P3;
     }
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data,
                                      flags & SPAPR_XIVE_ESB_STORE);
     } else {
@@ -1717,7 +1734,7 @@ static target_ulong h_int_sync(PowerPCCPU *cpu,
      * under KVM
      */
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         Error *local_err = NULL;
 
         kvmppc_xive_sync_source(xive, lisn, &local_err);
@@ -1761,7 +1778,7 @@ static target_ulong h_int_reset(PowerPCCPU *cpu,
 
     device_legacy_reset(DEVICE(xive));
 
-    if (kvm_irqchip_in_kernel()) {
+    if (spapr_xive_in_kernel(xive)) {
         Error *local_err = NULL;
 
         kvmppc_xive_reset(xive, &local_err);
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 561d746cd1..a453e8f4dc 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -592,6 +592,17 @@ static const char * const xive_tctx_ring_names[] = {
     "USER", "OS", "POOL", "PHYS",
 };
 
+/*
+ * kvm_irqchip_in_kernel() will cause the compiler to turn this
+ * info a nop if CONFIG_KVM isn't defined.
+ */
+#define xive_in_kernel(xptr)                                            \
+    (kvm_irqchip_in_kernel() &&                                         \
+     ({                                                                 \
+         XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);      \
+         xpc->in_kernel ? xpc->in_kernel(xptr) : false;                 \
+     }))
+
 void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
 {
     int cpu_index;
@@ -606,7 +617,7 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
 
     cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
 
-    if (kvm_irqchip_in_kernel()) {
+    if (xive_in_kernel(tctx->xptr)) {
         Error *local_err = NULL;
 
         kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
@@ -671,7 +682,7 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
     }
 
     /* Connect the presenter to the VCPU (required for CPU hotplug) */
-    if (kvm_irqchip_in_kernel()) {
+    if (xive_in_kernel(tctx->xptr)) {
         kvmppc_xive_cpu_connect(tctx, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
@@ -682,10 +693,11 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
 
 static int vmstate_xive_tctx_pre_save(void *opaque)
 {
+    XiveTCTX *tctx = XIVE_TCTX(opaque);
     Error *local_err = NULL;
 
-    if (kvm_irqchip_in_kernel()) {
-        kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err);
+    if (xive_in_kernel(tctx->xptr)) {
+        kvmppc_xive_cpu_get_state(tctx, &local_err);
         if (local_err) {
             error_report_err(local_err);
             return -1;
@@ -697,14 +709,15 @@ static int vmstate_xive_tctx_pre_save(void *opaque)
 
 static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
 {
+    XiveTCTX *tctx = XIVE_TCTX(opaque);
     Error *local_err = NULL;
 
-    if (kvm_irqchip_in_kernel()) {
+    if (xive_in_kernel(tctx->xptr)) {
         /*
          * Required for hotplugged CPU, for which the state comes
          * after all states of the machine.
          */
-        kvmppc_xive_cpu_set_state(XIVE_TCTX(opaque), &local_err);
+        kvmppc_xive_cpu_set_state(tctx, &local_err);
         if (local_err) {
             error_report_err(local_err);
             return -1;
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 82a61eaca7..2f3c5af810 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -402,6 +402,7 @@ typedef struct XivePresenterClass {
                      uint8_t nvt_blk, uint32_t nvt_idx,
                      bool cam_ignore, uint8_t priority,
                      uint32_t logic_serv, XiveTCTXMatch *match);
+    bool (*in_kernel)(const XivePresenter *xptr);
 } XivePresenterClass;
 
 int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,

From a4907119348d928e8b72e2d3b7566e50f272fa09 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Fri, 7 Aug 2020 13:32:21 +0200
Subject: [PATCH 24/40] spapr/xive: Convert KVM device fd checks to assert()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All callers guard these functions with an xive_in_kernel() helper. Make
it clear that they are only to be called when the KVM XIVE device exists.

Note that the check on xive is dropped in kvmppc_xive_disconnect(). It
really cannot be NULL since it comes from set_active_intc() which only
passes pointers to allocated objects.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <159679994169.876294.11026653581505077112.stgit@bahia.lan>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 35 +++++++----------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 6130882be6..82a6f99f02 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -79,10 +79,7 @@ void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
     uint64_t state[2];
     int ret;
 
-    /* The KVM XIVE device is not in use yet */
-    if (xive->fd == -1) {
-        return;
-    }
+    assert(xive->fd != -1);
 
     /* word0 and word1 of the OS ring. */
     state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);
@@ -101,10 +98,7 @@ void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
     uint64_t state[2] = { 0 };
     int ret;
 
-    /* The KVM XIVE device is not in use */
-    if (xive->fd == -1) {
-        return;
-    }
+    assert(xive->fd != -1);
 
     ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
     if (ret != 0) {
@@ -156,10 +150,7 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
     unsigned long vcpu_id;
     int ret;
 
-    /* The KVM XIVE device is not in use */
-    if (xive->fd == -1) {
-        return;
-    }
+    assert(xive->fd != -1);
 
     /* Check if CPU was hot unplugged and replugged. */
     if (kvm_cpu_is_enabled(tctx->cs)) {
@@ -245,10 +236,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
     SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
     uint64_t state = 0;
 
-    /* The KVM XIVE device is not in use */
-    if (xive->fd == -1) {
-        return -ENODEV;
-    }
+    assert(xive->fd != -1);
 
     if (xive_source_irq_is_lsi(xsrc, srcno)) {
         state |= KVM_XIVE_LEVEL_SENSITIVE;
@@ -592,10 +580,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, int running,
 
 void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
 {
-    /* The KVM XIVE device is not in use */
-    if (xive->fd == -1) {
-        return;
-    }
+    assert(xive->fd != -1);
 
     /*
      * When the VM is stopped, the sources are masked and the previous
@@ -622,10 +607,7 @@ int kvmppc_xive_pre_save(SpaprXive *xive)
 {
     Error *local_err = NULL;
 
-    /* The KVM XIVE device is not in use */
-    if (xive->fd == -1) {
-        return 0;
-    }
+    assert(xive->fd != -1);
 
     /* EAT: there is no extra state to query from KVM */
 
@@ -845,10 +827,7 @@ void kvmppc_xive_disconnect(SpaprInterruptController *intc)
     XiveSource *xsrc;
     size_t esb_len;
 
-    /* The KVM XIVE device is not in use */
-    if (!xive || xive->fd == -1) {
-        return;
-    }
+    assert(xive->fd != -1);
 
     /* Clear the KVM mapping */
     xsrc = &xive->source;

From 4a6891b838e4914d25ab97a2a03f946e3c085a8f Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:53:58 +0200
Subject: [PATCH 25/40] spapr: Simplify error handling in spapr_phb_realize()

The spapr_phb_realize() function has a local_err variable which
is used to:

1) check failures of spapr_irq_findone() and spapr_irq_claim()

2) prepend extra information to the error message

Recent work from Markus Armbruster highlighted we get better
code when testing the return value of a function, rather than
setting up all the local_err boiler plate. For similar reasons,
it is now preferred to use ERRP_GUARD() and error_prepend()
rather than error_propagate_prepend().

Since spapr_irq_findone() and spapr_irq_claim() return negative
values in case of failure, do both changes.

This is just cleanup, no functional impact.

Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <159707843851.1489912.6108405733810934642.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_pci.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 363cdb3f7b..0a418f1e67 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1796,6 +1796,7 @@ static void spapr_phb_destroy_msi(gpointer opaque)
 
 static void spapr_phb_realize(DeviceState *dev, Error **errp)
 {
+    ERRP_GUARD();
     /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user
      * tries to add a sPAPR PHB to a non-pseries machine.
      */
@@ -1813,7 +1814,6 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     uint64_t msi_window_size = 4096;
     SpaprTceTable *tcet;
     const unsigned windows_supported = spapr_phb_windows_supported(sphb);
-    Error *local_err = NULL;
 
     if (!spapr) {
         error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine");
@@ -1964,13 +1964,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
 
     /* Initialize the LSI table */
     for (i = 0; i < PCI_NUM_PINS; i++) {
-        uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
+        int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
 
         if (smc->legacy_irq_allocation) {
-            irq = spapr_irq_findone(spapr, &local_err);
-            if (local_err) {
-                error_propagate_prepend(errp, local_err,
-                                        "can't allocate LSIs: ");
+            irq = spapr_irq_findone(spapr, errp);
+            if (irq < 0) {
+                error_prepend(errp, "can't allocate LSIs: ");
                 /*
                  * Older machines will never support PHB hotplug, ie, this is an
                  * init only path and QEMU will terminate. No need to rollback.
@@ -1979,9 +1978,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
             }
         }
 
-        spapr_irq_claim(spapr, irq, true, &local_err);
-        if (local_err) {
-            error_propagate_prepend(errp, local_err, "can't allocate LSIs: ");
+        if (spapr_irq_claim(spapr, irq, true, errp) < 0) {
+            error_prepend(errp, "can't allocate LSIs: ");
             goto unrealize;
         }
 

From 3885ca66881f1d2568e169dcbf793fd493146d14 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:54:05 +0200
Subject: [PATCH 26/40] spapr/xive: Rework error handling of
 kvmppc_xive_cpu_connect()

Use error_setg_errno() instead of error_setg(strerror()). While here,
use -ret instead of errno since kvm_vcpu_enable_cap() returns a negative
errno on failure.

Use ERRP_GUARD() to ensure that errp can be passed to error_append_hint(),
and get rid of the local_err boilerplate.

Propagate the return value so that callers may use it as well to check
failures.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707844549.1489912.4862921680328017645.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 21 ++++++++++-----------
 include/hw/ppc/xive.h    |  2 +-
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 82a6f99f02..aa1a2f9153 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -144,8 +144,9 @@ void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
     }
 }
 
-void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
+int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
 {
+    ERRP_GUARD();
     SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
     unsigned long vcpu_id;
     int ret;
@@ -154,7 +155,7 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
 
     /* Check if CPU was hot unplugged and replugged. */
     if (kvm_cpu_is_enabled(tctx->cs)) {
-        return;
+        return 0;
     }
 
     vcpu_id = kvm_arch_vcpu_id(tctx->cs);
@@ -162,20 +163,18 @@ void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
     ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
                               vcpu_id, 0);
     if (ret < 0) {
-        Error *local_err = NULL;
-
-        error_setg(&local_err,
-                   "XIVE: unable to connect CPU%ld to KVM device: %s",
-                   vcpu_id, strerror(errno));
-        if (errno == ENOSPC) {
-            error_append_hint(&local_err, "Try -smp maxcpus=N with N < %u\n",
+        error_setg_errno(errp, -ret,
+                         "XIVE: unable to connect CPU%ld to KVM device",
+                         vcpu_id);
+        if (ret == -ENOSPC) {
+            error_append_hint(errp, "Try -smp maxcpus=N with N < %u\n",
                               MACHINE(qdev_get_machine())->smp.max_cpus);
         }
-        error_propagate(errp, local_err);
-        return;
+        return ret;
     }
 
     kvm_cpu_enable(tctx->cs);
+    return 0;
 }
 
 /*
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 2f3c5af810..2d87ed4372 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -487,7 +487,7 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb);
 
 int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
-void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
+int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
 void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
 void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp);
 void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp);

From 46407a2531da4ff206c1aefe8c3f6d8ad53f2de4 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:54:12 +0200
Subject: [PATCH 27/40] spapr/xive: Rework error handling of
 kvmppc_xive_source_reset()

Since kvmppc_xive_source_reset_one() has a return value, convert
kvmppc_xive_source_reset() to use it for error checking. This
allows to get rid of the local_err boiler plate.

Propagate the return value so that callers may use it as well to check
failures.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707845245.1489912.9151822670764690034.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index aa1a2f9153..d801bf5cd1 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -248,24 +248,25 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
                              true, errp);
 }
 
-static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
+static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
 {
     SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
     int i;
 
     for (i = 0; i < xsrc->nr_irqs; i++) {
-        Error *local_err = NULL;
+        int ret;
 
         if (!xive_eas_is_valid(&xive->eat[i])) {
             continue;
         }
 
-        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
+        ret = kvmppc_xive_source_reset_one(xsrc, i, errp);
+        if (ret < 0) {
+            return ret;
         }
     }
+
+    return 0;
 }
 
 /*

From b14adb4a27c80a255fb35451d7cb2bc70743e7f4 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:54:19 +0200
Subject: [PATCH 28/40] spapr/xive: Rework error handling of kvmppc_xive_mmap()

Callers currently check failures of kvmppc_xive_mmap() through the
@errp argument, which isn't a recommanded practice. It is preferred
to use a return value when possible.

Since NULL isn't an invalid address in theory, it seems better to
return MAP_FAILED and to teach callers to handle it.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707845972.1489912.719896767746375765.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index d801bf5cd1..b2a36fd59d 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -698,6 +698,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
     return 0;
 }
 
+/* Returns MAP_FAILED on error and sets errno */
 static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
                               Error **errp)
 {
@@ -708,7 +709,6 @@ static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
                 pgoff << page_shift);
     if (addr == MAP_FAILED) {
         error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
-        return NULL;
     }
 
     return addr;
@@ -728,6 +728,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     size_t tima_len = 4ull << TM_SHIFT;
     CPUState *cs;
     int fd;
+    void *addr;
 
     /*
      * The KVM XIVE device already in use. This is the case when
@@ -763,11 +764,12 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     /*
      * 1. Source ESB pages - KVM mapping
      */
-    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
-                                      &local_err);
-    if (local_err) {
+    addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
+                            &local_err);
+    if (addr == MAP_FAILED) {
         goto fail;
     }
+    xsrc->esb_mmap = addr;
 
     memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc),
                                       "xive.esb-kvm", esb_len, xsrc->esb_mmap);
@@ -781,11 +783,13 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     /*
      * 3. TIMA pages - KVM mapping
      */
-    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
-                                     &local_err);
-    if (local_err) {
+    addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
+                            &local_err);
+    if (addr == MAP_FAILED) {
         goto fail;
     }
+    xive->tm_mmap = addr;
+
     memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive),
                                       "xive.tima", tima_len, xive->tm_mmap);
     memory_region_add_subregion_overlap(&xive->tm_mmio, 0,

From 5fa36b7ffbcb2056249929a7b1ee4e30c07dc67c Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:54:26 +0200
Subject: [PATCH 29/40] spapr/xive: Rework error handling of
 kvmppc_xive_cpu_[gs]et_state()

kvm_set_one_reg() returns a negative errno on failure, use that instead
of errno. Also propagate it to callers so they can use it to check
for failures and hopefully get rid of their local_err boilerplate.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707846665.1489912.14267225652103441921.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 15 ++++++++++-----
 include/hw/ppc/xive.h    |  4 ++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index b2a36fd59d..5e088ccbf8 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -73,7 +73,7 @@ static void kvm_cpu_disable_all(void)
  * XIVE Thread Interrupt Management context (KVM)
  */
 
-void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
+int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
 {
     SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
     uint64_t state[2];
@@ -86,13 +86,16 @@ void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
 
     ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
     if (ret != 0) {
-        error_setg_errno(errp, errno,
+        error_setg_errno(errp, -ret,
                          "XIVE: could not restore KVM state of CPU %ld",
                          kvm_arch_vcpu_id(tctx->cs));
+        return ret;
     }
+
+    return 0;
 }
 
-void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
+int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
 {
     SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
     uint64_t state[2] = { 0 };
@@ -102,14 +105,16 @@ void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
 
     ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
     if (ret != 0) {
-        error_setg_errno(errp, errno,
+        error_setg_errno(errp, -ret,
                          "XIVE: could not capture KVM state of CPU %ld",
                          kvm_arch_vcpu_id(tctx->cs));
-        return;
+        return ret;
     }
 
     /* word0 and word1 of the OS ring. */
     *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
+
+    return 0;
 }
 
 typedef struct {
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 2d87ed4372..785c905357 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -489,7 +489,7 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
 int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
 void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
-void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp);
-void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp);
+int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp);
+int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp);
 
 #endif /* PPC_XIVE_H */

From f9a548edf2a54f59c37032dee3763f532e968fee Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:54:33 +0200
Subject: [PATCH 30/40] spapr/xive: Rework error handling of
 kvmppc_xive_[gs]et_queue_config()

Since kvm_device_access() returns a negative errno on failure, convert
kvmppc_xive_get_queue_config() and kvmppc_xive_set_queue_config() to
use it for error checking. This allows to get rid of the local_err
boilerplate.

Propagate the return value so that callers may use it as well to check
failures.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707847357.1489912.2032291280645236480.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c    | 35 ++++++++++++++++-------------------
 include/hw/ppc/spapr_xive.h |  4 ++--
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 5e088ccbf8..696623f717 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -374,15 +374,15 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
 /*
  * sPAPR XIVE interrupt controller (KVM)
  */
-void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
-                                  uint32_t end_idx, XiveEND *end,
-                                  Error **errp)
+int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp)
 {
     struct kvm_ppc_xive_eq kvm_eq = { 0 };
     uint64_t kvm_eq_idx;
     uint8_t priority;
     uint32_t server;
-    Error *local_err = NULL;
+    int ret;
 
     assert(xive_end_is_valid(end));
 
@@ -394,11 +394,10 @@ void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
     kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
         KVM_XIVE_EQ_SERVER_MASK;
 
-    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
-                      &kvm_eq, false, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
+    ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                            &kvm_eq, false, errp);
+    if (ret < 0) {
+        return ret;
     }
 
     /*
@@ -408,17 +407,18 @@ void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
      */
     end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
         xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
+
+    return 0;
 }
 
-void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
-                                  uint32_t end_idx, XiveEND *end,
-                                  Error **errp)
+int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
+                                 uint32_t end_idx, XiveEND *end,
+                                 Error **errp)
 {
     struct kvm_ppc_xive_eq kvm_eq = { 0 };
     uint64_t kvm_eq_idx;
     uint8_t priority;
     uint32_t server;
-    Error *local_err = NULL;
 
     /*
      * Build the KVM state from the local END structure.
@@ -456,12 +456,9 @@ void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
     kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
         KVM_XIVE_EQ_SERVER_MASK;
 
-    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
-                      &kvm_eq, true, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
+    return
+        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
+                          &kvm_eq, true, errp);
 }
 
 void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index 93d09d68de..d0a08b618f 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -85,10 +85,10 @@ void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
 void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp);
 uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                             uint64_t data, bool write);
-void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
+int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp);
-void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
+int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp);
 void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);

From d53482a73bba983f521d6b9652e6f68e856ab794 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:54:40 +0200
Subject: [PATCH 31/40] spapr/xive: Rework error handling in
 kvmppc_xive_get_queues()

Since kvmppc_xive_get_queue_config() has a return value, convert
kvmppc_xive_get_queues() to use it for error checking. This allows
to get rid of the local_err boiler plate.

Propagate the return value so that callers may use it as well to check
failures.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707848069.1489912.14879208798696134531.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 696623f717..4142aaffff 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -467,23 +467,24 @@ void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
                       NULL, true, errp);
 }
 
-static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
+static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
 {
-    Error *local_err = NULL;
     int i;
+    int ret;
 
     for (i = 0; i < xive->nr_ends; i++) {
         if (!xive_end_is_valid(&xive->endt[i])) {
             continue;
         }
 
-        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
-                                     &xive->endt[i], &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
+        ret = kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+                                           &xive->endt[i], errp);
+        if (ret < 0) {
+            return ret;
         }
     }
+
+    return 0;
 }
 
 /*

From d55daadcb801f309556fdbab00b2653d20e26603 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:54:47 +0200
Subject: [PATCH 32/40] spapr/xive: Rework error handling of
 kvmppc_xive_set_source_config()

Since kvm_device_access() returns a negative errno on failure, convert
kvmppc_xive_set_source_config() to use it for error checking. This allows
to get rid of the local_err boilerplate.

Propagate the return value so that callers may use it as well to check
failures.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707848764.1489912.17078842252160674523.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c    | 13 ++++---------
 include/hw/ppc/spapr_xive.h |  4 ++--
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 4142aaffff..f2dda69218 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -186,8 +186,8 @@ int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
  * XIVE Interrupt Source (KVM)
  */
 
-void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
-                                   Error **errp)
+int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
+                                  Error **errp)
 {
     uint32_t end_idx;
     uint32_t end_blk;
@@ -196,7 +196,6 @@ void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
     bool masked;
     uint32_t eisn;
     uint64_t kvm_src;
-    Error *local_err = NULL;
 
     assert(xive_eas_is_valid(eas));
 
@@ -216,12 +215,8 @@ void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
     kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
         KVM_XIVE_SOURCE_EISN_MASK;
 
-    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
-                      &kvm_src, true, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
+    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
+                             &kvm_src, true, errp);
 }
 
 void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index d0a08b618f..0ffbe0be02 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -80,8 +80,8 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
                         Error **errp);
 void kvmppc_xive_disconnect(SpaprInterruptController *intc);
 void kvmppc_xive_reset(SpaprXive *xive, Error **errp);
-void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
-                                   Error **errp);
+int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
+                                  Error **errp);
 void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp);
 uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                             uint64_t data, bool write);

From 42a92d925d03ec49dfdefb43c15b46c3ca55f9e4 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:54:54 +0200
Subject: [PATCH 33/40] spapr/kvm: Fix error handling in kvmppc_xive_pre_save()

Now that kvmppc_xive_get_queues() returns a negative errno on failure, check
with that because it is preferred to local_err. And most of all, propagate
it because vmstate expects negative errnos.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707849455.1489912.6034461176847728064.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index f2dda69218..1686b036eb 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -604,16 +604,17 @@ void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
 int kvmppc_xive_pre_save(SpaprXive *xive)
 {
     Error *local_err = NULL;
+    int ret;
 
     assert(xive->fd != -1);
 
     /* EAT: there is no extra state to query from KVM */
 
     /* ENDT */
-    kvmppc_xive_get_queues(xive, &local_err);
-    if (local_err) {
+    ret = kvmppc_xive_get_queues(xive, &local_err);
+    if (ret < 0) {
         error_report_err(local_err);
-        return -1;
+        return ret;
     }
 
     return 0;

From a845a54cbe0f197b833921b2af13fee88bb8240d Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:55:01 +0200
Subject: [PATCH 34/40] spapr/xive: Fix error handling in
 kvmppc_xive_post_load()

Now that all these functions return a negative errno on failure, check
that because it is preferred to local_err. And most of all, propagate it
because vmstate expects negative errnos.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707850148.1489912.18355118622296682631.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 1686b036eb..005729ebff 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -631,6 +631,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
     Error *local_err = NULL;
     CPUState *cs;
     int i;
+    int ret;
 
     /* The KVM XIVE device should be in use */
     assert(xive->fd != -1);
@@ -641,11 +642,10 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
             continue;
         }
 
-        kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
-                                     &xive->endt[i], &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            return -1;
+        ret = kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+                                           &xive->endt[i], &local_err);
+        if (ret < 0) {
+            goto fail;
         }
     }
 
@@ -660,16 +660,14 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
          * previously set in KVM. Since we don't do that for all interrupts
          * at reset time anymore, let's do it now.
          */
-        kvmppc_xive_source_reset_one(&xive->source, i, &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            return -1;
+        ret = kvmppc_xive_source_reset_one(&xive->source, i, &local_err);
+        if (ret < 0) {
+            goto fail;
         }
 
-        kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            return -1;
+        ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
+        if (ret < 0) {
+            goto fail;
         }
     }
 
@@ -686,15 +684,18 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
     CPU_FOREACH(cs) {
         PowerPCCPU *cpu = POWERPC_CPU(cs);
 
-        kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            return -1;
+        ret = kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
+        if (ret < 0) {
+            goto fail;
         }
     }
 
     /* The source states will be restored when the machine starts running */
     return 0;
+
+fail:
+    error_report_err(local_err);
+    return ret;
 }
 
 /* Returns MAP_FAILED on error and sets errno */

From 2a8100cb61959e7a934049f2bd3a49a0f84a066b Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:55:08 +0200
Subject: [PATCH 35/40] ppc/xive: Fix error handling in vmstate_xive_tctx_*()
 callbacks

Now that kvmppc_xive_cpu_get_state() and kvmppc_xive_cpu_set_state()
return negative errnos on failures, use that instead local_err because
it is the recommended practice. Also return that instead of -1 since
vmstate expects negative errnos.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707850840.1489912.14912810818646455474.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/xive.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index a453e8f4dc..17ca5a1916 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -695,12 +695,13 @@ static int vmstate_xive_tctx_pre_save(void *opaque)
 {
     XiveTCTX *tctx = XIVE_TCTX(opaque);
     Error *local_err = NULL;
+    int ret;
 
     if (xive_in_kernel(tctx->xptr)) {
-        kvmppc_xive_cpu_get_state(tctx, &local_err);
-        if (local_err) {
+        ret = kvmppc_xive_cpu_get_state(tctx, &local_err);
+        if (ret < 0) {
             error_report_err(local_err);
-            return -1;
+            return ret;
         }
     }
 
@@ -711,16 +712,17 @@ static int vmstate_xive_tctx_post_load(void *opaque, int version_id)
 {
     XiveTCTX *tctx = XIVE_TCTX(opaque);
     Error *local_err = NULL;
+    int ret;
 
     if (xive_in_kernel(tctx->xptr)) {
         /*
          * Required for hotplugged CPU, for which the state comes
          * after all states of the machine.
          */
-        kvmppc_xive_cpu_set_state(tctx, &local_err);
-        if (local_err) {
+        ret = kvmppc_xive_cpu_set_state(tctx, &local_err);
+        if (ret < 0) {
             error_report_err(local_err);
-            return -1;
+            return ret;
         }
     }
 

From 6cdc0e20631faf781047b515215208660393c9a9 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:55:15 +0200
Subject: [PATCH 36/40] spapr/xive: Simplify error handling in
 kvmppc_xive_connect()

Now that all these functions return a negative errno on failure, check
that and get rid of the local_err boilerplate.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707851537.1489912.1030839306195472651.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 005729ebff..e9a36115be 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -723,12 +723,12 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
 {
     SpaprXive *xive = SPAPR_XIVE(intc);
     XiveSource *xsrc = &xive->source;
-    Error *local_err = NULL;
     size_t esb_len = xive_source_esb_len(xsrc);
     size_t tima_len = 4ull << TM_SHIFT;
     CPUState *cs;
     int fd;
     void *addr;
+    int ret;
 
     /*
      * The KVM XIVE device already in use. This is the case when
@@ -754,9 +754,10 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     /* Tell KVM about the # of VCPUs we may have */
     if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                               KVM_DEV_XIVE_NR_SERVERS)) {
-        if (kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
-                              KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true,
-                              &local_err)) {
+        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
+                                KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true,
+                                errp);
+        if (ret < 0) {
             goto fail;
         }
     }
@@ -764,8 +765,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     /*
      * 1. Source ESB pages - KVM mapping
      */
-    addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
-                            &local_err);
+    addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, errp);
     if (addr == MAP_FAILED) {
         goto fail;
     }
@@ -783,8 +783,7 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     /*
      * 3. TIMA pages - KVM mapping
      */
-    addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
-                            &local_err);
+    addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, errp);
     if (addr == MAP_FAILED) {
         goto fail;
     }
@@ -802,15 +801,15 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     CPU_FOREACH(cs) {
         PowerPCCPU *cpu = POWERPC_CPU(cs);
 
-        kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err);
-        if (local_err) {
+        ret = kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, errp);
+        if (ret < 0) {
             goto fail;
         }
     }
 
     /* Update the KVM sources */
-    kvmppc_xive_source_reset(xsrc, &local_err);
-    if (local_err) {
+    ret = kvmppc_xive_source_reset(xsrc, errp);
+    if (ret < 0) {
         goto fail;
     }
 
@@ -820,7 +819,6 @@ int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
     return 0;
 
 fail:
-    error_propagate(errp, local_err);
     kvmppc_xive_disconnect(intc);
     return -1;
 }

From 61203f2b356f8aa7b6cfd3f792cd1f0cc7bdf99b Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:55:22 +0200
Subject: [PATCH 37/40] ppc/xive: Simplify error handling in
 xive_tctx_realize()

Now that kvmppc_xive_cpu_connect() returns a negative errno on failure,
use that and get rid of the local_err boilerplate.

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707852234.1489912.16410314514265848075.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/xive.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 17ca5a1916..489e6256ef 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -662,7 +662,6 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
     XiveTCTX *tctx = XIVE_TCTX(dev);
     PowerPCCPU *cpu;
     CPUPPCState *env;
-    Error *local_err = NULL;
 
     assert(tctx->cs);
     assert(tctx->xptr);
@@ -683,9 +682,7 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp)
 
     /* Connect the presenter to the VCPU (required for CPU hotplug) */
     if (xive_in_kernel(tctx->xptr)) {
-        kvmppc_xive_cpu_connect(tctx, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
+        if (kvmppc_xive_cpu_connect(tctx, errp) < 0) {
             return;
         }
     }

From 1118b6b72719ff83f2be1efc11d7248cf225074c Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 10 Aug 2020 18:55:29 +0200
Subject: [PATCH 38/40] spapr/xive: Simplify error handling of
 kvmppc_xive_cpu_synchronize_state()

Now that kvmppc_xive_cpu_get_state() returns negative on error, use that
and get rid of the temporary Error object and error_propagate().

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159707852916.1489912.8376334685349668124.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive_kvm.c | 14 ++++++--------
 include/hw/ppc/xive.h    |  2 +-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index e9a36115be..d871bb1a00 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -119,7 +119,8 @@ int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
 
 typedef struct {
     XiveTCTX *tctx;
-    Error *err;
+    Error **errp;
+    int ret;
 } XiveCpuGetState;
 
 static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
@@ -127,14 +128,14 @@ static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
 {
     XiveCpuGetState *s = arg.host_ptr;
 
-    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
+    s->ret = kvmppc_xive_cpu_get_state(s->tctx, s->errp);
 }
 
-void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
+int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
 {
     XiveCpuGetState s = {
         .tctx = tctx,
-        .err = NULL,
+        .errp = errp,
     };
 
     /*
@@ -143,10 +144,7 @@ void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
     run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
                RUN_ON_CPU_HOST_PTR(&s));
 
-    if (s.err) {
-        error_propagate(errp, s.err);
-        return;
-    }
+    return s.ret;
 }
 
 int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 785c905357..2c42ae92d2 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -488,7 +488,7 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb);
 int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
 int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
-void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
+int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
 int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp);
 int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp);
 

From 37035df51eaabb8d26b71da75b88a1c6727de8fa Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Fri, 14 Aug 2020 01:12:19 +0200
Subject: [PATCH 39/40] nvram: Exit QEMU if NVRAM cannot contain all -prom-env
 data

Since commit 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to
support the -prom-env parameter"), pseries machines can pre-initialize
the "system" partition in the NVRAM with the data passed to all -prom-env
parameters on the QEMU command line.

In this case it is assumed that all the data fits in 64 KiB, but the user
can easily pass more and crash QEMU:

$ qemu-system-ppc64 -M pseries $(for ((x=0;x<128;x++)); do \
  echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \
  done) # this requires ~128 Kib
malloc(): corrupted top size
Aborted (core dumped)

This happens because we don't check if all the prom-env data fits in
the NVRAM and chrp_nvram_set_var() happily memcpy() it passed the
buffer.

This crash affects basically all ppc/ppc64 machine types that use -prom-env:
- pseries (all versions)
- g3beige
- mac99

and also sparc/sparc64 machine types:
- LX
- SPARCClassic
- SPARCbook
- SS-10
- SS-20
- SS-4
- SS-5
- SS-600MP
- Voyager
- sun4u
- sun4v

Add a max_len argument to chrp_nvram_create_system_partition() so that
it can check the available size before writing to memory.

Since NVRAM is populated at machine init, it seems reasonable to consider
this error as fatal. So, instead of reporting an error when we detect that
the NVRAM is too small and adapt all machine types to handle it, we simply
exit QEMU in all cases. This is still better than crashing. If someone
wants another behavior, I guess this can be reworked later.

Tested with:

$ yes q | \
  (for arch in ppc ppc64 sparc sparc64; do \
       echo == $arch ==; \
       qemu=${arch}-softmmu/qemu-system-$arch; \
       for mach in $($qemu -M help | awk '! /^Supported/ { print $1 }'); do \
           echo $mach; \
           $qemu -M $mach -monitor stdio -nodefaults -nographic \
           $(for ((x=0;x<128;x++)); do \
                 echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \
             done) >/dev/null; \
        done; echo; \
   done)

Without the patch, affected machine types cause QEMU to report some
memory corruption and crash:

malloc(): corrupted top size

free(): invalid size

*** stack smashing detected ***: terminated

With the patch, QEMU prints the following message and exits:

NVRAM is too small. Try to pass less data to -prom-env

It seems that the conditions for the crash have always existed, but it
affects pseries, the machine type I care for, since commit 61f20b9dc5b7
only.

Fixes: 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter")
RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1867739
Reported-by: John Snow <jsnow@redhat.com>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159736033937.350502.12402444542194031035.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/nvram/chrp_nvram.c         | 24 +++++++++++++++++++++---
 hw/nvram/mac_nvram.c          |  2 +-
 hw/nvram/spapr_nvram.c        |  3 ++-
 hw/sparc/sun4m.c              |  2 +-
 hw/sparc64/sun4u.c            |  2 +-
 include/hw/nvram/chrp_nvram.h |  3 ++-
 6 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c
index d969f26704..d4d10a7c03 100644
--- a/hw/nvram/chrp_nvram.c
+++ b/hw/nvram/chrp_nvram.c
@@ -21,14 +21,21 @@
 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
+#include "qemu/error-report.h"
 #include "hw/nvram/chrp_nvram.h"
 #include "sysemu/sysemu.h"
 
-static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str)
+static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str,
+                              int max_len)
 {
     int len;
 
     len = strlen(str) + 1;
+
+    if (max_len < len) {
+        return -1;
+    }
+
     memcpy(&nvram[addr], str, len);
 
     return addr + len;
@@ -38,19 +45,26 @@ static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str)
  * Create a "system partition", used for the Open Firmware
  * environment variables.
  */
-int chrp_nvram_create_system_partition(uint8_t *data, int min_len)
+int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len)
 {
     ChrpNvramPartHdr *part_header;
     unsigned int i;
     int end;
 
+    if (max_len < sizeof(*part_header)) {
+        goto fail;
+    }
+
     part_header = (ChrpNvramPartHdr *)data;
     part_header->signature = CHRP_NVPART_SYSTEM;
     pstrcpy(part_header->name, sizeof(part_header->name), "system");
 
     end = sizeof(ChrpNvramPartHdr);
     for (i = 0; i < nb_prom_envs; i++) {
-        end = chrp_nvram_set_var(data, end, prom_envs[i]);
+        end = chrp_nvram_set_var(data, end, prom_envs[i], max_len - end);
+        if (end == -1) {
+            goto fail;
+        }
     }
 
     /* End marker */
@@ -65,6 +79,10 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len)
     chrp_nvram_finish_partition(part_header, end);
 
     return end;
+
+fail:
+    error_report("NVRAM is too small. Try to pass less data to -prom-env");
+    exit(EXIT_FAILURE);
 }
 
 /**
diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index beec1c4e4d..11f2d31cdb 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -141,7 +141,7 @@ static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off,
 
     /* OpenBIOS nvram variables partition */
     sysp_end = chrp_nvram_create_system_partition(&nvr->data[off],
-                                                  DEF_SYSTEM_SIZE) + off;
+                                                  DEF_SYSTEM_SIZE, len) + off;
 
     /* Free space partition */
     chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end);
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 15d08281d4..386513499f 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -188,7 +188,8 @@ static void spapr_nvram_realize(SpaprVioDevice *dev, Error **errp)
         }
     } else if (nb_prom_envs > 0) {
         /* Create a system partition to pass the -prom-env variables */
-        chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4);
+        chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4,
+                                           nvram->size);
         chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4],
                                          nvram->size - MIN_NVRAM_SIZE / 4);
     }
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 9be930415f..cf7dfa4af5 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -143,7 +143,7 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr,
     memset(image, '\0', sizeof(image));
 
     /* OpenBIOS nvram variables partition */
-    sysp_end = chrp_nvram_create_system_partition(image, 0);
+    sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0);
 
     /* Free space partition */
     chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end);
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 9e30203dcc..37310b73e6 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -136,7 +136,7 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size,
     memset(image, '\0', sizeof(image));
 
     /* OpenBIOS nvram variables partition */
-    sysp_end = chrp_nvram_create_system_partition(image, 0);
+    sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0);
 
     /* Free space partition */
     chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end);
diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h
index 09941a9be4..4a0f5c21b8 100644
--- a/include/hw/nvram/chrp_nvram.h
+++ b/include/hw/nvram/chrp_nvram.h
@@ -50,7 +50,8 @@ chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size)
     header->checksum = sum & 0xff;
 }
 
-int chrp_nvram_create_system_partition(uint8_t *data, int min_len);
+/* chrp_nvram_create_system_partition() failure is fatal */
+int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len);
 int chrp_nvram_create_free_partition(uint8_t *data, int len);
 
 #endif

From 3110f0ee19ccdb50adff3dfa1321039f69efddcd Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Thu, 13 Aug 2020 19:28:10 +0200
Subject: [PATCH 40/40] spapr/xive: Use xive_source_esb_len()

static inline size_t xive_source_esb_len(XiveSource *xsrc)
{
    return (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
}

Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <159733969034.320580.6571451425779179477.stgit@bahia.lan>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/intc/spapr_xive.c     | 2 +-
 hw/intc/spapr_xive_kvm.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 3c84f64dc4..4bd0d606ba 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -336,7 +336,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp)
     sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);
 
     /* Set the mapping address of the END ESB pages after the source ESBs */
-    xive->end_base = xive->vc_base + (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+    xive->end_base = xive->vc_base + xive_source_esb_len(xsrc);
 
     /*
      * Allocate the routing tables
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index d871bb1a00..e8667ce5f6 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -831,7 +831,7 @@ void kvmppc_xive_disconnect(SpaprInterruptController *intc)
 
     /* Clear the KVM mapping */
     xsrc = &xive->source;
-    esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
+    esb_len = xive_source_esb_len(xsrc);
 
     if (xsrc->esb_mmap) {
         memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm);