From 44a3dd9b8718a7f0a94053c57782886748a43d7a Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 6 Oct 2016 16:50:48 +0200 Subject: [PATCH 01/16] tests: minor cleanups in usb-hcd-uhci-test Two minor cleanups: - exit gracefully in case on unsupported target, - put machine command line in a constant to avoid to duplicate it. Signed-off-by: Laurent Vivier Reviewed-by: Gerd Hoffmann Signed-off-by: David Gibson --- tests/usb-hcd-uhci-test.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c index 4b951ce492..e956b9ccb7 100644 --- a/tests/usb-hcd-uhci-test.c +++ b/tests/usb-hcd-uhci-test.c @@ -77,6 +77,9 @@ static void test_usb_storage_hotplug(void) int main(int argc, char **argv) { const char *arch = qtest_get_arch(); + const char *cmd = "-device piix3-usb-uhci,id=uhci,addr=1d.0" + " -drive id=drive0,if=none,file=/dev/null,format=raw" + " -device usb-tablet,bus=uhci.0,port=1"; int ret; g_test_init(&argc, &argv, NULL); @@ -87,13 +90,13 @@ int main(int argc, char **argv) qtest_add_func("/uhci/pci/hotplug/usb-storage", test_usb_storage_hotplug); if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { - qs = qtest_pc_boot("-device piix3-usb-uhci,id=uhci,addr=1d.0" - " -drive id=drive0,if=none,file=/dev/null,format=raw" - " -device usb-tablet,bus=uhci.0,port=1"); + qs = qtest_pc_boot(cmd); } else if (strcmp(arch, "ppc64") == 0) { - qs = qtest_spapr_boot("-device piix3-usb-uhci,id=uhci,addr=1d.0" - " -drive id=drive0,if=none,file=/dev/null,format=raw" - " -device usb-tablet,bus=uhci.0,port=1"); + qs = qtest_spapr_boot(cmd); + } else { + g_printerr("usb-hcd-uhci-test tests are only " + "available on x86 or ppc64\n"); + exit(EXIT_FAILURE); } ret = g_test_run(); qtest_shutdown(qs); From 54ce6f22e86977b6e94692d90e13fce6fc9aa641 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 7 Oct 2016 12:14:27 +0200 Subject: [PATCH 02/16] qtest: ask endianness of the target in qtest_init() The target endianness is not deduced anymore from the architecture name but asked directly to the guest, using a new qtest command: "endianness". As it can't change (this is the value of TARGET_WORDS_BIGENDIAN), we store it to not have to ask every time we want to know if we have to byte-swap a value. Signed-off-by: Laurent Vivier CC: Greg Kurz CC: David Gibson CC: Peter Maydell Signed-off-by: David Gibson --- qtest.c | 7 ++++ tests/libqos/virtio-pci.c | 2 +- tests/libqtest.c | 68 +++++++++++++-------------------------- tests/libqtest.h | 16 +++++++-- tests/virtio-blk-test.c | 2 +- 5 files changed, 45 insertions(+), 50 deletions(-) diff --git a/qtest.c b/qtest.c index 22482cc359..b53b39c9d7 100644 --- a/qtest.c +++ b/qtest.c @@ -537,6 +537,13 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) qtest_send_prefix(chr); qtest_send(chr, "OK\n"); + } else if (strcmp(words[0], "endianness") == 0) { + qtest_send_prefix(chr); +#if defined(TARGET_WORDS_BIGENDIAN) + qtest_sendf(chr, "OK big\n"); +#else + qtest_sendf(chr, "OK little\n"); +#endif #ifdef TARGET_PPC64 } else if (strcmp(words[0], "rtas") == 0) { uint64_t res, args, ret; diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 18b92b95dc..6e005c1835 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -86,7 +86,7 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, uint64_t addr) int i; uint64_t u64 = 0; - if (qtest_big_endian()) { + if (target_big_endian()) { for (i = 0; i < 8; ++i) { u64 |= (uint64_t)qpci_io_readb(dev->pdev, (void *)(uintptr_t)addr + i) << (7 - i) * 8; diff --git a/tests/libqtest.c b/tests/libqtest.c index 6f6bdf142f..d4e6bff121 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -37,6 +37,7 @@ struct QTestState bool irq_level[MAX_IRQ]; GString *rx; pid_t qemu_pid; /* our child QEMU process */ + bool big_endian; }; static GHookList abrt_hooks; @@ -47,6 +48,8 @@ static struct sigaction sigact_old; g_assert_cmpint(ret, !=, -1); \ } while (0) +static int qtest_query_target_endianness(QTestState *s); + static int init_socket(const char *socket_path) { struct sockaddr_un addr; @@ -209,6 +212,10 @@ QTestState *qtest_init(const char *extra_args) kill(s->qemu_pid, SIGSTOP); } + /* ask endianness of the target */ + + s->big_endian = qtest_query_target_endianness(s); + return s; } @@ -342,6 +349,20 @@ redo: return words; } +static int qtest_query_target_endianness(QTestState *s) +{ + gchar **args; + int big_endian; + + qtest_sendf(s, "endianness\n"); + args = qtest_rsp(s, 1); + g_assert(strcmp(args[1], "big") == 0 || strcmp(args[1], "little") == 0); + big_endian = strcmp(args[1], "big") == 0; + g_strfreev(args); + + return big_endian; +} + typedef struct { JSONMessageParser parser; QDict *response; @@ -886,50 +907,7 @@ char *hmp(const char *fmt, ...) return ret; } -bool qtest_big_endian(void) +bool qtest_big_endian(QTestState *s) { - const char *arch = qtest_get_arch(); - int i; - - static const struct { - const char *arch; - bool big_endian; - } endianness[] = { - { "aarch64", false }, - { "alpha", false }, - { "arm", false }, - { "cris", false }, - { "i386", false }, - { "lm32", true }, - { "m68k", true }, - { "microblaze", true }, - { "microblazeel", false }, - { "mips", true }, - { "mips64", true }, - { "mips64el", false }, - { "mipsel", false }, - { "moxie", true }, - { "or32", true }, - { "ppc", true }, - { "ppc64", true }, - { "ppcemb", true }, - { "s390x", true }, - { "sh4", false }, - { "sh4eb", true }, - { "sparc", true }, - { "sparc64", true }, - { "unicore32", false }, - { "x86_64", false }, - { "xtensa", false }, - { "xtensaeb", true }, - {}, - }; - - for (i = 0; endianness[i].arch; i++) { - if (strcmp(endianness[i].arch, arch) == 0) { - return endianness[i].big_endian; - } - } - - return false; + return s->big_endian; } diff --git a/tests/libqtest.h b/tests/libqtest.h index f7402e0cc1..4be1f77877 100644 --- a/tests/libqtest.h +++ b/tests/libqtest.h @@ -409,6 +409,14 @@ int64_t qtest_clock_step(QTestState *s, int64_t step); */ int64_t qtest_clock_set(QTestState *s, int64_t val); +/** + * qtest_big_endian: + * @s: QTestState instance to operate on. + * + * Returns: True if the architecture under test has a big endian configuration. + */ +bool qtest_big_endian(QTestState *s); + /** * qtest_get_arch: * @@ -874,12 +882,14 @@ static inline int64_t clock_set(int64_t val) } /** - * qtest_big_endian: + * target_big_endian: * * Returns: True if the architecture under test has a big endian configuration. */ -bool qtest_big_endian(void); - +static inline bool target_big_endian(void) +{ + return qtest_big_endian(global_qtest); +} QDict *qmp_fd_receive(int fd); void qmp_fd_sendv(int fd, const char *fmt, va_list ap); diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 3c4fecc1f0..0506917341 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -125,7 +125,7 @@ static inline void virtio_blk_fix_request(QVirtioBlkReq *req) bool host_endian = false; #endif - if (qtest_big_endian() != host_endian) { + if (target_big_endian() != host_endian) { req->type = bswap32(req->type); req->ioprio = bswap32(req->ioprio); req->sector = bswap64(req->sector); From 1ef2ef96296f061c89b60e77c3c50577fd6fe415 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 11 Oct 2016 17:19:35 +0200 Subject: [PATCH 03/16] tests/boot-sector: Use minimum length for the Forth boot script The pxe-test is quite slow on ppc64 with tcg. We can speed it up a little bit by decreasing the size of the file that has to be loaded via TFTP. Signed-off-by: Thomas Huth Reviewed-by: Eric Blake Reviewed-by: Michael S. Tsirkin Signed-off-by: David Gibson --- tests/boot-sector.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/boot-sector.c b/tests/boot-sector.c index e3193c0a12..0168fd0247 100644 --- a/tests/boot-sector.c +++ b/tests/boot-sector.c @@ -72,6 +72,7 @@ static uint8_t boot_sector[0x7e000] = { int boot_sector_init(const char *fname) { FILE *f = fopen(fname, "w"); + size_t len = sizeof boot_sector; if (!f) { fprintf(stderr, "Couldn't open \"%s\": %s", fname, strerror(errno)); @@ -80,13 +81,12 @@ int boot_sector_init(const char *fname) /* For Open Firmware based system, we can use a Forth script instead */ if (strcmp(qtest_get_arch(), "ppc64") == 0) { - memset(boot_sector, ' ', sizeof boot_sector); - sprintf((char *)boot_sector, "\\ Bootscript\n%x %x c! %x %x c!\n", + len = sprintf((char *)boot_sector, "\\ Bootscript\n%x %x c! %x %x c!\n", LOW(SIGNATURE), BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET, HIGH(SIGNATURE), BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET + 1); } - fwrite(boot_sector, 1, sizeof boot_sector, f); + fwrite(boot_sector, 1, len, f); fclose(f); return 0; } From 3e353773721596971db2d0abc7015e7ea3d3af07 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 11 Oct 2016 17:19:36 +0200 Subject: [PATCH 04/16] tests/boot-sector: Use mkstemp() to create a unique file name The pxe-test is run for three different targets now (x86_64, i386 and ppc64), and the bios-tables-test is run for two targets (x86_64 and i386). But each of the tests is using an invariant name for the disk image with the boot sector code - so if the tests are running in parallel, there is a race condition that they destroy the disk image of a parallel test program. Let's use mkstemp() to create unique temporary files here instead - and since mkstemp() is returning an integer file descriptor instead of a FILE pointer, we also switch the fwrite() and fclose() to write() and close() instead. Reported-by: Sascha Silbe Signed-off-by: Thomas Huth Reviewed-by: Michael S. Tsirkin Signed-off-by: David Gibson --- tests/bios-tables-test.c | 2 +- tests/boot-sector.c | 17 ++++++++++++----- tests/boot-sector.h | 4 ++-- tests/pxe-test.c | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 6ea2b6d00a..812f830539 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -112,7 +112,7 @@ typedef struct { g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ } while (0) -static const char *disk = "tests/acpi-test-disk.raw"; +static char disk[] = "tests/acpi-test-disk-XXXXXX"; static const char *data_dir = "tests/acpi-test-data"; #ifdef CONFIG_IASL static const char *iasl = stringify(CONFIG_IASL); diff --git a/tests/boot-sector.c b/tests/boot-sector.c index 0168fd0247..83993143a6 100644 --- a/tests/boot-sector.c +++ b/tests/boot-sector.c @@ -69,12 +69,13 @@ static uint8_t boot_sector[0x7e000] = { }; /* Create boot disk file. */ -int boot_sector_init(const char *fname) +int boot_sector_init(char *fname) { - FILE *f = fopen(fname, "w"); + int fd, ret; size_t len = sizeof boot_sector; - if (!f) { + fd = mkstemp(fname); + if (fd < 0) { fprintf(stderr, "Couldn't open \"%s\": %s", fname, strerror(errno)); return 1; } @@ -86,8 +87,14 @@ int boot_sector_init(const char *fname) HIGH(SIGNATURE), BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET + 1); } - fwrite(boot_sector, 1, len, f); - fclose(f); + ret = write(fd, boot_sector, len); + close(fd); + + if (ret != len) { + fprintf(stderr, "Could not write \"%s\"", fname); + return 1; + } + return 0; } diff --git a/tests/boot-sector.h b/tests/boot-sector.h index f64b477aa3..35d61c7e2b 100644 --- a/tests/boot-sector.h +++ b/tests/boot-sector.h @@ -14,8 +14,8 @@ #ifndef TEST_BOOT_SECTOR_H #define TEST_BOOT_SECTOR_H -/* Create boot disk file. */ -int boot_sector_init(const char *fname); +/* Create boot disk file. fname must be a suitable string for mkstemp() */ +int boot_sector_init(char *fname); /* Loop until signature in memory is OK. */ void boot_sector_test(void); diff --git a/tests/pxe-test.c b/tests/pxe-test.c index 5d3ddbe5e9..34282d3704 100644 --- a/tests/pxe-test.c +++ b/tests/pxe-test.c @@ -19,7 +19,7 @@ #define NETNAME "net0" -static const char *disk = "tests/pxe-test-disk.raw"; +static char disk[] = "tests/pxe-test-disk-XXXXXX"; static void test_pxe_one(const char *params, bool ipv6) { From 74cba2b3b23d069d1a3d9b3ef7876123d939d3ba Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 11 Oct 2016 17:19:37 +0200 Subject: [PATCH 05/16] tests/boot-sector: Increase time-out to 90 seconds Since the PXE tester runs rather slow on ppc64 with tcg, there is a chance that we hit the 60 seconds timeout on machines that have a heavy CPU load. So let's increase the timeout to ease the situation. Signed-off-by: Thomas Huth Reviewed-by: Michael S. Tsirkin Signed-off-by: David Gibson --- tests/boot-sector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/boot-sector.c b/tests/boot-sector.c index 83993143a6..e3880f4455 100644 --- a/tests/boot-sector.c +++ b/tests/boot-sector.c @@ -106,9 +106,9 @@ void boot_sector_test(void) uint16_t signature; int i; - /* Wait at most 1 minute */ + /* Wait at most 90 seconds */ #define TEST_DELAY (1 * G_USEC_PER_SEC / 10) -#define TEST_CYCLES MAX((60 * G_USEC_PER_SEC / TEST_DELAY), 1) +#define TEST_CYCLES MAX((90 * G_USEC_PER_SEC / TEST_DELAY), 1) /* Poll until code has run and modified memory. Once it has we know BIOS * initialization is done. TODO: check that IP reached the halt From 125a9b2327173f4cd38509276ea74f2133934820 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Wed, 12 Oct 2016 10:38:51 +0530 Subject: [PATCH 06/16] target-ppc: implement vexts[bh]2w and vexts[bhw]2d Vector Extend Sign Instructions: vextsb2w: Vector Extend Sign Byte To Word vextsh2w: Vector Extend Sign Halfword To Word vextsb2d: Vector Extend Sign Byte To Doubleword vextsh2d: Vector Extend Sign Halfword To Doubleword vextsw2d: Vector Extend Sign Word To Doubleword Signed-off-by: Nikunj A Dadhania Signed-off-by: David Gibson --- target-ppc/helper.h | 5 +++++ target-ppc/int_helper.c | 15 +++++++++++++++ target-ppc/translate/vmx-impl.inc.c | 5 +++++ target-ppc/translate/vmx-ops.inc.c | 5 +++++ 4 files changed, 30 insertions(+) diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 796ad455f8..04c64217b7 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -267,6 +267,11 @@ DEF_HELPER_3(vinsertb, void, avr, avr, i32) DEF_HELPER_3(vinserth, void, avr, avr, i32) DEF_HELPER_3(vinsertw, void, avr, avr, i32) DEF_HELPER_3(vinsertd, void, avr, avr, i32) +DEF_HELPER_2(vextsb2w, void, avr, avr) +DEF_HELPER_2(vextsh2w, void, avr, avr) +DEF_HELPER_2(vextsb2d, void, avr, avr) +DEF_HELPER_2(vextsh2d, void, avr, avr) +DEF_HELPER_2(vextsw2d, void, avr, avr) DEF_HELPER_2(vupkhpx, void, avr, avr) DEF_HELPER_2(vupklpx, void, avr, avr) DEF_HELPER_2(vupkhsb, void, avr, avr) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 202854fabd..5aee0a81c7 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1934,6 +1934,21 @@ VEXTRACT(uw, u32) VEXTRACT(d, u64) #undef VEXTRACT +#define VEXT_SIGNED(name, element, mask, cast, recast) \ +void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ +{ \ + int i; \ + VECTOR_FOR_INORDER_I(i, element) { \ + r->element[i] = (recast)((cast)(b->element[i] & mask)); \ + } \ +} +VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t) +VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t) +VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t) +VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t) +VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t) +#undef VEXT_SIGNED + #define VSPLTI(suffix, element, splat_type) \ void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \ { \ diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c index 25cd0735ac..c8998f3eab 100644 --- a/target-ppc/translate/vmx-impl.inc.c +++ b/target-ppc/translate/vmx-impl.inc.c @@ -815,6 +815,11 @@ GEN_VXFORM_NOA(vclzb, 1, 28) GEN_VXFORM_NOA(vclzh, 1, 29) GEN_VXFORM_NOA(vclzw, 1, 30) GEN_VXFORM_NOA(vclzd, 1, 31) +GEN_VXFORM_NOA_2(vextsb2w, 1, 24, 16) +GEN_VXFORM_NOA_2(vextsh2w, 1, 24, 17) +GEN_VXFORM_NOA_2(vextsb2d, 1, 24, 24) +GEN_VXFORM_NOA_2(vextsh2d, 1, 24, 25) +GEN_VXFORM_NOA_2(vextsw2d, 1, 24, 26) GEN_VXFORM_NOA_2(vctzb, 1, 24, 28) GEN_VXFORM_NOA_2(vctzh, 1, 24, 29) GEN_VXFORM_NOA_2(vctzw, 1, 24, 30) diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c index ac1dc9b2b2..68cba3e474 100644 --- a/target-ppc/translate/vmx-ops.inc.c +++ b/target-ppc/translate/vmx-ops.inc.c @@ -215,6 +215,11 @@ GEN_VXFORM_DUAL_INV(vspltish, vinserth, 6, 13, 0x00000000, 0x100000, GEN_VXFORM_DUAL_INV(vspltisw, vinsertw, 6, 14, 0x00000000, 0x100000, PPC_ALTIVEC), GEN_VXFORM_300_EXT(vinsertd, 6, 15, 0x100000), +GEN_VXFORM_300_EO(vextsb2w, 0x01, 0x18, 0x10), +GEN_VXFORM_300_EO(vextsh2w, 0x01, 0x18, 0x11), +GEN_VXFORM_300_EO(vextsb2d, 0x01, 0x18, 0x18), +GEN_VXFORM_300_EO(vextsh2d, 0x01, 0x18, 0x19), +GEN_VXFORM_300_EO(vextsw2d, 0x01, 0x18, 0x1A), GEN_VXFORM_300_EO(vctzb, 0x01, 0x18, 0x1C), GEN_VXFORM_300_EO(vctzh, 0x01, 0x18, 0x1D), GEN_VXFORM_300_EO(vctzw, 0x01, 0x18, 0x1E), From 672de881e9367f6d1787901c016c6e836d712f21 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 12 Oct 2016 18:13:53 -0500 Subject: [PATCH 07/16] spapr: fix inheritance chain for default machine options Rather than machine instances having backward-compatible option defaults that need to be repeatedly re-enabled for every new machine type we introduce, we set the defaults appropriate for newer machine types, then add code to explicitly disable instance options as needed to maintain compatibility with older machine types. Currently pseries-2.5 does not inherit from pseries-2.6 in this fashion, which is okay at the moment since we do not have any instance compatibility options for pseries-2.6+ currently. We will make use of this in future patches though, so fix it here. Signed-off-by: Michael Roth [dwg: Extended to make 2.7 inherit from 2.8 as well] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 03e38039e8..fc4c3c98c8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2475,6 +2475,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", true); static void spapr_machine_2_7_instance_options(MachineState *machine) { + spapr_machine_2_8_instance_options(machine); } static void spapr_machine_2_7_class_options(MachineClass *mc) @@ -2501,6 +2502,7 @@ DEFINE_SPAPR_MACHINE(2_7, "2.7", false); static void spapr_machine_2_6_instance_options(MachineState *machine) { + spapr_machine_2_7_instance_options(machine); } static void spapr_machine_2_6_class_options(MachineClass *mc) @@ -2525,6 +2527,7 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", false); static void spapr_machine_2_5_instance_options(MachineState *machine) { + spapr_machine_2_6_instance_options(machine); } static void spapr_machine_2_5_class_options(MachineClass *mc) From cc706a530518f867c29177a5a337bb08503e617e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 3 Oct 2016 09:24:46 +0200 Subject: [PATCH 08/16] ppc/xics: Make the ICSState a list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of an array of fixed sized blocks, use a list, as we will need to have sources with variable number of interrupts. SPAPR only uses a single entry. Native will create more. If performance becomes an issue we can add some hashed lookup but for now this will do fine. Signed-off-by: Benjamin Herrenschmidt [ move the initialization of list to xics_common_initfn, restore xirr_owner after migration and move restoring to icp_post_load] Signed-off-by: Nikunj A Dadhania [ clg: removed the icp_post_load() changes from nikunj patchset v3: http://patchwork.ozlabs.org/patch/646008/ ] Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/trace-events | 5 ++- hw/intc/xics.c | 83 +++++++++++++++++++++++----------------- hw/intc/xics_kvm.c | 27 +++++++++---- hw/intc/xics_spapr.c | 88 ++++++++++++++++++++++++++++--------------- hw/ppc/spapr_events.c | 2 +- hw/ppc/spapr_pci.c | 5 +-- hw/ppc/spapr_vio.c | 2 +- include/hw/ppc/xics.h | 13 ++++--- 8 files changed, 139 insertions(+), 86 deletions(-) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index f12192c082..aa342a86fa 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -56,10 +56,11 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]" xics_ics_eoi(int nr) "ics_eoi: irq %#x" -xics_alloc(int src, int irq) "source#%d, irq %d" -xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d" +xics_alloc(int irq) "irq %d" +xics_alloc_block(int first, int num, bool lsi, int align) "first irq %d, %d irqs, lsi=%d, alignnum %d" xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs" xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free" +xics_icp_post_load(uint32_t server_no, uint32_t xirr, uint64_t addr, uint8_t pend) "server_no %d, xirr %#x, xirr_owner 0x%" PRIx64 ", pending %d" # hw/intc/s390_flic_kvm.c flic_create_device(int err) "flic: create device failed %d" diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 69162f0328..433af93254 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -96,13 +96,16 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu) static void xics_common_reset(DeviceState *d) { XICSState *xics = XICS_COMMON(d); + ICSState *ics; int i; for (i = 0; i < xics->nr_servers; i++) { device_reset(DEVICE(&xics->ss[i])); } - device_reset(DEVICE(xics->ics)); + QLIST_FOREACH(ics, &xics->ics, list) { + device_reset(DEVICE(ics)); + } } static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name, @@ -134,7 +137,6 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name, } assert(info->set_nr_irqs); - assert(xics->ics); info->set_nr_irqs(xics, value, errp); } @@ -174,6 +176,9 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v, static void xics_common_initfn(Object *obj) { + XICSState *xics = XICS_COMMON(obj); + + QLIST_INIT(&xics->ics); object_property_add(obj, "nr_irqs", "int", xics_prop_get_nr_irqs, xics_prop_set_nr_irqs, NULL, NULL, NULL); @@ -212,33 +217,35 @@ static void ics_reject(ICSState *ics, int nr); static void ics_resend(ICSState *ics); static void ics_eoi(ICSState *ics, int nr); -static void icp_check_ipi(XICSState *xics, int server) +static void icp_check_ipi(ICPState *ss) { - ICPState *ss = xics->ss + server; - if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) { return; } - trace_xics_icp_check_ipi(server, ss->mfrr); + trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr); - if (XISR(ss)) { - ics_reject(xics->ics, XISR(ss)); + if (XISR(ss) && ss->xirr_owner) { + ics_reject(ss->xirr_owner, XISR(ss)); } ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI; ss->pending_priority = ss->mfrr; + ss->xirr_owner = NULL; qemu_irq_raise(ss->output); } static void icp_resend(XICSState *xics, int server) { ICPState *ss = xics->ss + server; + ICSState *ics; if (ss->mfrr < CPPR(ss)) { - icp_check_ipi(xics, server); + icp_check_ipi(ss); + } + QLIST_FOREACH(ics, &xics->ics, list) { + ics_resend(ics); } - ics_resend(xics->ics); } void icp_set_cppr(XICSState *xics, int server, uint8_t cppr) @@ -256,7 +263,10 @@ void icp_set_cppr(XICSState *xics, int server, uint8_t cppr) ss->xirr &= ~XISR_MASK; /* Clear XISR */ ss->pending_priority = 0xff; qemu_irq_lower(ss->output); - ics_reject(xics->ics, old_xisr); + if (ss->xirr_owner) { + ics_reject(ss->xirr_owner, old_xisr); + ss->xirr_owner = NULL; + } } } else { if (!XISR(ss)) { @@ -271,7 +281,7 @@ void icp_set_mfrr(XICSState *xics, int server, uint8_t mfrr) ss->mfrr = mfrr; if (mfrr < CPPR(ss)) { - icp_check_ipi(xics, server); + icp_check_ipi(ss); } } @@ -282,6 +292,7 @@ uint32_t icp_accept(ICPState *ss) qemu_irq_lower(ss->output); ss->xirr = ss->pending_priority << 24; ss->pending_priority = 0xff; + ss->xirr_owner = NULL; trace_xics_icp_accept(xirr, ss->xirr); @@ -299,30 +310,40 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr) void icp_eoi(XICSState *xics, int server, uint32_t xirr) { ICPState *ss = xics->ss + server; + ICSState *ics; + uint32_t irq; /* Send EOI -> ICS */ ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK); trace_xics_icp_eoi(server, xirr, ss->xirr); - ics_eoi(xics->ics, xirr & XISR_MASK); + irq = xirr & XISR_MASK; + QLIST_FOREACH(ics, &xics->ics, list) { + if (ics_valid_irq(ics, irq)) { + ics_eoi(ics, irq); + } + } if (!XISR(ss)) { icp_resend(xics, server); } } -static void icp_irq(XICSState *xics, int server, int nr, uint8_t priority) +static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority) { + XICSState *xics = ics->xics; ICPState *ss = xics->ss + server; trace_xics_icp_irq(server, nr, priority); if ((priority >= CPPR(ss)) || (XISR(ss) && (ss->pending_priority <= priority))) { - ics_reject(xics->ics, nr); + ics_reject(ics, nr); } else { - if (XISR(ss)) { - ics_reject(xics->ics, XISR(ss)); + if (XISR(ss) && ss->xirr_owner) { + ics_reject(ss->xirr_owner, XISR(ss)); + ss->xirr_owner = NULL; } ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK); + ss->xirr_owner = ics; ss->pending_priority = priority; trace_xics_icp_raise(ss->xirr, ss->pending_priority); qemu_irq_raise(ss->output); @@ -405,8 +426,7 @@ static void resend_msi(ICSState *ics, int srcno) if (irq->status & XICS_STATUS_REJECTED) { irq->status &= ~XICS_STATUS_REJECTED; if (irq->priority != 0xff) { - icp_irq(ics->xics, irq->server, srcno + ics->offset, - irq->priority); + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } } } @@ -419,7 +439,7 @@ static void resend_lsi(ICSState *ics, int srcno) && (irq->status & XICS_STATUS_ASSERTED) && !(irq->status & XICS_STATUS_SENT)) { irq->status |= XICS_STATUS_SENT; - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority); + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } } @@ -434,7 +454,7 @@ static void set_irq_msi(ICSState *ics, int srcno, int val) irq->status |= XICS_STATUS_MASKED_PENDING; trace_xics_masked_pending(); } else { - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority); + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } } } @@ -473,7 +493,7 @@ static void write_xive_msi(ICSState *ics, int srcno) } irq->status &= ~XICS_STATUS_MASKED_PENDING; - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority); + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } static void write_xive_lsi(ICSState *ics, int srcno) @@ -662,28 +682,23 @@ static const TypeInfo ics_info = { /* * Exported functions */ -int xics_find_source(XICSState *xics, int irq) +ICSState *xics_find_source(XICSState *xics, int irq) { - int sources = 1; - int src; + ICSState *ics; - /* FIXME: implement multiple sources */ - for (src = 0; src < sources; ++src) { - ICSState *ics = &xics->ics[src]; + QLIST_FOREACH(ics, &xics->ics, list) { if (ics_valid_irq(ics, irq)) { - return src; + return ics; } } - - return -1; + return NULL; } qemu_irq xics_get_qirq(XICSState *xics, int irq) { - int src = xics_find_source(xics, irq); + ICSState *ics = xics_find_source(xics, irq); - if (src >= 0) { - ICSState *ics = &xics->ics[src]; + if (ics) { return ics->qirqs[irq - ics->offset]; } diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index c9caefcf2b..843905cfbd 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -361,7 +361,13 @@ static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu) static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp) { - xics->nr_irqs = xics->ics->nr_irqs = nr_irqs; + ICSState *ics = QLIST_FIRST(&xics->ics); + + /* This needs to be deprecated ... */ + xics->nr_irqs = nr_irqs; + if (ics) { + ics->nr_irqs = nr_irqs; + } } static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers, @@ -394,6 +400,7 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) { KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev); XICSState *xics = XICS_COMMON(dev); + ICSState *ics; int i, rc; Error *error = NULL; struct kvm_create_device xics_create_device = { @@ -445,10 +452,12 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) xicskvm->kernel_xics_fd = xics_create_device.fd; - object_property_set_bool(OBJECT(xics->ics), true, "realized", &error); - if (error) { - error_propagate(errp, error); - goto fail; + QLIST_FOREACH(ics, &xics->ics, list) { + object_property_set_bool(OBJECT(ics), true, "realized", &error); + if (error) { + error_propagate(errp, error); + goto fail; + } } assert(xics->nr_servers); @@ -477,10 +486,12 @@ fail: static void xics_kvm_initfn(Object *obj) { XICSState *xics = XICS_COMMON(obj); + ICSState *ics; - xics->ics = ICS(object_new(TYPE_KVM_ICS)); - object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL); - xics->ics->xics = xics; + ics = ICS(object_new(TYPE_KVM_ICS)); + object_property_add_child(obj, "ics", OBJECT(ics), NULL); + ics->xics = xics; + QLIST_INSERT_HEAD(&xics->ics, ics, list); } static void xics_kvm_class_init(ObjectClass *oc, void *data) diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 618826dacf..0b0845d628 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -113,13 +113,17 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = spapr->xics->ics; + ICSState *ics = QLIST_FIRST(&spapr->xics->ics); uint32_t nr, server, priority; if ((nargs != 3) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } + if (!ics) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } nr = rtas_ld(args, 0); server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1)); @@ -141,13 +145,17 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = spapr->xics->ics; + ICSState *ics = QLIST_FIRST(&spapr->xics->ics); uint32_t nr; if ((nargs != 1) || (nret != 3)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } + if (!ics) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } nr = rtas_ld(args, 0); @@ -166,13 +174,17 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = spapr->xics->ics; + ICSState *ics = QLIST_FIRST(&spapr->xics->ics); uint32_t nr; if ((nargs != 1) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } + if (!ics) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } nr = rtas_ld(args, 0); @@ -192,13 +204,17 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = spapr->xics->ics; + ICSState *ics = QLIST_FIRST(&spapr->xics->ics); uint32_t nr; if ((nargs != 1) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } + if (!ics) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } nr = rtas_ld(args, 0); @@ -217,7 +233,13 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp) { - xics->nr_irqs = xics->ics->nr_irqs = nr_irqs; + ICSState *ics = QLIST_FIRST(&xics->ics); + + /* This needs to be deprecated ... */ + xics->nr_irqs = nr_irqs; + if (ics) { + ics->nr_irqs = nr_irqs; + } } static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers, @@ -240,6 +262,7 @@ static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers, static void xics_spapr_realize(DeviceState *dev, Error **errp) { XICSState *xics = XICS_SPAPR(dev); + ICSState *ics; Error *error = NULL; int i; @@ -261,10 +284,12 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp) spapr_register_hypercall(H_EOI, h_eoi); spapr_register_hypercall(H_IPOLL, h_ipoll); - object_property_set_bool(OBJECT(xics->ics), true, "realized", &error); - if (error) { - error_propagate(errp, error); - return; + QLIST_FOREACH(ics, &xics->ics, list) { + object_property_set_bool(OBJECT(ics), true, "realized", &error); + if (error) { + error_propagate(errp, error); + return; + } } for (i = 0; i < xics->nr_servers; i++) { @@ -280,10 +305,12 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp) static void xics_spapr_initfn(Object *obj) { XICSState *xics = XICS_SPAPR(obj); + ICSState *ics; - xics->ics = ICS(object_new(TYPE_ICS)); - object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL); - xics->ics->xics = xics; + ics = ICS(object_new(TYPE_ICS)); + object_property_add_child(obj, "ics", OBJECT(ics), NULL); + ics->xics = xics; + QLIST_INSERT_HEAD(&xics->ics, ics, list); } static void xics_spapr_class_init(ObjectClass *oc, void *data) @@ -329,14 +356,15 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum) return -1; } -int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi, - Error **errp) +int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp) { - ICSState *ics = &xics->ics[src]; + ICSState *ics = QLIST_FIRST(&xics->ics); int irq; + if (!ics) { + return -1; + } if (irq_hint) { - assert(src == xics_find_source(xics, irq_hint)); if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) { error_setg(errp, "can't allocate IRQ %d: already in use", irq_hint); return -1; @@ -352,7 +380,7 @@ int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi, } ics_set_irq_type(ics, irq - ics->offset, lsi); - trace_xics_alloc(src, irq); + trace_xics_alloc(irq); return irq; } @@ -361,13 +389,16 @@ int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi, * Allocate block of consecutive IRQs, and return the number of the first IRQ in * the block. If align==true, aligns the first IRQ number to num. */ -int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, - bool align, Error **errp) +int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align, + Error **errp) { + ICSState *ics = QLIST_FIRST(&xics->ics); int i, first = -1; - ICSState *ics = &xics->ics[src]; - assert(src == 0); + if (!ics) { + return -1; + } + /* * MSIMesage::data is used for storing VIRQ so * it has to be aligned to num to support multiple @@ -394,7 +425,7 @@ int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, } first += ics->offset; - trace_xics_alloc_block(src, first, num, lsi, align); + trace_xics_alloc_block(first, num, lsi, align); return first; } @@ -405,7 +436,7 @@ static void ics_free(ICSState *ics, int srcno, int num) for (i = srcno; i < srcno + num; ++i) { if (ICS_IRQ_FREE(ics, i)) { - trace_xics_ics_free_warn(ics - ics->xics->ics, i + ics->offset); + trace_xics_ics_free_warn(0, i + ics->offset); } memset(&ics->irqs[i], 0, sizeof(ICSIRQState)); } @@ -413,15 +444,10 @@ static void ics_free(ICSState *ics, int srcno, int num) void xics_spapr_free(XICSState *xics, int irq, int num) { - int src = xics_find_source(xics, irq); + ICSState *ics = xics_find_source(xics, irq); - if (src >= 0) { - ICSState *ics = &xics->ics[src]; - - /* FIXME: implement multiple sources */ - assert(src == 0); - - trace_xics_ics_free(ics - xics->ics, irq, num); + if (ics) { + trace_xics_ics_free(0, irq, num); ics_free(ics, irq - ics->offset, num); } } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 4c7b6aeab6..6d3534541c 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -594,7 +594,7 @@ out_no_events: void spapr_events_init(sPAPRMachineState *spapr) { QTAILQ_INIT(&spapr->pending_events); - spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false, + spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, false, &error_fatal); spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 4f008654d6..a7ca988387 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -363,7 +363,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, } /* Allocate MSIs */ - irq = xics_spapr_alloc_block(spapr->xics, 0, req_num, false, + irq = xics_spapr_alloc_block(spapr->xics, req_num, false, ret_intr_type == RTAS_TYPE_MSI, &err); if (err) { error_reportf_err(err, "Can't allocate MSIs for device %x: ", @@ -1445,8 +1445,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) uint32_t irq; Error *local_err = NULL; - irq = xics_spapr_alloc_block(spapr->xics, 0, 1, true, false, - &local_err); + irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err); if (local_err) { error_propagate(errp, local_err); error_prepend(errp, "can't allocate LSIs: "); diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index d68dd35679..3648aa5960 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -453,7 +453,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) dev->qdev.id = id; } - dev->irq = xics_spapr_alloc(spapr->xics, 0, dev->irq, false, &local_err); + dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 5aac67ad89..e49a2dab93 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -85,7 +85,7 @@ struct XICSState { uint32_t nr_servers; uint32_t nr_irqs; ICPState *ss; - ICSState *ics; + QLIST_HEAD(, ICSState) ics; }; #define TYPE_ICP "icp" @@ -111,6 +111,7 @@ struct ICPState { DeviceState parent_obj; /*< public >*/ CPUState *cs; + ICSState *xirr_owner; uint32_t xirr; uint8_t pending_priority; uint8_t mfrr; @@ -145,6 +146,7 @@ struct ICSState { qemu_irq *qirqs; ICSIRQState *irqs; XICSState *xics; + QLIST_ENTRY(ICSState) list; }; static inline bool ics_valid_irq(ICSState *ics, uint32_t nr) @@ -172,10 +174,9 @@ struct ICSIRQState { #define XICS_IRQS_SPAPR 1024 qemu_irq xics_get_qirq(XICSState *icp, int irq); -int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi, - Error **errp); -int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, - bool align, Error **errp); +int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi, Error **errp); +int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align, + Error **errp); void xics_spapr_free(XICSState *icp, int irq, int num); void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu); @@ -195,6 +196,6 @@ void ics_write_xive(ICSState *ics, int nr, int server, void ics_set_irq_type(ICSState *ics, int srcno, bool lsi); -int xics_find_source(XICSState *icp, int irq); +ICSState *xics_find_source(XICSState *icp, int irq); #endif /* XICS_H */ From d4d7a59a7a703220757cdc24d08a498db3e70212 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 3 Oct 2016 09:24:47 +0200 Subject: [PATCH 09/16] ppc/xics: Split ICS into ics-base and ics class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing implementation remains same and ics-base is introduced. The type name "ics" is retained, and all the related functions renamed as ics_simple_* This will allow different implementations for the source controllers such as the MSI support of PHB3 on Power8 which uses in-memory state tables for example. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Nikunj A Dadhania Reviewed-by: David Gibson [ clg: added ICS_BASE_GET_CLASS and related fixes, based on : http://patchwork.ozlabs.org/patch/646010/ ] Signed-off-by: Cédric Le Goater Signed-off-by: David Gibson --- hw/intc/trace-events | 10 +-- hw/intc/xics.c | 152 ++++++++++++++++++++++++++---------------- hw/intc/xics_kvm.c | 12 ++-- hw/intc/xics_spapr.c | 30 +++++---- include/hw/ppc/xics.h | 27 +++++--- 5 files changed, 139 insertions(+), 92 deletions(-) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index aa342a86fa..a367b46b1c 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -50,12 +50,12 @@ xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr) "icp_accept: XIRR %#"PRIx3 xics_icp_eoi(int server, uint32_t xirr, uint32_t new_xirr) "icp_eoi: server %d given XIRR %#"PRIx32" new XIRR %#"PRIx32 xics_icp_irq(int server, int nr, uint8_t priority) "cpu %d trying to deliver irq %#"PRIx32" priority %#x" xics_icp_raise(uint32_t xirr, uint8_t pending_priority) "raising IRQ new XIRR=%#x new pending priority=%#x" -xics_set_irq_msi(int srcno, int nr) "set_irq_msi: srcno %d [irq %#x]" +xics_ics_simple_set_irq_msi(int srcno, int nr) "set_irq_msi: srcno %d [irq %#x]" xics_masked_pending(void) "set_irq_msi: masked pending" -xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" -xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" -xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]" -xics_ics_eoi(int nr) "ics_eoi: irq %#x" +xics_ics_simple_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" +xics_ics_simple_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" +xics_ics_simple_reject(int nr, int srcno) "reject irq %#x [src %d]" +xics_ics_simple_eoi(int nr) "ics_eoi: irq %#x" xics_alloc(int irq) "irq %d" xics_alloc_block(int first, int num, bool lsi, int align) "first irq %d, %d irqs, lsi=%d, alignnum %d" xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs" diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 433af93254..f40b00003a 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -213,9 +213,32 @@ static const TypeInfo xics_common_info = { #define XISR(ss) (((ss)->xirr) & XISR_MASK) #define CPPR(ss) (((ss)->xirr) >> 24) -static void ics_reject(ICSState *ics, int nr); -static void ics_resend(ICSState *ics); -static void ics_eoi(ICSState *ics, int nr); +static void ics_reject(ICSState *ics, uint32_t nr) +{ + ICSStateClass *k = ICS_BASE_GET_CLASS(ics); + + if (k->reject) { + k->reject(ics, nr); + } +} + +static void ics_resend(ICSState *ics) +{ + ICSStateClass *k = ICS_BASE_GET_CLASS(ics); + + if (k->resend) { + k->resend(ics); + } +} + +static void ics_eoi(ICSState *ics, int nr) +{ + ICSStateClass *k = ICS_BASE_GET_CLASS(ics); + + if (k->eoi) { + k->eoi(ics, nr); + } +} static void icp_check_ipi(ICPState *ss) { @@ -418,7 +441,7 @@ static const TypeInfo icp_info = { /* * ICS: Source layer */ -static void resend_msi(ICSState *ics, int srcno) +static void ics_simple_resend_msi(ICSState *ics, int srcno) { ICSIRQState *irq = ics->irqs + srcno; @@ -431,7 +454,7 @@ static void resend_msi(ICSState *ics, int srcno) } } -static void resend_lsi(ICSState *ics, int srcno) +static void ics_simple_resend_lsi(ICSState *ics, int srcno) { ICSIRQState *irq = ics->irqs + srcno; @@ -443,11 +466,11 @@ static void resend_lsi(ICSState *ics, int srcno) } } -static void set_irq_msi(ICSState *ics, int srcno, int val) +static void ics_simple_set_irq_msi(ICSState *ics, int srcno, int val) { ICSIRQState *irq = ics->irqs + srcno; - trace_xics_set_irq_msi(srcno, srcno + ics->offset); + trace_xics_ics_simple_set_irq_msi(srcno, srcno + ics->offset); if (val) { if (irq->priority == 0xff) { @@ -459,31 +482,31 @@ static void set_irq_msi(ICSState *ics, int srcno, int val) } } -static void set_irq_lsi(ICSState *ics, int srcno, int val) +static void ics_simple_set_irq_lsi(ICSState *ics, int srcno, int val) { ICSIRQState *irq = ics->irqs + srcno; - trace_xics_set_irq_lsi(srcno, srcno + ics->offset); + trace_xics_ics_simple_set_irq_lsi(srcno, srcno + ics->offset); if (val) { irq->status |= XICS_STATUS_ASSERTED; } else { irq->status &= ~XICS_STATUS_ASSERTED; } - resend_lsi(ics, srcno); + ics_simple_resend_lsi(ics, srcno); } -static void ics_set_irq(void *opaque, int srcno, int val) +static void ics_simple_set_irq(void *opaque, int srcno, int val) { ICSState *ics = (ICSState *)opaque; if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { - set_irq_lsi(ics, srcno, val); + ics_simple_set_irq_lsi(ics, srcno, val); } else { - set_irq_msi(ics, srcno, val); + ics_simple_set_irq_msi(ics, srcno, val); } } -static void write_xive_msi(ICSState *ics, int srcno) +static void ics_simple_write_xive_msi(ICSState *ics, int srcno) { ICSIRQState *irq = ics->irqs + srcno; @@ -496,35 +519,35 @@ static void write_xive_msi(ICSState *ics, int srcno) icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } -static void write_xive_lsi(ICSState *ics, int srcno) +static void ics_simple_write_xive_lsi(ICSState *ics, int srcno) { - resend_lsi(ics, srcno); + ics_simple_resend_lsi(ics, srcno); } -void ics_write_xive(ICSState *ics, int nr, int server, - uint8_t priority, uint8_t saved_priority) +void ics_simple_write_xive(ICSState *ics, int srcno, int server, + uint8_t priority, uint8_t saved_priority) { - int srcno = nr - ics->offset; ICSIRQState *irq = ics->irqs + srcno; irq->server = server; irq->priority = priority; irq->saved_priority = saved_priority; - trace_xics_ics_write_xive(nr, srcno, server, priority); + trace_xics_ics_simple_write_xive(ics->offset + srcno, srcno, server, + priority); if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { - write_xive_lsi(ics, srcno); + ics_simple_write_xive_lsi(ics, srcno); } else { - write_xive_msi(ics, srcno); + ics_simple_write_xive_msi(ics, srcno); } } -static void ics_reject(ICSState *ics, int nr) +static void ics_simple_reject(ICSState *ics, uint32_t nr) { ICSIRQState *irq = ics->irqs + nr - ics->offset; - trace_xics_ics_reject(nr, nr - ics->offset); + trace_xics_ics_simple_reject(nr, nr - ics->offset); if (irq->flags & XICS_FLAGS_IRQ_MSI) { irq->status |= XICS_STATUS_REJECTED; } else if (irq->flags & XICS_FLAGS_IRQ_LSI) { @@ -532,35 +555,35 @@ static void ics_reject(ICSState *ics, int nr) } } -static void ics_resend(ICSState *ics) +static void ics_simple_resend(ICSState *ics) { int i; for (i = 0; i < ics->nr_irqs; i++) { /* FIXME: filter by server#? */ if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { - resend_lsi(ics, i); + ics_simple_resend_lsi(ics, i); } else { - resend_msi(ics, i); + ics_simple_resend_msi(ics, i); } } } -static void ics_eoi(ICSState *ics, int nr) +static void ics_simple_eoi(ICSState *ics, uint32_t nr) { int srcno = nr - ics->offset; ICSIRQState *irq = ics->irqs + srcno; - trace_xics_ics_eoi(nr); + trace_xics_ics_simple_eoi(nr); if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { irq->status &= ~XICS_STATUS_SENT; } } -static void ics_reset(DeviceState *dev) +static void ics_simple_reset(DeviceState *dev) { - ICSState *ics = ICS(dev); + ICSState *ics = ICS_SIMPLE(dev); int i; uint8_t flags[ics->nr_irqs]; @@ -577,7 +600,7 @@ static void ics_reset(DeviceState *dev) } } -static int ics_post_load(ICSState *ics, int version_id) +static int ics_simple_post_load(ICSState *ics, int version_id) { int i; @@ -588,20 +611,20 @@ static int ics_post_load(ICSState *ics, int version_id) return 0; } -static void ics_dispatch_pre_save(void *opaque) +static void ics_simple_dispatch_pre_save(void *opaque) { ICSState *ics = opaque; - ICSStateClass *info = ICS_GET_CLASS(ics); + ICSStateClass *info = ICS_BASE_GET_CLASS(ics); if (info->pre_save) { info->pre_save(ics); } } -static int ics_dispatch_post_load(void *opaque, int version_id) +static int ics_simple_dispatch_post_load(void *opaque, int version_id) { ICSState *ics = opaque; - ICSStateClass *info = ICS_GET_CLASS(ics); + ICSStateClass *info = ICS_BASE_GET_CLASS(ics); if (info->post_load) { return info->post_load(ics, version_id); @@ -610,7 +633,7 @@ static int ics_dispatch_post_load(void *opaque, int version_id) return 0; } -static const VMStateDescription vmstate_ics_irq = { +static const VMStateDescription vmstate_ics_simple_irq = { .name = "ics/irq", .version_id = 2, .minimum_version_id = 1, @@ -624,59 +647,71 @@ static const VMStateDescription vmstate_ics_irq = { }, }; -static const VMStateDescription vmstate_ics = { +static const VMStateDescription vmstate_ics_simple = { .name = "ics", .version_id = 1, .minimum_version_id = 1, - .pre_save = ics_dispatch_pre_save, - .post_load = ics_dispatch_post_load, + .pre_save = ics_simple_dispatch_pre_save, + .post_load = ics_simple_dispatch_post_load, .fields = (VMStateField[]) { /* Sanity check */ VMSTATE_UINT32_EQUAL(nr_irqs, ICSState), VMSTATE_STRUCT_VARRAY_POINTER_UINT32(irqs, ICSState, nr_irqs, - vmstate_ics_irq, ICSIRQState), + vmstate_ics_simple_irq, + ICSIRQState), VMSTATE_END_OF_LIST() }, }; -static void ics_initfn(Object *obj) +static void ics_simple_initfn(Object *obj) { - ICSState *ics = ICS(obj); + ICSState *ics = ICS_SIMPLE(obj); ics->offset = XICS_IRQ_BASE; } -static void ics_realize(DeviceState *dev, Error **errp) +static void ics_simple_realize(DeviceState *dev, Error **errp) { - ICSState *ics = ICS(dev); + ICSState *ics = ICS_SIMPLE(dev); if (!ics->nr_irqs) { error_setg(errp, "Number of interrupts needs to be greater 0"); return; } ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState)); - ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs); + ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs); } -static void ics_class_init(ObjectClass *klass, void *data) +static void ics_simple_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - ICSStateClass *isc = ICS_CLASS(klass); + ICSStateClass *isc = ICS_BASE_CLASS(klass); - dc->realize = ics_realize; - dc->vmsd = &vmstate_ics; - dc->reset = ics_reset; - isc->post_load = ics_post_load; + dc->realize = ics_simple_realize; + dc->vmsd = &vmstate_ics_simple; + dc->reset = ics_simple_reset; + isc->post_load = ics_simple_post_load; + isc->reject = ics_simple_reject; + isc->resend = ics_simple_resend; + isc->eoi = ics_simple_eoi; } -static const TypeInfo ics_info = { - .name = TYPE_ICS, - .parent = TYPE_DEVICE, +static const TypeInfo ics_simple_info = { + .name = TYPE_ICS_SIMPLE, + .parent = TYPE_ICS_BASE, + .instance_size = sizeof(ICSState), + .class_init = ics_simple_class_init, + .class_size = sizeof(ICSStateClass), + .instance_init = ics_simple_initfn, +}; + +static const TypeInfo ics_base_info = { + .name = TYPE_ICS_BASE, + .parent = TYPE_DEVICE, + .abstract = true, .instance_size = sizeof(ICSState), - .class_init = ics_class_init, .class_size = sizeof(ICSStateClass), - .instance_init = ics_initfn, }; /* @@ -716,7 +751,8 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) static void xics_register_types(void) { type_register_static(&xics_common_info); - type_register_static(&ics_info); + type_register_static(&ics_simple_info); + type_register_static(&ics_base_info); type_register_static(&icp_info); } diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 843905cfbd..9c2f198fd1 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -272,7 +272,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val) static void ics_kvm_reset(DeviceState *dev) { - ICSState *ics = ICS(dev); + ICSState *ics = ICS_SIMPLE(dev); int i; uint8_t flags[ics->nr_irqs]; @@ -293,7 +293,7 @@ static void ics_kvm_reset(DeviceState *dev) static void ics_kvm_realize(DeviceState *dev, Error **errp) { - ICSState *ics = ICS(dev); + ICSState *ics = ICS_SIMPLE(dev); if (!ics->nr_irqs) { error_setg(errp, "Number of interrupts needs to be greater 0"); @@ -306,7 +306,7 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp) static void ics_kvm_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - ICSStateClass *icsc = ICS_CLASS(klass); + ICSStateClass *icsc = ICS_BASE_CLASS(klass); dc->realize = ics_kvm_realize; dc->reset = ics_kvm_reset; @@ -315,8 +315,8 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data) } static const TypeInfo ics_kvm_info = { - .name = TYPE_KVM_ICS, - .parent = TYPE_ICS, + .name = TYPE_ICS_KVM, + .parent = TYPE_ICS_SIMPLE, .instance_size = sizeof(ICSState), .class_init = ics_kvm_class_init, }; @@ -488,7 +488,7 @@ static void xics_kvm_initfn(Object *obj) XICSState *xics = XICS_COMMON(obj); ICSState *ics; - ics = ICS(object_new(TYPE_KVM_ICS)); + ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM)); object_property_add_child(obj, "ics", OBJECT(ics), NULL); ics->xics = xics; QLIST_INSERT_HEAD(&xics->ics, ics, list); diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 0b0845d628..e8d0623c2c 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -114,7 +114,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nret, target_ulong rets) { ICSState *ics = QLIST_FIRST(&spapr->xics->ics); - uint32_t nr, server, priority; + uint32_t nr, srcno, server, priority; if ((nargs != 3) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -135,7 +135,8 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - ics_write_xive(ics, nr, server, priority, priority); + srcno = nr - ics->offset; + ics_simple_write_xive(ics, srcno, server, priority, priority); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -146,7 +147,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nret, target_ulong rets) { ICSState *ics = QLIST_FIRST(&spapr->xics->ics); - uint32_t nr; + uint32_t nr, srcno; if ((nargs != 1) || (nret != 3)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -165,8 +166,9 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, } rtas_st(rets, 0, RTAS_OUT_SUCCESS); - rtas_st(rets, 1, ics->irqs[nr - ics->offset].server); - rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority); + srcno = nr - ics->offset; + rtas_st(rets, 1, ics->irqs[srcno].server); + rtas_st(rets, 2, ics->irqs[srcno].priority); } static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, @@ -175,7 +177,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nret, target_ulong rets) { ICSState *ics = QLIST_FIRST(&spapr->xics->ics); - uint32_t nr; + uint32_t nr, srcno; if ((nargs != 1) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -193,8 +195,9 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff, - ics->irqs[nr - ics->offset].priority); + srcno = nr - ics->offset; + ics_simple_write_xive(ics, srcno, ics->irqs[srcno].server, 0xff, + ics->irqs[srcno].priority); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -205,7 +208,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nret, target_ulong rets) { ICSState *ics = QLIST_FIRST(&spapr->xics->ics); - uint32_t nr; + uint32_t nr, srcno; if ((nargs != 1) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -223,9 +226,10 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, - ics->irqs[nr - ics->offset].saved_priority, - ics->irqs[nr - ics->offset].saved_priority); + srcno = nr - ics->offset; + ics_simple_write_xive(ics, srcno, ics->irqs[srcno].server, + ics->irqs[srcno].saved_priority, + ics->irqs[srcno].saved_priority); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -307,7 +311,7 @@ static void xics_spapr_initfn(Object *obj) XICSState *xics = XICS_SPAPR(obj); ICSState *ics; - ics = ICS(object_new(TYPE_ICS)); + ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE)); object_property_add_child(obj, "ics", OBJECT(ics), NULL); ics->xics = xics; QLIST_INSERT_HEAD(&xics->ics, ics, list); diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index e49a2dab93..66ae55ded3 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -119,22 +119,29 @@ struct ICPState { bool cap_irq_xics_enabled; }; -#define TYPE_ICS "ics" -#define ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS) +#define TYPE_ICS_BASE "ics-base" +#define ICS_BASE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_BASE) -#define TYPE_KVM_ICS "icskvm" -#define KVM_ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_KVM_ICS) +/* Retain ics for sPAPR for migration from existing sPAPR guests */ +#define TYPE_ICS_SIMPLE "ics" +#define ICS_SIMPLE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_SIMPLE) -#define ICS_CLASS(klass) \ - OBJECT_CLASS_CHECK(ICSStateClass, (klass), TYPE_ICS) -#define ICS_GET_CLASS(obj) \ - OBJECT_GET_CLASS(ICSStateClass, (obj), TYPE_ICS) +#define TYPE_ICS_KVM "icskvm" +#define ICS_KVM(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_KVM) + +#define ICS_BASE_CLASS(klass) \ + OBJECT_CLASS_CHECK(ICSStateClass, (klass), TYPE_ICS_BASE) +#define ICS_BASE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(ICSStateClass, (obj), TYPE_ICS_BASE) struct ICSStateClass { DeviceClass parent_class; void (*pre_save)(ICSState *s); int (*post_load)(ICSState *s, int version_id); + void (*reject)(ICSState *s, uint32_t irq); + void (*resend)(ICSState *s); + void (*eoi)(ICSState *s, uint32_t irq); }; struct ICSState { @@ -191,8 +198,8 @@ uint32_t icp_accept(ICPState *ss); uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr); void icp_eoi(XICSState *icp, int server, uint32_t xirr); -void ics_write_xive(ICSState *ics, int nr, int server, - uint8_t priority, uint8_t saved_priority); +void ics_simple_write_xive(ICSState *ics, int nr, int server, + uint8_t priority, uint8_t saved_priority); void ics_set_irq_type(ICSState *ics, int srcno, bool lsi); From cd1b354ec05035f84bb05f591cb632770ad36e7c Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 12 Oct 2016 13:30:07 +1100 Subject: [PATCH 10/16] libqos: Isolate knowledge of spapr memory map to qpci_init_spapr() The libqos code for accessing PCI on the spapr machine type uses IOBASE() and MMIOBASE() macros to determine the address in the CPU memory map of the windows to PCI address space. This is a detail of the implementation of PCI in the machine type, it's not specified by the PAPR standard. Real guests would get the addresses of the PCI windows from the device tree. Finding the device tree in libqos would be awkward, but we can at least localize this knowledge of the implementation to the init function, saving it in the QPCIBusSPAPR structure for use by the accessors. That leaves only one place to fix if we alter the location of the PCI windows, as we're planning to do. Signed-off-by: David Gibson Reviewed-by: Laurent Vivier --- tests/libqos/pci-spapr.c | 113 ++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c index 2f73badfd9..1765a54c80 100644 --- a/tests/libqos/pci-spapr.c +++ b/tests/libqos/pci-spapr.c @@ -18,30 +18,23 @@ /* From include/hw/pci-host/spapr.h */ -#define SPAPR_PCI_BASE_BUID 0x800000020000000ULL - -#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL - -#define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL -#define SPAPR_PCI_WINDOW_SPACING 0x1000000000ULL -#define SPAPR_PCI_MMIO_WIN_OFF 0xA0000000 -#define SPAPR_PCI_MMIO_WIN_SIZE (SPAPR_PCI_WINDOW_SPACING - \ - SPAPR_PCI_MEM_WIN_BUS_OFFSET) -#define SPAPR_PCI_IO_WIN_OFF 0x80000000 -#define SPAPR_PCI_IO_WIN_SIZE 0x10000 - -/* index is the phb index */ - -#define BUIDBASE(index) (SPAPR_PCI_BASE_BUID + (index)) -#define PCIBASE(index) (SPAPR_PCI_WINDOW_BASE + \ - (index) * SPAPR_PCI_WINDOW_SPACING) -#define IOBASE(index) (PCIBASE(index) + SPAPR_PCI_IO_WIN_OFF) -#define MMIOBASE(index) (PCIBASE(index) + SPAPR_PCI_MMIO_WIN_OFF) +typedef struct QPCIWindow { + uint64_t pci_base; /* window address in PCI space */ + uint64_t size; /* window size */ +} QPCIWindow; typedef struct QPCIBusSPAPR { QPCIBus bus; QGuestAllocator *alloc; + uint64_t buid; + + uint64_t pio_cpu_base; + QPCIWindow pio; + + uint64_t mmio_cpu_base; + QPCIWindow mmio; + uint64_t pci_hole_start; uint64_t pci_hole_size; uint64_t pci_hole_alloc; @@ -59,69 +52,75 @@ typedef struct QPCIBusSPAPR { static uint8_t qpci_spapr_io_readb(QPCIBus *bus, void *addr) { + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint64_t port = (uintptr_t)addr; uint8_t v; - if (port < SPAPR_PCI_IO_WIN_SIZE) { - v = readb(IOBASE(0) + port); + if (port < s->pio.size) { + v = readb(s->pio_cpu_base + port); } else { - v = readb(MMIOBASE(0) + port); + v = readb(s->mmio_cpu_base + port); } return v; } static uint16_t qpci_spapr_io_readw(QPCIBus *bus, void *addr) { + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint64_t port = (uintptr_t)addr; uint16_t v; - if (port < SPAPR_PCI_IO_WIN_SIZE) { - v = readw(IOBASE(0) + port); + if (port < s->pio.size) { + v = readw(s->pio_cpu_base + port); } else { - v = readw(MMIOBASE(0) + port); + v = readw(s->mmio_cpu_base + port); } return bswap16(v); } static uint32_t qpci_spapr_io_readl(QPCIBus *bus, void *addr) { + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint64_t port = (uintptr_t)addr; uint32_t v; - if (port < SPAPR_PCI_IO_WIN_SIZE) { - v = readl(IOBASE(0) + port); + if (port < s->pio.size) { + v = readl(s->pio_cpu_base + port); } else { - v = readl(MMIOBASE(0) + port); + v = readl(s->mmio_cpu_base + port); } return bswap32(v); } static void qpci_spapr_io_writeb(QPCIBus *bus, void *addr, uint8_t value) { + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint64_t port = (uintptr_t)addr; - if (port < SPAPR_PCI_IO_WIN_SIZE) { - writeb(IOBASE(0) + port, value); + if (port < s->pio.size) { + writeb(s->pio_cpu_base + port, value); } else { - writeb(MMIOBASE(0) + port, value); + writeb(s->mmio_cpu_base + port, value); } } static void qpci_spapr_io_writew(QPCIBus *bus, void *addr, uint16_t value) { + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint64_t port = (uintptr_t)addr; value = bswap16(value); - if (port < SPAPR_PCI_IO_WIN_SIZE) { - writew(IOBASE(0) + port, value); + if (port < s->pio.size) { + writew(s->pio_cpu_base + port, value); } else { - writew(MMIOBASE(0) + port, value); + writew(s->mmio_cpu_base + port, value); } } static void qpci_spapr_io_writel(QPCIBus *bus, void *addr, uint32_t value) { + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint64_t port = (uintptr_t)addr; value = bswap32(value); - if (port < SPAPR_PCI_IO_WIN_SIZE) { - writel(IOBASE(0) + port, value); + if (port < s->pio.size) { + writel(s->pio_cpu_base + port, value); } else { - writel(MMIOBASE(0) + port, value); + writel(s->mmio_cpu_base + port, value); } } @@ -129,24 +128,21 @@ static uint8_t qpci_spapr_config_readb(QPCIBus *bus, int devfn, uint8_t offset) { QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint32_t config_addr = (devfn << 8) | offset; - return qrtas_ibm_read_pci_config(s->alloc, BUIDBASE(0), - config_addr, 1); + return qrtas_ibm_read_pci_config(s->alloc, s->buid, config_addr, 1); } static uint16_t qpci_spapr_config_readw(QPCIBus *bus, int devfn, uint8_t offset) { QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint32_t config_addr = (devfn << 8) | offset; - return qrtas_ibm_read_pci_config(s->alloc, BUIDBASE(0), - config_addr, 2); + return qrtas_ibm_read_pci_config(s->alloc, s->buid, config_addr, 2); } static uint32_t qpci_spapr_config_readl(QPCIBus *bus, int devfn, uint8_t offset) { QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint32_t config_addr = (devfn << 8) | offset; - return qrtas_ibm_read_pci_config(s->alloc, BUIDBASE(0), - config_addr, 4); + return qrtas_ibm_read_pci_config(s->alloc, s->buid, config_addr, 4); } static void qpci_spapr_config_writeb(QPCIBus *bus, int devfn, uint8_t offset, @@ -154,8 +150,7 @@ static void qpci_spapr_config_writeb(QPCIBus *bus, int devfn, uint8_t offset, { QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint32_t config_addr = (devfn << 8) | offset; - qrtas_ibm_write_pci_config(s->alloc, BUIDBASE(0), - config_addr, 1, value); + qrtas_ibm_write_pci_config(s->alloc, s->buid, config_addr, 1, value); } static void qpci_spapr_config_writew(QPCIBus *bus, int devfn, uint8_t offset, @@ -163,8 +158,7 @@ static void qpci_spapr_config_writew(QPCIBus *bus, int devfn, uint8_t offset, { QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint32_t config_addr = (devfn << 8) | offset; - qrtas_ibm_write_pci_config(s->alloc, BUIDBASE(0), - config_addr, 2, value); + qrtas_ibm_write_pci_config(s->alloc, s->buid, config_addr, 2, value); } static void qpci_spapr_config_writel(QPCIBus *bus, int devfn, uint8_t offset, @@ -172,8 +166,7 @@ static void qpci_spapr_config_writel(QPCIBus *bus, int devfn, uint8_t offset, { QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); uint32_t config_addr = (devfn << 8) | offset; - qrtas_ibm_write_pci_config(s->alloc, BUIDBASE(0), - config_addr, 4, value); + qrtas_ibm_write_pci_config(s->alloc, s->buid, config_addr, 4, value); } static void *qpci_spapr_iomap(QPCIBus *bus, QPCIDevice *dev, int barno, @@ -242,6 +235,15 @@ static void qpci_spapr_iounmap(QPCIBus *bus, void *data) /* FIXME */ } +#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL +#define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL +#define SPAPR_PCI_WINDOW_SPACING 0x1000000000ULL +#define SPAPR_PCI_MMIO_WIN_OFF 0xA0000000 +#define SPAPR_PCI_MMIO_WIN_SIZE (SPAPR_PCI_WINDOW_SPACING - \ + SPAPR_PCI_MEM_WIN_BUS_OFFSET) +#define SPAPR_PCI_IO_WIN_OFF 0x80000000 +#define SPAPR_PCI_IO_WIN_SIZE 0x10000 + QPCIBus *qpci_init_spapr(QGuestAllocator *alloc) { QPCIBusSPAPR *ret; @@ -269,6 +271,19 @@ QPCIBus *qpci_init_spapr(QGuestAllocator *alloc) ret->bus.iomap = qpci_spapr_iomap; ret->bus.iounmap = qpci_spapr_iounmap; + /* FIXME: We assume the default location of the PHB for now. + * Ideally we'd parse the device tree deposited in the guest to + * get the window locations */ + ret->buid = 0x800000020000000ULL; + + ret->pio_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_IO_WIN_OFF; + ret->pio.pci_base = 0; + ret->pio.size = SPAPR_PCI_IO_WIN_SIZE; + + ret->mmio_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_MMIO_WIN_OFF; + ret->mmio.pci_base = SPAPR_PCI_MEM_WIN_BUS_OFFSET; + ret->mmio.size = SPAPR_PCI_MMIO_WIN_SIZE; + ret->pci_hole_start = 0xC0000000; ret->pci_hole_size = SPAPR_PCI_MMIO_WIN_SIZE; ret->pci_hole_alloc = 0; From c711369087d5964ea77b0fd0e0ff1f268e1a67ee Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 12 Oct 2016 13:38:25 +1100 Subject: [PATCH 11/16] libqos: Correct error in PCI hole sizing for spapr In pci-spapr.c (as in pci-pc.c from which it was derived), the pci_hole_start/pci_hole_size and pci_iohole_start/pci_iohole_size pairs[1] essentially define the region of PCI (not CPU) addresses in which MMIO or PIO BARs respectively will be allocated. The size value is relative to the start value. But in pci-spapr.c it is set to the entire size of the window supported by the (emulated) hardware, but the start values are *not* at the beginning of the emulated windows. That means if you tried to map enough PCI BARs, we'd messily overrun the IO windows, instead of failing in iomap as we should. This patch corrects this by calculating the hole sizes from the location of the window in PCI space and the hole start. [1] Those are bad names, but that's a problem for another time. Signed-off-by: David Gibson Reviewed-by: Laurent Vivier --- tests/libqos/pci-spapr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c index 1765a54c80..3192903497 100644 --- a/tests/libqos/pci-spapr.c +++ b/tests/libqos/pci-spapr.c @@ -285,11 +285,13 @@ QPCIBus *qpci_init_spapr(QGuestAllocator *alloc) ret->mmio.size = SPAPR_PCI_MMIO_WIN_SIZE; ret->pci_hole_start = 0xC0000000; - ret->pci_hole_size = SPAPR_PCI_MMIO_WIN_SIZE; + ret->pci_hole_size = + ret->mmio.pci_base + ret->mmio.size - ret->pci_hole_start; ret->pci_hole_alloc = 0; ret->pci_iohole_start = 0xc000; - ret->pci_iohole_size = SPAPR_PCI_IO_WIN_SIZE; + ret->pci_iohole_size = + ret->pio.pci_base + ret->pio.size - ret->pci_iohole_start; ret->pci_iohole_alloc = 0; return &ret->bus; From 8360544a6d3a54df1fce80f55ba4ad075a8ded54 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 12 Oct 2016 14:07:04 +1100 Subject: [PATCH 12/16] libqos: Limit spapr-pci to 32-bit MMIO for now Currently the functions in pci-spapr.c (like pci-pc.c on which it's based) don't distinguish between 32-bit and 64-bit PCI MMIO. At the moment, the qemu side implementation is a bit weird and has a single MMIO window straddling 32-bit and 64-bit regions, but we're likely to change that in future. In any case, pci-pc.c - and therefore the testcases using PCI - only handle 32-bit MMIOs for now. For spapr despite whatever changes might happen with the MMIO windows, the 32-bit window is likely to remain at 2..4 GiB in PCI space. So, explicitly limit pci-spapr.c to 32-bit MMIOs for now, we can add 64-bit MMIO support back in when and if we need it. Signed-off-by: David Gibson Reviewed-by: Laurent Vivier --- tests/libqos/pci-spapr.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c index 3192903497..558dfc3bdc 100644 --- a/tests/libqos/pci-spapr.c +++ b/tests/libqos/pci-spapr.c @@ -32,8 +32,8 @@ typedef struct QPCIBusSPAPR { uint64_t pio_cpu_base; QPCIWindow pio; - uint64_t mmio_cpu_base; - QPCIWindow mmio; + uint64_t mmio32_cpu_base; + QPCIWindow mmio32; uint64_t pci_hole_start; uint64_t pci_hole_size; @@ -58,7 +58,7 @@ static uint8_t qpci_spapr_io_readb(QPCIBus *bus, void *addr) if (port < s->pio.size) { v = readb(s->pio_cpu_base + port); } else { - v = readb(s->mmio_cpu_base + port); + v = readb(s->mmio32_cpu_base + port); } return v; } @@ -71,7 +71,7 @@ static uint16_t qpci_spapr_io_readw(QPCIBus *bus, void *addr) if (port < s->pio.size) { v = readw(s->pio_cpu_base + port); } else { - v = readw(s->mmio_cpu_base + port); + v = readw(s->mmio32_cpu_base + port); } return bswap16(v); } @@ -84,7 +84,7 @@ static uint32_t qpci_spapr_io_readl(QPCIBus *bus, void *addr) if (port < s->pio.size) { v = readl(s->pio_cpu_base + port); } else { - v = readl(s->mmio_cpu_base + port); + v = readl(s->mmio32_cpu_base + port); } return bswap32(v); } @@ -96,7 +96,7 @@ static void qpci_spapr_io_writeb(QPCIBus *bus, void *addr, uint8_t value) if (port < s->pio.size) { writeb(s->pio_cpu_base + port, value); } else { - writeb(s->mmio_cpu_base + port, value); + writeb(s->mmio32_cpu_base + port, value); } } @@ -108,7 +108,7 @@ static void qpci_spapr_io_writew(QPCIBus *bus, void *addr, uint16_t value) if (port < s->pio.size) { writew(s->pio_cpu_base + port, value); } else { - writew(s->mmio_cpu_base + port, value); + writew(s->mmio32_cpu_base + port, value); } } @@ -120,7 +120,7 @@ static void qpci_spapr_io_writel(QPCIBus *bus, void *addr, uint32_t value) if (port < s->pio.size) { writel(s->pio_cpu_base + port, value); } else { - writel(s->mmio_cpu_base + port, value); + writel(s->mmio32_cpu_base + port, value); } } @@ -235,12 +235,9 @@ static void qpci_spapr_iounmap(QPCIBus *bus, void *data) /* FIXME */ } -#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL #define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL -#define SPAPR_PCI_WINDOW_SPACING 0x1000000000ULL -#define SPAPR_PCI_MMIO_WIN_OFF 0xA0000000 -#define SPAPR_PCI_MMIO_WIN_SIZE (SPAPR_PCI_WINDOW_SPACING - \ - SPAPR_PCI_MEM_WIN_BUS_OFFSET) +#define SPAPR_PCI_MMIO32_WIN_OFF 0xA0000000 +#define SPAPR_PCI_MMIO32_WIN_SIZE 0x80000000 /* 2 GiB */ #define SPAPR_PCI_IO_WIN_OFF 0x80000000 #define SPAPR_PCI_IO_WIN_SIZE 0x10000 @@ -280,13 +277,14 @@ QPCIBus *qpci_init_spapr(QGuestAllocator *alloc) ret->pio.pci_base = 0; ret->pio.size = SPAPR_PCI_IO_WIN_SIZE; - ret->mmio_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_MMIO_WIN_OFF; - ret->mmio.pci_base = SPAPR_PCI_MEM_WIN_BUS_OFFSET; - ret->mmio.size = SPAPR_PCI_MMIO_WIN_SIZE; + /* 32-bit portion of the MMIO window is at PCI address 2..4 GiB */ + ret->mmio32_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_MMIO32_WIN_OFF; + ret->mmio32.pci_base = 0x80000000; /* 2 GiB */ + ret->mmio32.size = SPAPR_PCI_MMIO32_WIN_SIZE; ret->pci_hole_start = 0xC0000000; ret->pci_hole_size = - ret->mmio.pci_base + ret->mmio.size - ret->pci_hole_start; + ret->mmio32.pci_base + ret->mmio32.size - ret->pci_hole_start; ret->pci_hole_alloc = 0; ret->pci_iohole_start = 0xc000; From 6737d9ad79f6db657c7fc5658355aa5045dbfa2c Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 13 Oct 2016 10:26:09 +1100 Subject: [PATCH 13/16] spapr_pci: Delegate placement of PCI host bridges to machine type The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB) for a PAPR guest. Unlike on x86, it's routine on Power (both bare metal and PAPR guests) to have numerous independent PHBs, each controlling a separate PCI domain. There are two ways of configuring the spapr-pci-host-bridge device: first it can be done fully manually, specifying the locations and sizes of all the IO windows. This gives the most control, but is very awkward with 6 mandatory parameters. Alternatively just an "index" can be specified which essentially selects from an array of predefined PHB locations. The PHB at index 0 is automatically created as the default PHB. The current set of default locations causes some problems for guests with large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia GPGPU cards via VFIO). Obviously, for migration we can only change the locations on a new machine type, however. This is awkward, because the placement is currently decided within the spapr-pci-host-bridge code, so it breaks abstraction to look inside the machine type version. So, this patch delegates the "default mode" PHB placement from the spapr-pci-host-bridge device back to the machine type via a public method in sPAPRMachineClass. It's still a bit ugly, but it's about the best we can do. For now, this just changes where the calculation is done. It doesn't change the actual location of the host bridges, or any other behaviour. Signed-off-by: David Gibson Reviewed-by: Laurent Vivier --- hw/ppc/spapr.c | 31 +++++++++++++++++++++++++++++++ hw/ppc/spapr_pci.c | 21 +++++++-------------- include/hw/pci-host/spapr.h | 11 +---------- include/hw/ppc/spapr.h | 3 +++ 4 files changed, 42 insertions(+), 24 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index fc4c3c98c8..2bfd187bc5 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2370,6 +2370,36 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine) return head; } +static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, hwaddr *mmio, + unsigned n_dma, uint32_t *liobns, Error **errp) +{ + const uint64_t base_buid = 0x800000020000000ULL; + const hwaddr phb0_base = 0x10000000000ULL; /* 1 TiB */ + const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */ + const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */ + const hwaddr pio_offset = 0x80000000; /* 2 GiB */ + const uint32_t max_index = 255; + + hwaddr phb_base; + int i; + + if (index > max_index) { + error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", + max_index); + return; + } + + *buid = base_buid + index; + for (i = 0; i < n_dma; ++i) { + liobns[i] = SPAPR_PCI_LIOBN(index, i); + } + + phb_base = phb0_base + index * phb_spacing; + *pio = phb_base + pio_offset; + *mmio = phb_base + mmio_offset; +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -2406,6 +2436,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; + smc->phb_placement = spapr_phb_placement; } static const TypeInfo spapr_machine_info = { diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a7ca988387..8bd7f598a0 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; if (sphb->index != (uint32_t)-1) { - hwaddr windows_base; + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + Error *local_err = NULL; if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != (uint32_t)-1) || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2) @@ -1322,21 +1323,13 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) return; } - if (sphb->index > SPAPR_PCI_MAX_INDEX) { - error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", - SPAPR_PCI_MAX_INDEX); + smc->phb_placement(spapr, sphb->index, &sphb->buid, + &sphb->io_win_addr, &sphb->mem_win_addr, + windows_supported, sphb->dma_liobn, &local_err); + if (local_err) { + error_propagate(errp, local_err); return; } - - sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index; - for (i = 0; i < windows_supported; ++i) { - sphb->dma_liobn[i] = SPAPR_PCI_LIOBN(sphb->index, i); - } - - windows_base = SPAPR_PCI_WINDOW_BASE - + sphb->index * SPAPR_PCI_WINDOW_SPACING; - sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF; - sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF; } if (sphb->buid == (uint64_t)-1) { diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 30dbd461d4..8c9ebfda41 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -79,18 +79,9 @@ struct sPAPRPHBState { uint32_t numa_node; }; -#define SPAPR_PCI_MAX_INDEX 255 - -#define SPAPR_PCI_BASE_BUID 0x800000020000000ULL - #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL -#define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL -#define SPAPR_PCI_WINDOW_SPACING 0x1000000000ULL -#define SPAPR_PCI_MMIO_WIN_OFF 0xA0000000 -#define SPAPR_PCI_MMIO_WIN_SIZE (SPAPR_PCI_WINDOW_SPACING - \ - SPAPR_PCI_MEM_WIN_BUS_OFFSET) -#define SPAPR_PCI_IO_WIN_OFF 0x80000000 +#define SPAPR_PCI_MMIO_WIN_SIZE 0xf80000000 #define SPAPR_PCI_IO_WIN_SIZE 0x10000 #define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 39dadaa9ce..a05783cb3f 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -40,6 +40,9 @@ struct sPAPRMachineClass { bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ const char *tcg_default_cpu; /* which (TCG) CPU to simulate by default */ + void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, hwaddr *mmio, + unsigned n_dma, uint32_t *liobns, Error **errp); }; /** From 2efff1c0dd15c8debeb1647d574bec97355cd5e6 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 13 Oct 2016 10:44:34 +1100 Subject: [PATCH 14/16] spapr: Adjust placement of PCI host bridge to allow > 1TiB RAM Currently the default PCI host bridge for the 'pseries' machine type is constructed with its IO windows in the 1TiB..(1TiB + 64GiB) range in guest memory space. This means that if > 1TiB of guest RAM is specified, the RAM will collide with the PCI IO windows, causing serious problems. Problems won't be obvious until guest RAM goes a bit beyond 1TiB, because there's a little unused space at the bottom of the area reserved for PCI, but essentially this means that > 1TiB of RAM has never worked with the pseries machine type. This patch fixes this by altering the placement of PHBs on large-RAM VMs. Instead of always placing the first PHB at 1TiB, it is placed at the next 1 TiB boundary after the maximum RAM address. Technically, this changes behaviour in a migration-breaking way for existing machines with > 1TiB maximum memory, but since having > 1 TiB memory was broken anyway, this seems like a reasonable trade-off. Signed-off-by: David Gibson Reviewed-by: Laurent Vivier --- hw/ppc/spapr.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 2bfd187bc5..d747e586c1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2375,15 +2375,27 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, unsigned n_dma, uint32_t *liobns, Error **errp) { const uint64_t base_buid = 0x800000020000000ULL; - const hwaddr phb0_base = 0x10000000000ULL; /* 1 TiB */ const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */ const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */ const hwaddr pio_offset = 0x80000000; /* 2 GiB */ const uint32_t max_index = 255; + const hwaddr phb0_alignment = 0x10000000000ULL; /* 1 TiB */ - hwaddr phb_base; + uint64_t ram_top = MACHINE(spapr)->ram_size; + hwaddr phb0_base, phb_base; int i; + /* Do we have hotpluggable memory? */ + if (MACHINE(spapr)->maxram_size > ram_top) { + /* Can't just use maxram_size, because there may be an + * alignment gap between normal and hotpluggable memory + * regions */ + ram_top = spapr->hotplug_memory.base + + memory_region_size(&spapr->hotplug_memory.mr); + } + + phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment); + if (index > max_index) { error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", max_index); From daa23699031693b434ec263b212f77ba505e353e Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 11 Oct 2016 14:23:33 +1100 Subject: [PATCH 15/16] spapr_pci: Add a 64-bit MMIO window On real hardware, and under pHyp, the PCI host bridges on Power machines typically advertise two outbound MMIO windows from the guest's physical memory space to PCI memory space: - A 32-bit window which maps onto 2GiB..4GiB in the PCI address space - A 64-bit window which maps onto a large region somewhere high in PCI address space (traditionally this used an identity mapping from guest physical address to PCI address, but that's not always the case) The qemu implementation in spapr-pci-host-bridge, however, only supports a single outbound MMIO window, however. At least some Linux versions expect the two windows however, so we arranged this window to map onto the PCI memory space from 2 GiB..~64 GiB, then advertised it as two contiguous windows, the "32-bit" window from 2G..4G and the "64-bit" window from 4G..~64G. This approach means, however, that the 64G window is not naturally aligned. In turn this limits the size of the largest BAR we can map (which does have to be naturally aligned) to roughly half of the total window. With some large nVidia GPGPU cards which have huge memory BARs, this is starting to be a problem. This patch adds true support for separate 32-bit and 64-bit outbound MMIO windows to the spapr-pci-host-bridge implementation, each of which can be independently configured. The 32-bit window always maps to 2G.. in PCI space, but the PCI address of the 64-bit window can be configured (it defaults to the same as the guest physical address). So as not to break possible existing configurations, as long as a 64-bit window is not specified, a large single window can be specified. This will appear the same way to the guest as the old approach, although it's now implemented by two contiguous memory regions rather than a single one. For now, this only adds the possibility of 64-bit windows. The default configuration still uses the legacy mode. Signed-off-by: David Gibson Reviewed-by: Laurent Vivier --- hw/ppc/spapr.c | 10 ++++-- hw/ppc/spapr_pci.c | 70 +++++++++++++++++++++++++++++-------- include/hw/pci-host/spapr.h | 8 +++-- include/hw/ppc/spapr.h | 3 +- 4 files changed, 72 insertions(+), 19 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d747e586c1..ea5d0e66f8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2371,7 +2371,8 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine) } static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, hwaddr *mmio, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, unsigned n_dma, uint32_t *liobns, Error **errp) { const uint64_t base_buid = 0x800000020000000ULL; @@ -2409,7 +2410,12 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, phb_base = phb0_base + index * phb_spacing; *pio = phb_base + pio_offset; - *mmio = phb_base + mmio_offset; + *mmio32 = phb_base + mmio_offset; + /* + * We don't set the 64-bit MMIO window, relying on the PHB's + * fallback behaviour of automatically splitting a large "32-bit" + * window into contiguous 32-bit and 64-bit windows + */ } static void spapr_machine_class_init(ObjectClass *oc, void *data) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 8bd7f598a0..10d5de2d70 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1317,14 +1317,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != (uint32_t)-1) || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2) || (sphb->mem_win_addr != (hwaddr)-1) + || (sphb->mem64_win_addr != (hwaddr)-1) || (sphb->io_win_addr != (hwaddr)-1)) { error_setg(errp, "Either \"index\" or other parameters must" " be specified for PAPR PHB, not both"); return; } - smc->phb_placement(spapr, sphb->index, &sphb->buid, - &sphb->io_win_addr, &sphb->mem_win_addr, + smc->phb_placement(spapr, sphb->index, + &sphb->buid, &sphb->io_win_addr, + &sphb->mem_win_addr, &sphb->mem64_win_addr, windows_supported, sphb->dma_liobn, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -1353,6 +1355,38 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) return; } + if (sphb->mem64_win_size != 0) { + if (sphb->mem64_win_addr == (hwaddr)-1) { + error_setg(errp, + "64-bit memory window address not specified for PHB"); + return; + } + + if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { + error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx + " (max 2 GiB)", sphb->mem_win_size); + return; + } + + if (sphb->mem64_win_pciaddr == (hwaddr)-1) { + /* 64-bit window defaults to identity mapping */ + sphb->mem64_win_pciaddr = sphb->mem64_win_addr; + } + } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { + /* + * For compatibility with old configuration, if no 64-bit MMIO + * window is specified, but the ordinary (32-bit) memory + * window is specified as > 2GiB, we treat it as a 2GiB 32-bit + * window, with a 64-bit MMIO window following on immediately + * afterwards + */ + sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem64_win_pciaddr = + SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE; + } + if (spapr_pci_find_phb(spapr, sphb->buid)) { error_setg(errp, "PCI host bridges must have unique BUIDs"); return; @@ -1366,12 +1400,19 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sprintf(namebuf, "%s.mmio", sphb->dtbusname); memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX); - sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname); - memory_region_init_alias(&sphb->memwindow, OBJECT(sphb), + sprintf(namebuf, "%s.mmio32-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->mem32window, OBJECT(sphb), namebuf, &sphb->memspace, SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size); memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr, - &sphb->memwindow); + &sphb->mem32window); + + sprintf(namebuf, "%s.mmio64-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->mem64window, OBJECT(sphb), + namebuf, &sphb->memspace, + sphb->mem64_win_pciaddr, sphb->mem64_win_size); + memory_region_add_subregion(get_system_memory(), sphb->mem64_win_addr, + &sphb->mem64window); /* Initialize IO regions */ sprintf(namebuf, "%s.io", sphb->dtbusname); @@ -1524,6 +1565,10 @@ static Property spapr_phb_properties[] = { DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1), DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size, SPAPR_PCI_MMIO_WIN_SIZE), + DEFINE_PROP_UINT64("mem64_win_addr", sPAPRPHBState, mem64_win_addr, -1), + DEFINE_PROP_UINT64("mem64_win_size", sPAPRPHBState, mem64_win_size, 0), + DEFINE_PROP_UINT64("mem64_win_pciaddr", sPAPRPHBState, mem64_win_pciaddr, + -1), DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1), DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size, SPAPR_PCI_IO_WIN_SIZE), @@ -1759,10 +1804,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, int bus_off, i, j, ret; char nodename[FDT_NAME_MAX]; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; - const uint64_t mmiosize = memory_region_size(&phb->memwindow); - const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET; - const uint64_t w32size = MIN(w32max, mmiosize); - const uint64_t w64size = (mmiosize > w32size) ? (mmiosize - w32size) : 0; struct { uint32_t hi; uint64_t child; @@ -1777,15 +1818,16 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, { cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET), cpu_to_be64(phb->mem_win_addr), - cpu_to_be64(w32size), + cpu_to_be64(phb->mem_win_size), }, { - cpu_to_be32(b_ss(3)), cpu_to_be64(1ULL << 32), - cpu_to_be64(phb->mem_win_addr + w32size), - cpu_to_be64(w64size) + cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr), + cpu_to_be64(phb->mem64_win_addr), + cpu_to_be64(phb->mem64_win_size), }, }; - const unsigned sizeof_ranges = (w64size ? 3 : 2) * sizeof(ranges[0]); + const unsigned sizeof_ranges = + (phb->mem64_win_size ? 3 : 2) * sizeof(ranges[0]); uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 }; uint32_t interrupt_map_mask[] = { cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 8c9ebfda41..23dfb09d36 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -53,8 +53,10 @@ struct sPAPRPHBState { bool dr_enabled; MemoryRegion memspace, iospace; - hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size; - MemoryRegion memwindow, iowindow, msiwindow; + hwaddr mem_win_addr, mem_win_size, mem64_win_addr, mem64_win_size; + uint64_t mem64_win_pciaddr; + hwaddr io_win_addr, io_win_size; + MemoryRegion mem32window, mem64window, iowindow, msiwindow; uint32_t dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]; hwaddr dma_win_addr, dma_win_size; @@ -80,6 +82,8 @@ struct sPAPRPHBState { }; #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL +#define SPAPR_PCI_MEM32_WIN_SIZE \ + ((1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET) #define SPAPR_PCI_MMIO_WIN_SIZE 0xf80000000 #define SPAPR_PCI_IO_WIN_SIZE 0x10000 diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index a05783cb3f..aeaba3edf9 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -41,7 +41,8 @@ struct sPAPRMachineClass { bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ const char *tcg_default_cpu; /* which (TCG) CPU to simulate by default */ void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, hwaddr *mmio, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, unsigned n_dma, uint32_t *liobns, Error **errp); }; From 357d1e3bc7d2d80e5271bc4f3ac8537e30dc8046 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Sun, 16 Oct 2016 12:04:15 +1100 Subject: [PATCH 16/16] spapr: Improved placement of PCI host bridges in guest memory map Currently, the MMIO space for accessing PCI on pseries guests begins at 1 TiB in guest address space. Each PCI host bridge (PHB) has a 64 GiB chunk of address space in which it places its outbound PIO and 32-bit and 64-bit MMIO windows. This scheme as several problems: - It limits guest RAM to 1 TiB (though we have a limited fix for this now) - It limits the total MMIO window to 64 GiB. This is not always enough for some of the large nVidia GPGPU cards - Putting all the windows into a single 64 GiB area means that naturally aligning things within there will waste more address space. In addition there was a miscalculation in some of the defaults, which meant that the MMIO windows for each PHB actually slightly overran the 64 GiB region for that PHB. We got away without nasty consequences because the overrun fit within an unused area at the beginning of the next PHB's region, but it's not pretty. This patch implements a new scheme which addresses those problems, and is also closer to what bare metal hardware and pHyp guests generally use. Because some guest versions (including most current distro kernels) can't access PCI MMIO above 64 TiB, we put all the PCI windows between 32 TiB and 64 TiB. This is broken into 1 TiB chunks. The first 1 TiB contains the PIO (64 kiB) and 32-bit MMIO (2 GiB) windows for all of the PHBs. Each subsequent TiB chunk contains a naturally aligned 64-bit MMIO window for one PHB each. This reduces the number of allowed PHBs (without full manual configuration of all the windows) from 256 to 31, but this should still be plenty in practice. We also change some of the default window sizes for manually configured PHBs to saner values. Finally we adjust some tests and libqos so that it correctly uses the new default locations. Ideally it would parse the device tree given to the guest, but that's a more complex problem for another time. Signed-off-by: David Gibson Reviewed-by: Laurent Vivier --- hw/ppc/spapr.c | 122 +++++++++++++++++++++++++++--------- hw/ppc/spapr_pci.c | 5 +- include/hw/pci-host/spapr.h | 8 ++- tests/endianness-test.c | 3 +- tests/libqos/pci-spapr.c | 9 ++- tests/spapr-phb-test.c | 2 +- 6 files changed, 109 insertions(+), 40 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ea5d0e66f8..ddb7438434 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2375,31 +2375,38 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, hwaddr *mmio32, hwaddr *mmio64, unsigned n_dma, uint32_t *liobns, Error **errp) { + /* + * New-style PHB window placement. + * + * Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window + * for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO + * windows. + * + * Some guest kernels can't work with MMIO windows above 1<<46 + * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB + * + * 32TiB..(33TiB+1984kiB) contains the 64kiB PIO windows for each + * PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the + * 2GiB 32-bit MMIO windows for each PHB. Then 33..64TiB has the + * 1TiB 64-bit MMIO windows for each PHB. + */ const uint64_t base_buid = 0x800000020000000ULL; - const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */ - const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */ - const hwaddr pio_offset = 0x80000000; /* 2 GiB */ - const uint32_t max_index = 255; - const hwaddr phb0_alignment = 0x10000000000ULL; /* 1 TiB */ - - uint64_t ram_top = MACHINE(spapr)->ram_size; - hwaddr phb0_base, phb_base; + const int max_phbs = + (SPAPR_PCI_LIMIT - SPAPR_PCI_BASE) / SPAPR_PCI_MEM64_WIN_SIZE - 1; int i; - /* Do we have hotpluggable memory? */ - if (MACHINE(spapr)->maxram_size > ram_top) { - /* Can't just use maxram_size, because there may be an - * alignment gap between normal and hotpluggable memory - * regions */ - ram_top = spapr->hotplug_memory.base + - memory_region_size(&spapr->hotplug_memory.mr); - } + /* Sanity check natural alignments */ + QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0); + /* Sanity check bounds */ + QEMU_BUILD_BUG_ON((max_phbs * SPAPR_PCI_IO_WIN_SIZE) > SPAPR_PCI_MEM32_WIN_SIZE); + QEMU_BUILD_BUG_ON((max_phbs * SPAPR_PCI_MEM32_WIN_SIZE) > SPAPR_PCI_MEM64_WIN_SIZE); - phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment); - - if (index > max_index) { + if (index >= max_phbs) { error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", - max_index); + max_phbs - 1); return; } @@ -2408,14 +2415,9 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, liobns[i] = SPAPR_PCI_LIOBN(index, i); } - phb_base = phb0_base + index * phb_spacing; - *pio = phb_base + pio_offset; - *mmio32 = phb_base + mmio_offset; - /* - * We don't set the 64-bit MMIO window, relying on the PHB's - * fallback behaviour of automatically splitting a large "32-bit" - * window into contiguous 32-bit and 64-bit windows - */ + *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE; + *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE; + *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE; } static void spapr_machine_class_init(ObjectClass *oc, void *data) @@ -2519,8 +2521,67 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", true); /* * pseries-2.7 */ -#define SPAPR_COMPAT_2_7 \ - HW_COMPAT_2_7 \ +#define SPAPR_COMPAT_2_7 \ + HW_COMPAT_2_7 \ + { \ + .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ + .property = "mem_win_size", \ + .value = stringify(SPAPR_PCI_2_7_MMIO_WIN_SIZE),\ + }, \ + { \ + .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ + .property = "mem64_win_size", \ + .value = "0", \ + }, + +static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, + unsigned n_dma, uint32_t *liobns, Error **errp) +{ + /* Legacy PHB placement for pseries-2.7 and earlier machine types */ + const uint64_t base_buid = 0x800000020000000ULL; + const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */ + const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */ + const hwaddr pio_offset = 0x80000000; /* 2 GiB */ + const uint32_t max_index = 255; + const hwaddr phb0_alignment = 0x10000000000ULL; /* 1 TiB */ + + uint64_t ram_top = MACHINE(spapr)->ram_size; + hwaddr phb0_base, phb_base; + int i; + + /* Do we have hotpluggable memory? */ + if (MACHINE(spapr)->maxram_size > ram_top) { + /* Can't just use maxram_size, because there may be an + * alignment gap between normal and hotpluggable memory + * regions */ + ram_top = spapr->hotplug_memory.base + + memory_region_size(&spapr->hotplug_memory.mr); + } + + phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment); + + if (index > max_index) { + error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", + max_index); + return; + } + + *buid = base_buid + index; + for (i = 0; i < n_dma; ++i) { + liobns[i] = SPAPR_PCI_LIOBN(index, i); + } + + phb_base = phb0_base + index * phb_spacing; + *pio = phb_base + pio_offset; + *mmio32 = phb_base + mmio_offset; + /* + * We don't set the 64-bit MMIO window, relying on the PHB's + * fallback behaviour of automatically splitting a large "32-bit" + * window into contiguous 32-bit and 64-bit windows + */ +} static void spapr_machine_2_7_instance_options(MachineState *machine) { @@ -2534,6 +2595,7 @@ static void spapr_machine_2_7_class_options(MachineClass *mc) spapr_machine_2_8_class_options(mc); smc->tcg_default_cpu = "POWER7"; SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_7); + smc->phb_placement = phb_placement_2_7; } DEFINE_SPAPR_MACHINE(2_7, "2.7", false); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 10d5de2d70..2a1ccf59ea 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1564,9 +1564,10 @@ static Property spapr_phb_properties[] = { DEFINE_PROP_UINT32("liobn64", sPAPRPHBState, dma_liobn[1], -1), DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1), DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size, - SPAPR_PCI_MMIO_WIN_SIZE), + SPAPR_PCI_MEM32_WIN_SIZE), DEFINE_PROP_UINT64("mem64_win_addr", sPAPRPHBState, mem64_win_addr, -1), - DEFINE_PROP_UINT64("mem64_win_size", sPAPRPHBState, mem64_win_size, 0), + DEFINE_PROP_UINT64("mem64_win_size", sPAPRPHBState, mem64_win_size, + SPAPR_PCI_MEM64_WIN_SIZE), DEFINE_PROP_UINT64("mem64_win_pciaddr", sPAPRPHBState, mem64_win_pciaddr, -1), DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1), diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 23dfb09d36..b92c1b59f1 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -84,8 +84,14 @@ struct sPAPRPHBState { #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL #define SPAPR_PCI_MEM32_WIN_SIZE \ ((1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET) +#define SPAPR_PCI_MEM64_WIN_SIZE 0x10000000000ULL /* 1 TiB */ -#define SPAPR_PCI_MMIO_WIN_SIZE 0xf80000000 +/* Without manual configuration, all PCI outbound windows will be + * within this range */ +#define SPAPR_PCI_BASE (1ULL << 45) /* 32 TiB */ +#define SPAPR_PCI_LIMIT (1ULL << 46) /* 64 TiB */ + +#define SPAPR_PCI_2_7_MMIO_WIN_SIZE 0xf80000000 #define SPAPR_PCI_IO_WIN_SIZE 0x10000 #define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL diff --git a/tests/endianness-test.c b/tests/endianness-test.c index b7a120e0a4..cf8d41b7b4 100644 --- a/tests/endianness-test.c +++ b/tests/endianness-test.c @@ -38,7 +38,8 @@ static const TestCase test_cases[] = { { "ppc", "prep", 0x80000000, .bswap = true }, { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = "i82378" }, { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, - { "ppc64", "pseries", 0x10080000000ULL, + { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, + { "ppc64", "pseries-2.7", 0x10080000000ULL, .bswap = true, .superio = "i82378" }, { "sh4", "r2d", 0xfe240000, .superio = "i82378" }, { "sh4eb", "r2d", 0xfe240000, .bswap = true, .superio = "i82378" }, diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c index 558dfc3bdc..2eaaf9159a 100644 --- a/tests/libqos/pci-spapr.c +++ b/tests/libqos/pci-spapr.c @@ -235,10 +235,9 @@ static void qpci_spapr_iounmap(QPCIBus *bus, void *data) /* FIXME */ } -#define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL -#define SPAPR_PCI_MMIO32_WIN_OFF 0xA0000000 +#define SPAPR_PCI_BASE (1ULL << 45) + #define SPAPR_PCI_MMIO32_WIN_SIZE 0x80000000 /* 2 GiB */ -#define SPAPR_PCI_IO_WIN_OFF 0x80000000 #define SPAPR_PCI_IO_WIN_SIZE 0x10000 QPCIBus *qpci_init_spapr(QGuestAllocator *alloc) @@ -273,12 +272,12 @@ QPCIBus *qpci_init_spapr(QGuestAllocator *alloc) * get the window locations */ ret->buid = 0x800000020000000ULL; - ret->pio_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_IO_WIN_OFF; + ret->pio_cpu_base = SPAPR_PCI_BASE; ret->pio.pci_base = 0; ret->pio.size = SPAPR_PCI_IO_WIN_SIZE; /* 32-bit portion of the MMIO window is at PCI address 2..4 GiB */ - ret->mmio32_cpu_base = SPAPR_PCI_WINDOW_BASE + SPAPR_PCI_MMIO32_WIN_OFF; + ret->mmio32_cpu_base = SPAPR_PCI_BASE + SPAPR_PCI_MMIO32_WIN_SIZE; ret->mmio32.pci_base = 0x80000000; /* 2 GiB */ ret->mmio32.size = SPAPR_PCI_MMIO32_WIN_SIZE; diff --git a/tests/spapr-phb-test.c b/tests/spapr-phb-test.c index 21004a76ec..d3522ea093 100644 --- a/tests/spapr-phb-test.c +++ b/tests/spapr-phb-test.c @@ -25,7 +25,7 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); qtest_add_func("/spapr-phb/device", test_phb_device); - qtest_start("-device " TYPE_SPAPR_PCI_HOST_BRIDGE ",index=100"); + qtest_start("-device " TYPE_SPAPR_PCI_HOST_BRIDGE ",index=30"); ret = g_test_run();