diff --git a/MAINTAINERS b/MAINTAINERS index e350c4e6a198..dec0e3a84d4f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19124,7 +19124,7 @@ F: drivers/iommu/s390-iommu.c S390 IUCV NETWORK LAYER M: Alexandra Winter -M: Wenjia Zhang +M: Thorsten Winkler L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported @@ -19143,7 +19143,7 @@ F: arch/s390/mm S390 NETWORK DRIVERS M: Alexandra Winter -M: Wenjia Zhang +M: Thorsten Winkler L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8f39f0424796..fe565f3a3a91 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -216,7 +216,6 @@ config S390 select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI - select KEXEC select MMU_GATHER_MERGE_VMAS select MMU_GATHER_NO_GATHER select MMU_GATHER_RCU_TABLE_FREE @@ -443,7 +442,7 @@ config COMMAND_LINE_SIZE line. config COMPAT - def_bool y + def_bool n prompt "Kernel support for 31 bit emulation" select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION @@ -454,7 +453,9 @@ config COMPAT Select this option if you want to enable your system kernel to handle system-calls from ELF binaries for 31 bit ESA. This option (and some other stuff like libraries and such) is needed for - executing 31 bit applications. It is safe to say "Y". + executing 31 bit applications. + + If unsure say N. config SMP def_bool y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 85490d9373fc..67ba0157fbdb 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -40,6 +40,7 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y +CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index fb690fbbf54b..4c2650c1fbdd 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -38,6 +38,7 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y +CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 47028450eee1..30d2a1687665 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -10,7 +10,6 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_CRASH_DUMP=y CONFIG_MARCH_Z13=y -# CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 287bb88f7698..2686bee800e3 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -11,6 +11,8 @@ /* I/O size constraints */ #define ZPCI_MAX_READ_SIZE 8 #define ZPCI_MAX_WRITE_SIZE 128 +#define ZPCI_BOUNDARY_SIZE (1 << 12) +#define ZPCI_BOUNDARY_MASK (ZPCI_BOUNDARY_SIZE - 1) /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 @@ -125,16 +127,18 @@ static inline int zpci_read_single(void *dst, const volatile void __iomem *src, int zpci_write_block(volatile void __iomem *dst, const void *src, unsigned long len); -static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) +static inline int zpci_get_max_io_size(u64 src, u64 dst, int len, int max) { - int count = len > max ? max : len, size = 1; + int offset = dst & ZPCI_BOUNDARY_MASK; + int size; - while (!(src & 0x1) && !(dst & 0x1) && ((size << 1) <= count)) { - dst = dst >> 1; - src = src >> 1; - size = size << 1; - } - return size; + size = min3(len, ZPCI_BOUNDARY_SIZE - offset, max); + if (IS_ALIGNED(src, 8) && IS_ALIGNED(dst, 8) && IS_ALIGNED(size, 8)) + return size; + + if (size >= 8) + return 8; + return rounddown_pow_of_two(size); } static inline int zpci_memcpy_fromio(void *dst, @@ -144,9 +148,9 @@ static inline int zpci_memcpy_fromio(void *dst, int size, rc = 0; while (n > 0) { - size = zpci_get_max_write_size((u64 __force) src, - (u64) dst, n, - ZPCI_MAX_READ_SIZE); + size = zpci_get_max_io_size((u64 __force) src, + (u64) dst, n, + ZPCI_MAX_READ_SIZE); rc = zpci_read_single(dst, src, size); if (rc) break; @@ -166,9 +170,9 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst, return -EINVAL; while (n > 0) { - size = zpci_get_max_write_size((u64 __force) dst, - (u64) src, n, - ZPCI_MAX_WRITE_SIZE); + size = zpci_get_max_io_size((u64 __force) dst, + (u64) src, n, + ZPCI_MAX_WRITE_SIZE); if (size > 8) /* main path */ rc = zpci_write_block(dst, src, size); else diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 9e7c15fccfea..a4f3449cc814 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -208,7 +208,6 @@ void __load_fpu_regs(void) } clear_cpu_flag(CIF_FPU); } -EXPORT_SYMBOL(__load_fpu_regs); void load_fpu_regs(void) { diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 39a91b00438a..bf8a672b15a4 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -111,11 +111,11 @@ static void paicrypt_event_destroy(struct perf_event *event) mutex_unlock(&pai_reserve_mutex); } -static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel) +static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel) { if (kernel) nr += PAI_CRYPTO_MAXCTR; - return cpump->page[nr]; + return page[nr]; } /* Read the counter values. Return value from location in CMP. For event @@ -129,13 +129,13 @@ static u64 paicrypt_getdata(struct perf_event *event, bool kernel) int i; if (event->attr.config != PAI_CRYPTO_BASE) { - return paicrypt_getctr(cpump, + return paicrypt_getctr(cpump->page, event->attr.config - PAI_CRYPTO_BASE, kernel); } for (i = 1; i <= paicrypt_cnt; i++) { - u64 val = paicrypt_getctr(cpump, i, kernel); + u64 val = paicrypt_getctr(cpump->page, i, kernel); if (!val) continue; @@ -317,10 +317,14 @@ static void paicrypt_start(struct perf_event *event, int flags) * Events are added, deleted and re-added when 2 or more events * are active at the same time. */ - if (!event->hw.last_tag) { - event->hw.last_tag = 1; - sum = paicrypt_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); + if (!event->attr.sample_period) { /* Counting */ + if (!event->hw.last_tag) { + event->hw.last_tag = 1; + sum = paicrypt_getall(event); /* Get current value */ + local64_set(&event->hw.prev_count, sum); + } + } else { /* Sampling */ + perf_sched_cb_inc(event->pmu); } } @@ -336,19 +340,18 @@ static int paicrypt_add(struct perf_event *event, int flags) local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); } cpump->event = event; - if (flags & PERF_EF_START && !event->attr.sample_period) { - /* Only counting needs initial counter value */ + if (flags & PERF_EF_START) paicrypt_start(event, PERF_EF_RELOAD); - } event->hw.state = 0; - if (event->attr.sample_period) - perf_sched_cb_inc(event->pmu); return 0; } static void paicrypt_stop(struct perf_event *event, int flags) { - paicrypt_read(event); + if (!event->attr.sample_period) /* Counting */ + paicrypt_read(event); + else /* Sampling */ + perf_sched_cb_dec(event->pmu); event->hw.state = PERF_HES_STOPPED; } @@ -357,11 +360,7 @@ static void paicrypt_del(struct perf_event *event, int flags) struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); struct paicrypt_map *cpump = mp->mapptr; - if (event->attr.sample_period) - perf_sched_cb_dec(event->pmu); - if (!event->attr.sample_period) - /* Only counting needs to read counter */ - paicrypt_stop(event, PERF_EF_UPDATE); + paicrypt_stop(event, PERF_EF_UPDATE); if (--cpump->active_events == 0) { local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT); WRITE_ONCE(S390_lowcore.ccd, 0); @@ -373,8 +372,7 @@ static void paicrypt_del(struct perf_event *event, int flags) * 2 bytes: Number of counter * 8 bytes: Value of counter */ -static size_t paicrypt_copy(struct pai_userdata *userdata, - struct paicrypt_map *cpump, +static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page, bool exclude_user, bool exclude_kernel) { int i, outidx = 0; @@ -383,9 +381,9 @@ static size_t paicrypt_copy(struct pai_userdata *userdata, u64 val = 0; if (!exclude_kernel) - val += paicrypt_getctr(cpump, i, true); + val += paicrypt_getctr(page, i, true); if (!exclude_user) - val += paicrypt_getctr(cpump, i, false); + val += paicrypt_getctr(page, i, false); if (val) { userdata[outidx].num = i; userdata[outidx].value = val; @@ -395,25 +393,14 @@ static size_t paicrypt_copy(struct pai_userdata *userdata, return outidx * sizeof(struct pai_userdata); } -static int paicrypt_push_sample(void) +static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump, + struct perf_event *event) { - struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); - struct paicrypt_map *cpump = mp->mapptr; - struct perf_event *event = cpump->event; struct perf_sample_data data; struct perf_raw_record raw; struct pt_regs regs; - size_t rawsize; int overflow; - if (!cpump->event) /* No event active */ - return 0; - rawsize = paicrypt_copy(cpump->save, cpump, - cpump->event->attr.exclude_user, - cpump->event->attr.exclude_kernel); - if (!rawsize) /* No incremented counters */ - return 0; - /* Setup perf sample */ memset(®s, 0, sizeof(regs)); memset(&raw, 0, sizeof(raw)); @@ -444,6 +431,25 @@ static int paicrypt_push_sample(void) return overflow; } +/* Check if there is data to be saved on schedule out of a task. */ +static int paicrypt_have_sample(void) +{ + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; + struct perf_event *event = cpump->event; + size_t rawsize; + int rc = 0; + + if (!event) /* No event active */ + return 0; + rawsize = paicrypt_copy(cpump->save, cpump->page, + cpump->event->attr.exclude_user, + cpump->event->attr.exclude_kernel); + if (rawsize) /* No incremented counters */ + rc = paicrypt_push_sample(rawsize, cpump, event); + return rc; +} + /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ @@ -453,7 +459,7 @@ static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sch * results on schedule_out and if page was dirty, clear values. */ if (!sched_in) - paicrypt_push_sample(); + paicrypt_have_sample(); } /* Attribute definitions for paicrypt interface. As with other CPU diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index e7013a2e8960..af7f2b538c8f 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -276,9 +276,9 @@ static int paiext_event_init(struct perf_event *event) return 0; } -static u64 paiext_getctr(struct paiext_map *cpump, int nr) +static u64 paiext_getctr(unsigned long *area, int nr) { - return cpump->area[nr]; + return area[nr]; } /* Read the counter values. Return value from location in buffer. For event @@ -292,10 +292,11 @@ static u64 paiext_getdata(struct perf_event *event) int i; if (event->attr.config != PAI_NNPA_BASE) - return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE); + return paiext_getctr(cpump->area, + event->attr.config - PAI_NNPA_BASE); for (i = 1; i <= paiext_cnt; i++) - sum += paiext_getctr(cpump, i); + sum += paiext_getctr(cpump->area, i); return sum; } @@ -320,11 +321,15 @@ static void paiext_start(struct perf_event *event, int flags) { u64 sum; - if (event->hw.last_tag) - return; - event->hw.last_tag = 1; - sum = paiext_getall(event); /* Get current value */ - local64_set(&event->hw.prev_count, sum); + if (!event->attr.sample_period) { /* Counting */ + if (!event->hw.last_tag) { + event->hw.last_tag = 1; + sum = paiext_getall(event); /* Get current value */ + local64_set(&event->hw.prev_count, sum); + } + } else { /* Sampling */ + perf_sched_cb_inc(event->pmu); + } } static int paiext_add(struct perf_event *event, int flags) @@ -341,21 +346,19 @@ static int paiext_add(struct perf_event *event, int flags) debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", __func__, S390_lowcore.aicd, pcb->acc); } - if (flags & PERF_EF_START && !event->attr.sample_period) { - /* Only counting needs initial counter value */ + cpump->event = event; + if (flags & PERF_EF_START) paiext_start(event, PERF_EF_RELOAD); - } event->hw.state = 0; - if (event->attr.sample_period) { - cpump->event = event; - perf_sched_cb_inc(event->pmu); - } return 0; } static void paiext_stop(struct perf_event *event, int flags) { - paiext_read(event); + if (!event->attr.sample_period) /* Counting */ + paiext_read(event); + else /* Sampling */ + perf_sched_cb_dec(event->pmu); event->hw.state = PERF_HES_STOPPED; } @@ -365,12 +368,7 @@ static void paiext_del(struct perf_event *event, int flags) struct paiext_map *cpump = mp->mapptr; struct paiext_cb *pcb = cpump->paiext_cb; - if (event->attr.sample_period) - perf_sched_cb_dec(event->pmu); - if (!event->attr.sample_period) { - /* Only counting needs to read counter */ - paiext_stop(event, PERF_EF_UPDATE); - } + paiext_stop(event, PERF_EF_UPDATE); if (--cpump->active_events == 0) { /* Disable CPU instruction lookup for PAIE1 control block */ local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT); @@ -386,13 +384,12 @@ static void paiext_del(struct perf_event *event, int flags) * 2 bytes: Number of counter * 8 bytes: Value of counter */ -static size_t paiext_copy(struct paiext_map *cpump) +static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area) { - struct pai_userdata *userdata = cpump->save; int i, outidx = 0; for (i = 1; i <= paiext_cnt; i++) { - u64 val = paiext_getctr(cpump, i); + u64 val = paiext_getctr(area, i); if (val) { userdata[outidx].num = i; @@ -418,21 +415,14 @@ static size_t paiext_copy(struct paiext_map *cpump) * sched_task() callback. That callback is not active after paiext_del() * returns and has deleted the event on that CPU. */ -static int paiext_push_sample(void) +static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, + struct perf_event *event) { - struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); - struct paiext_map *cpump = mp->mapptr; - struct perf_event *event = cpump->event; struct perf_sample_data data; struct perf_raw_record raw; struct pt_regs regs; - size_t rawsize; int overflow; - rawsize = paiext_copy(cpump); - if (!rawsize) /* No incremented counters */ - return 0; - /* Setup perf sample */ memset(®s, 0, sizeof(regs)); memset(&raw, 0, sizeof(raw)); @@ -461,6 +451,23 @@ static int paiext_push_sample(void) return overflow; } +/* Check if there is data to be saved on schedule out of a task. */ +static int paiext_have_sample(void) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct perf_event *event = cpump->event; + size_t rawsize; + int rc = 0; + + if (!event) + return 0; + rawsize = paiext_copy(cpump->save, cpump->area); + if (rawsize) /* Incremented counters */ + rc = paiext_push_sample(rawsize, cpump, event); + return rc; +} + /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ @@ -470,7 +477,7 @@ static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched * results on schedule_out and if page was dirty, clear values. */ if (!sched_in) - paiext_push_sample(); + paiext_have_sample(); } /* Attribute definitions for pai extension1 interface. As with other CPU diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2e6754b62b20..f1897a8bb221 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -917,7 +917,6 @@ static int s390_fpregs_set(struct task_struct *target, else memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs)); - /* If setting FPC, must validate it first. */ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { u32 ufpc[2] = { target->thread.fpu.fpc, 0 }; rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ab4098886e56..ac4c78546d97 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -280,7 +280,6 @@ static void do_sigbus(struct pt_regs *regs) static void do_exception(struct pt_regs *regs, int access) { struct vm_area_struct *vma; - struct task_struct *tsk; unsigned long address; struct mm_struct *mm; enum fault_type type; @@ -289,7 +288,6 @@ static void do_exception(struct pt_regs *regs, int access) vm_fault_t fault; bool is_write; - tsk = current; /* * The instruction that caused the program check has * been nullified. Don't signal single step via SIGTRAP. @@ -297,7 +295,7 @@ static void do_exception(struct pt_regs *regs, int access) clear_thread_flag(TIF_PER_TRAP); if (kprobe_page_fault(regs, 14)) return; - mm = tsk->mm; + mm = current->mm; address = get_fault_address(regs); is_write = fault_is_write(regs); type = get_fault_type(regs); diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 588089332931..a90499c087f0 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -97,9 +97,9 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, return -EINVAL; while (n > 0) { - size = zpci_get_max_write_size((u64 __force) dst, - (u64 __force) src, n, - ZPCI_MAX_WRITE_SIZE); + size = zpci_get_max_io_size((u64 __force) dst, + (u64 __force) src, n, + ZPCI_MAX_WRITE_SIZE); if (size > 8) /* main path */ rc = __pcistb_mio_inuser(dst, src, size, &status); else @@ -242,9 +242,9 @@ static inline int __memcpy_fromio_inuser(void __user *dst, u8 status; while (n > 0) { - size = zpci_get_max_write_size((u64 __force) src, - (u64 __force) dst, n, - ZPCI_MAX_READ_SIZE); + size = zpci_get_max_io_size((u64 __force) src, + (u64 __force) dst, n, + ZPCI_MAX_READ_SIZE); rc = __pcilg_mio_inuser(dst, src, size, &status); if (rc) break; diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index acb710d3d7bc..983b3b16196c 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -32,7 +32,8 @@ #define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */ -static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable); +static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev); +static int vfio_ap_mdev_reset_qlist(struct list_head *qlist); static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); static const struct vfio_device_ops vfio_ap_matrix_dev_ops; static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q); @@ -665,17 +666,23 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev) * device driver. * * @matrix_mdev: the matrix mdev whose matrix is to be filtered. + * @apm_filtered: a 256-bit bitmap for storing the APIDs filtered from the + * guest's AP configuration that are still in the host's AP + * configuration. * * Note: If an APQN referencing a queue device that is not bound to the vfio_ap * driver, its APID will be filtered from the guest's APCB. The matrix * structure precludes filtering an individual APQN, so its APID will be - * filtered. + * filtered. Consequently, all queues associated with the adapter that + * are in the host's AP configuration must be reset. If queues are + * subsequently made available again to the guest, they should re-appear + * in a reset state * * Return: a boolean value indicating whether the KVM guest's APCB was changed * by the filtering or not. */ -static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, - struct ap_matrix_mdev *matrix_mdev) +static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev, + unsigned long *apm_filtered) { unsigned long apid, apqi, apqn; DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES); @@ -685,6 +692,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES); bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS); vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb); + bitmap_clear(apm_filtered, 0, AP_DEVICES); /* * Copy the adapters, domains and control domains to the shadow_apcb @@ -696,8 +704,9 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm, (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS); - for_each_set_bit_inv(apid, apm, AP_DEVICES) { - for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { + for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm, AP_DEVICES) { + for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, + AP_DOMAINS) { /* * If the APQN is not bound to the vfio_ap device * driver, then we can't assign it to the guest's @@ -709,8 +718,16 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm, apqn = AP_MKQID(apid, apqi); q = vfio_ap_mdev_get_queue(matrix_mdev, apqn); if (!q || q->reset_status.response_code) { - clear_bit_inv(apid, - matrix_mdev->shadow_apcb.apm); + clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); + + /* + * If the adapter was previously plugged into + * the guest, let's let the caller know that + * the APID was filtered. + */ + if (test_bit_inv(apid, prev_shadow_apm)) + set_bit_inv(apid, apm_filtered); + break; } } @@ -812,7 +829,7 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev) mutex_lock(&matrix_dev->guests_lock); mutex_lock(&matrix_dev->mdevs_lock); - vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + vfio_ap_mdev_reset_queues(matrix_mdev); vfio_ap_mdev_unlink_fr_queues(matrix_mdev); list_del(&matrix_mdev->node); mutex_unlock(&matrix_dev->mdevs_lock); @@ -922,6 +939,47 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev, AP_MKQID(apid, apqi)); } +static void collect_queues_to_reset(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid, + struct list_head *qlist) +{ + struct vfio_ap_queue *q; + unsigned long apqi; + + for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS) { + q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi)); + if (q) + list_add_tail(&q->reset_qnode, qlist); + } +} + +static void reset_queues_for_apid(struct ap_matrix_mdev *matrix_mdev, + unsigned long apid) +{ + struct list_head qlist; + + INIT_LIST_HEAD(&qlist); + collect_queues_to_reset(matrix_mdev, apid, &qlist); + vfio_ap_mdev_reset_qlist(&qlist); +} + +static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev, + unsigned long *apm_reset) +{ + struct list_head qlist; + unsigned long apid; + + if (bitmap_empty(apm_reset, AP_DEVICES)) + return 0; + + INIT_LIST_HEAD(&qlist); + + for_each_set_bit_inv(apid, apm_reset, AP_DEVICES) + collect_queues_to_reset(matrix_mdev, apid, &qlist); + + return vfio_ap_mdev_reset_qlist(&qlist); +} + /** * assign_adapter_store - parses the APID from @buf and sets the * corresponding bit in the mediated matrix device's APM @@ -962,7 +1020,7 @@ static ssize_t assign_adapter_store(struct device *dev, { int ret; unsigned long apid; - DECLARE_BITMAP(apm_delta, AP_DEVICES); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -991,12 +1049,11 @@ static ssize_t assign_adapter_store(struct device *dev, } vfio_ap_mdev_link_adapter(matrix_mdev, apid); - memset(apm_delta, 0, sizeof(apm_delta)); - set_bit_inv(apid, apm_delta); - if (vfio_ap_mdev_filter_matrix(apm_delta, - matrix_mdev->matrix.aqm, matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } ret = count; done: @@ -1027,11 +1084,12 @@ static struct vfio_ap_queue * adapter was assigned. * @matrix_mdev: the matrix mediated device to which the adapter was assigned. * @apid: the APID of the unassigned adapter. - * @qtable: table for storing queues associated with unassigned adapter. + * @qlist: list for storing queues associated with unassigned adapter that + * need to be reset. */ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, unsigned long apid, - struct ap_queue_table *qtable) + struct list_head *qlist) { unsigned long apqi; struct vfio_ap_queue *q; @@ -1039,11 +1097,10 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) { q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); - if (q && qtable) { + if (q && qlist) { if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) - hash_add(qtable->queues, &q->mdev_qnode, - q->apqn); + list_add_tail(&q->reset_qnode, qlist); } } } @@ -1051,26 +1108,23 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev, static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev, unsigned long apid) { - int loop_cursor; - struct vfio_ap_queue *q; - struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL); + struct vfio_ap_queue *q, *tmpq; + struct list_head qlist; - hash_init(qtable->queues); - vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable); + INIT_LIST_HEAD(&qlist); + vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, &qlist); if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) { clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); } - vfio_ap_mdev_reset_queues(qtable); + vfio_ap_mdev_reset_qlist(&qlist); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) { vfio_ap_unlink_mdev_fr_queue(q); - hash_del(&q->mdev_qnode); + list_del(&q->reset_qnode); } - - kfree(qtable); } /** @@ -1171,7 +1225,7 @@ static ssize_t assign_domain_store(struct device *dev, { int ret; unsigned long apqi; - DECLARE_BITMAP(aqm_delta, AP_DOMAINS); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); mutex_lock(&ap_perms_mutex); @@ -1200,12 +1254,11 @@ static ssize_t assign_domain_store(struct device *dev, } vfio_ap_mdev_link_domain(matrix_mdev, apqi); - memset(aqm_delta, 0, sizeof(aqm_delta)); - set_bit_inv(apqi, aqm_delta); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta, - matrix_mdev)) + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } ret = count; done: @@ -1218,7 +1271,7 @@ static DEVICE_ATTR_WO(assign_domain); static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, unsigned long apqi, - struct ap_queue_table *qtable) + struct list_head *qlist) { unsigned long apid; struct vfio_ap_queue *q; @@ -1226,11 +1279,10 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) { q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi); - if (q && qtable) { + if (q && qlist) { if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) - hash_add(qtable->queues, &q->mdev_qnode, - q->apqn); + list_add_tail(&q->reset_qnode, qlist); } } } @@ -1238,26 +1290,23 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev, static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev, unsigned long apqi) { - int loop_cursor; - struct vfio_ap_queue *q; - struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL); + struct vfio_ap_queue *q, *tmpq; + struct list_head qlist; - hash_init(qtable->queues); - vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable); + INIT_LIST_HEAD(&qlist); + vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, &qlist); if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); } - vfio_ap_mdev_reset_queues(qtable); + vfio_ap_mdev_reset_qlist(&qlist); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) { vfio_ap_unlink_mdev_fr_queue(q); - hash_del(&q->mdev_qnode); + list_del(&q->reset_qnode); } - - kfree(qtable); } /** @@ -1612,7 +1661,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) get_update_locks_for_kvm(kvm); kvm_arch_crypto_clear_masks(kvm); - vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + vfio_ap_mdev_reset_queues(matrix_mdev); kvm_put_kvm(kvm); matrix_mdev->kvm = NULL; @@ -1748,15 +1797,33 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q) } } -static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable) +static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev) { int ret = 0, loop_cursor; struct vfio_ap_queue *q; - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) + hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) vfio_ap_mdev_reset_queue(q); - hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) { + flush_work(&q->reset_work); + + if (q->reset_status.response_code) + ret = -EIO; + } + + return ret; +} + +static int vfio_ap_mdev_reset_qlist(struct list_head *qlist) +{ + int ret = 0; + struct vfio_ap_queue *q; + + list_for_each_entry(q, qlist, reset_qnode) + vfio_ap_mdev_reset_queue(q); + + list_for_each_entry(q, qlist, reset_qnode) { flush_work(&q->reset_work); if (q->reset_status.response_code) @@ -1942,7 +2009,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, ret = vfio_ap_mdev_get_device_info(arg); break; case VFIO_DEVICE_RESET: - ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable); + ret = vfio_ap_mdev_reset_queues(matrix_mdev); break; case VFIO_DEVICE_GET_IRQ_INFO: ret = vfio_ap_get_irq_info(arg); @@ -2088,6 +2155,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) { int ret; struct vfio_ap_queue *q; + DECLARE_BITMAP(apm_filtered, AP_DEVICES); struct ap_matrix_mdev *matrix_mdev; ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group); @@ -2109,15 +2177,28 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) if (matrix_mdev) { vfio_ap_mdev_link_queue(matrix_mdev, q); - if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, - matrix_mdev->matrix.aqm, - matrix_mdev)) + /* + * If we're in the process of handling the adding of adapters or + * domains to the host's AP configuration, then let the + * vfio_ap device driver's on_scan_complete callback filter the + * matrix and update the guest's AP configuration after all of + * the new queue devices are probed. + */ + if (!bitmap_empty(matrix_mdev->apm_add, AP_DEVICES) || + !bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS)) + goto done; + + if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) { vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + } } + +done: dev_set_drvdata(&apdev->device, q); release_update_locks_for_mdev(matrix_mdev); - return 0; + return ret; err_remove_group: sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group); @@ -2134,26 +2215,40 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev) q = dev_get_drvdata(&apdev->device); get_update_locks_for_queue(q); matrix_mdev = q->matrix_mdev; + apid = AP_QID_CARD(q->apqn); + apqi = AP_QID_QUEUE(q->apqn); if (matrix_mdev) { - vfio_ap_unlink_queue_fr_mdev(q); - - apid = AP_QID_CARD(q->apqn); - apqi = AP_QID_QUEUE(q->apqn); - - /* - * If the queue is assigned to the guest's APCB, then remove - * the adapter's APID from the APCB and hot it into the guest. - */ + /* If the queue is assigned to the guest's AP configuration */ if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) && test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { + /* + * Since the queues are defined via a matrix of adapters + * and domains, it is not possible to hot unplug a + * single queue; so, let's unplug the adapter. + */ clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apid(matrix_mdev, apid); + goto done; } } - vfio_ap_mdev_reset_queue(q); - flush_work(&q->reset_work); + /* + * If the queue is not in the host's AP configuration, then resetting + * it will fail with response code 01, (APQN not valid); so, let's make + * sure it is in the host's config. + */ + if (test_bit_inv(apid, (unsigned long *)matrix_dev->info.apm) && + test_bit_inv(apqi, (unsigned long *)matrix_dev->info.aqm)) { + vfio_ap_mdev_reset_queue(q); + flush_work(&q->reset_work); + } + +done: + if (matrix_mdev) + vfio_ap_unlink_queue_fr_mdev(q); + dev_set_drvdata(&apdev->device, NULL); kfree(q); release_update_locks_for_mdev(matrix_mdev); @@ -2461,39 +2556,30 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info, static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev) { - bool do_hotplug = false; - int filter_domains = 0; - int filter_adapters = 0; - DECLARE_BITMAP(apm, AP_DEVICES); - DECLARE_BITMAP(aqm, AP_DOMAINS); + DECLARE_BITMAP(apm_filtered, AP_DEVICES); + bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false; mutex_lock(&matrix_mdev->kvm->lock); mutex_lock(&matrix_dev->mdevs_lock); - filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm, - matrix_mdev->apm_add, AP_DEVICES); - filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm, - matrix_mdev->aqm_add, AP_DOMAINS); + filter_adapters = bitmap_intersects(matrix_mdev->matrix.apm, + matrix_mdev->apm_add, AP_DEVICES); + filter_domains = bitmap_intersects(matrix_mdev->matrix.aqm, + matrix_mdev->aqm_add, AP_DOMAINS); + filter_cdoms = bitmap_intersects(matrix_mdev->matrix.adm, + matrix_mdev->adm_add, AP_DOMAINS); - if (filter_adapters && filter_domains) - do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev); - else if (filter_adapters) - do_hotplug |= - vfio_ap_mdev_filter_matrix(apm, - matrix_mdev->shadow_apcb.aqm, - matrix_mdev); - else - do_hotplug |= - vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm, - aqm, matrix_mdev); + if (filter_adapters || filter_domains) + do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered); - if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add, - AP_DOMAINS)) + if (filter_cdoms) do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev); if (do_hotplug) vfio_ap_mdev_update_guest_apcb(matrix_mdev); + reset_queues_for_apids(matrix_mdev, apm_filtered); + mutex_unlock(&matrix_dev->mdevs_lock); mutex_unlock(&matrix_mdev->kvm->lock); } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 88aff8b81f2f..98d37aa27044 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -133,6 +133,8 @@ struct ap_matrix_mdev { * @apqn: the APQN of the AP queue device * @saved_isc: the guest ISC registered with the GIB interface * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable + * @reset_qnode: allows the vfio_ap_queue struct to be added to a list of queues + * that need to be reset * @reset_status: the status from the last reset of the queue * @reset_work: work to wait for queue reset to complete */ @@ -143,6 +145,7 @@ struct vfio_ap_queue { #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; struct hlist_node mdev_qnode; + struct list_head reset_qnode; struct ap_queue_status reset_status; struct work_struct reset_work; };