s390 updates for 6.8 merge window part 2

- do not enable by default the support of 31-bit Enterprise Systems
   Architecture (ESA) ELF binaries
 
 - drop automatic CONFIG_KEXEC selection, while set CONFIG_KEXEC=y
   explicitly for defconfig and debug_defconfig only
 
 - fix zpci_get_max_io_size() to allow PCI block stores where
   normal PCI stores were used otherwise
 
 - remove unneeded tsk variable in do_exception() fault handler
 
 - __load_fpu_regs() is only called from the core kernel code.
   Therefore, remove not needed EXPORT_SYMBOL.
 
 - remove leftover comment from s390_fpregs_set() callback
 
 - few cleanups to Processor Activity Instrumentation (PAI) code
   (which perf framework is based on)
 
 - replace Wenjia Zhang with Thorsten Winkler as s390 Inter-User
   Communication Vehicle (IUCV) networking maintainer
 
 - Fix all scenarios where queues previously removed from a guest's
   Adjunct-Processor (AP) configuration do not re-appear in a reset
   state when they are subsequently made available to a guest again
 -----BEGIN PGP SIGNATURE-----
 
 iI0EABYIADUWIQQrtrZiYVkVzKQcYivNdxKlNrRb8AUCZalUTxccYWdvcmRlZXZA
 bGludXguaWJtLmNvbQAKCRDNdxKlNrRb8OZlAQDq7gZEL/ZSh1X4IQa4qPbdGeko
 ArZvcOi3SiVW3jw3FgD8CfPauKux1tliSpeRybcRkUVQoPGvoWc8kwAkCm/Juwc=
 =+Xd9
 -----END PGP SIGNATURE-----

Merge tag 's390-6.8-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull more s390 updates from Alexander Gordeev:

 - do not enable by default the support of 31-bit Enterprise Systems
   Architecture (ESA) ELF binaries

 - drop automatic CONFIG_KEXEC selection, while set CONFIG_KEXEC=y
   explicitly for defconfig and debug_defconfig only

 - fix zpci_get_max_io_size() to allow PCI block stores where normal PCI
   stores were used otherwise

 - remove unneeded tsk variable in do_exception() fault handler

 - __load_fpu_regs() is only called from the core kernel code.
   Therefore, remove not needed EXPORT_SYMBOL.

 - remove leftover comment from s390_fpregs_set() callback

 - few cleanups to Processor Activity Instrumentation (PAI) code (which
   perf framework is based on)

 - replace Wenjia Zhang with Thorsten Winkler as s390 Inter-User
   Communication Vehicle (IUCV) networking maintainer

 - Fix all scenarios where queues previously removed from a guest's
   Adjunct-Processor (AP) configuration do not re-appear in a reset
   state when they are subsequently made available to a guest again

* tag 's390-6.8-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/vfio-ap: do not reset queue removed from host config
  s390/vfio-ap: reset queues associated with adapter for queue unbound from driver
  s390/vfio-ap: reset queues filtered from the guest's AP config
  s390/vfio-ap: let on_scan_complete() callback filter matrix and update guest's APCB
  s390/vfio-ap: loop over the shadow APCB when filtering guest's AP configuration
  s390/vfio-ap: always filter entire AP matrix
  s390/net: add Thorsten Winkler as maintainer
  s390/pai_ext: split function paiext_push_sample
  s390/pai_ext: rework function paiext_copy argments
  s390/pai: rework paiXXX_start and paiXXX_stop functions
  s390/pai_crypto: split function paicrypt_push_sample
  s390/pai: rework paixxxx_getctr interface
  s390/ptrace: remove leftover comment
  s390/fpu: remove __load_fpu_regs() export
  s390/mm,fault: remove not needed tsk variable
  s390/pci: fix max size calculation in zpci_memcpy_toio()
  s390/kexec: do not automatically select KEXEC option
  s390/compat: change default for CONFIG_COMPAT to "n"
This commit is contained in:
Linus Torvalds 2024-01-18 14:11:25 -08:00
commit 302d185865
14 changed files with 298 additions and 194 deletions

View file

@ -19124,7 +19124,7 @@ F: drivers/iommu/s390-iommu.c
S390 IUCV NETWORK LAYER
M: Alexandra Winter <wintera@linux.ibm.com>
M: Wenjia Zhang <wenjia@linux.ibm.com>
M: Thorsten Winkler <twinkler@linux.ibm.com>
L: linux-s390@vger.kernel.org
L: netdev@vger.kernel.org
S: Supported
@ -19143,7 +19143,7 @@ F: arch/s390/mm
S390 NETWORK DRIVERS
M: Alexandra Winter <wintera@linux.ibm.com>
M: Wenjia Zhang <wenjia@linux.ibm.com>
M: Thorsten Winkler <twinkler@linux.ibm.com>
L: linux-s390@vger.kernel.org
L: netdev@vger.kernel.org
S: Supported

View file

@ -216,7 +216,6 @@ config S390
select HAVE_VIRT_CPU_ACCOUNTING_IDLE
select IOMMU_HELPER if PCI
select IOMMU_SUPPORT if PCI
select KEXEC
select MMU_GATHER_MERGE_VMAS
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
@ -443,7 +442,7 @@ config COMMAND_LINE_SIZE
line.
config COMPAT
def_bool y
def_bool n
prompt "Kernel support for 31 bit emulation"
select ARCH_WANT_OLD_COMPAT_IPC
select COMPAT_OLD_SIGACTION
@ -454,7 +453,9 @@ config COMPAT
Select this option if you want to enable your system kernel to
handle system-calls from ELF binaries for 31 bit ESA. This option
(and some other stuff like libraries and such) is needed for
executing 31 bit applications. It is safe to say "Y".
executing 31 bit applications.
If unsure say N.
config SMP
def_bool y

View file

@ -40,6 +40,7 @@ CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y

View file

@ -38,6 +38,7 @@ CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y

View file

@ -10,7 +10,6 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_CRASH_DUMP=y
CONFIG_MARCH_Z13=y
# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
# CONFIG_CHSC_SCH is not set

View file

@ -11,6 +11,8 @@
/* I/O size constraints */
#define ZPCI_MAX_READ_SIZE 8
#define ZPCI_MAX_WRITE_SIZE 128
#define ZPCI_BOUNDARY_SIZE (1 << 12)
#define ZPCI_BOUNDARY_MASK (ZPCI_BOUNDARY_SIZE - 1)
/* I/O Map */
#define ZPCI_IOMAP_SHIFT 48
@ -125,16 +127,18 @@ static inline int zpci_read_single(void *dst, const volatile void __iomem *src,
int zpci_write_block(volatile void __iomem *dst, const void *src,
unsigned long len);
static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
static inline int zpci_get_max_io_size(u64 src, u64 dst, int len, int max)
{
int count = len > max ? max : len, size = 1;
int offset = dst & ZPCI_BOUNDARY_MASK;
int size;
while (!(src & 0x1) && !(dst & 0x1) && ((size << 1) <= count)) {
dst = dst >> 1;
src = src >> 1;
size = size << 1;
}
return size;
size = min3(len, ZPCI_BOUNDARY_SIZE - offset, max);
if (IS_ALIGNED(src, 8) && IS_ALIGNED(dst, 8) && IS_ALIGNED(size, 8))
return size;
if (size >= 8)
return 8;
return rounddown_pow_of_two(size);
}
static inline int zpci_memcpy_fromio(void *dst,
@ -144,9 +148,9 @@ static inline int zpci_memcpy_fromio(void *dst,
int size, rc = 0;
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) src,
(u64) dst, n,
ZPCI_MAX_READ_SIZE);
size = zpci_get_max_io_size((u64 __force) src,
(u64) dst, n,
ZPCI_MAX_READ_SIZE);
rc = zpci_read_single(dst, src, size);
if (rc)
break;
@ -166,9 +170,9 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
return -EINVAL;
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) dst,
(u64) src, n,
ZPCI_MAX_WRITE_SIZE);
size = zpci_get_max_io_size((u64 __force) dst,
(u64) src, n,
ZPCI_MAX_WRITE_SIZE);
if (size > 8) /* main path */
rc = zpci_write_block(dst, src, size);
else

View file

@ -208,7 +208,6 @@ void __load_fpu_regs(void)
}
clear_cpu_flag(CIF_FPU);
}
EXPORT_SYMBOL(__load_fpu_regs);
void load_fpu_regs(void)
{

View file

@ -111,11 +111,11 @@ static void paicrypt_event_destroy(struct perf_event *event)
mutex_unlock(&pai_reserve_mutex);
}
static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel)
{
if (kernel)
nr += PAI_CRYPTO_MAXCTR;
return cpump->page[nr];
return page[nr];
}
/* Read the counter values. Return value from location in CMP. For event
@ -129,13 +129,13 @@ static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
int i;
if (event->attr.config != PAI_CRYPTO_BASE) {
return paicrypt_getctr(cpump,
return paicrypt_getctr(cpump->page,
event->attr.config - PAI_CRYPTO_BASE,
kernel);
}
for (i = 1; i <= paicrypt_cnt; i++) {
u64 val = paicrypt_getctr(cpump, i, kernel);
u64 val = paicrypt_getctr(cpump->page, i, kernel);
if (!val)
continue;
@ -317,10 +317,14 @@ static void paicrypt_start(struct perf_event *event, int flags)
* Events are added, deleted and re-added when 2 or more events
* are active at the same time.
*/
if (!event->hw.last_tag) {
event->hw.last_tag = 1;
sum = paicrypt_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
if (!event->attr.sample_period) { /* Counting */
if (!event->hw.last_tag) {
event->hw.last_tag = 1;
sum = paicrypt_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
}
} else { /* Sampling */
perf_sched_cb_inc(event->pmu);
}
}
@ -336,19 +340,18 @@ static int paicrypt_add(struct perf_event *event, int flags)
local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
}
cpump->event = event;
if (flags & PERF_EF_START && !event->attr.sample_period) {
/* Only counting needs initial counter value */
if (flags & PERF_EF_START)
paicrypt_start(event, PERF_EF_RELOAD);
}
event->hw.state = 0;
if (event->attr.sample_period)
perf_sched_cb_inc(event->pmu);
return 0;
}
static void paicrypt_stop(struct perf_event *event, int flags)
{
paicrypt_read(event);
if (!event->attr.sample_period) /* Counting */
paicrypt_read(event);
else /* Sampling */
perf_sched_cb_dec(event->pmu);
event->hw.state = PERF_HES_STOPPED;
}
@ -357,11 +360,7 @@ static void paicrypt_del(struct perf_event *event, int flags)
struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
struct paicrypt_map *cpump = mp->mapptr;
if (event->attr.sample_period)
perf_sched_cb_dec(event->pmu);
if (!event->attr.sample_period)
/* Only counting needs to read counter */
paicrypt_stop(event, PERF_EF_UPDATE);
paicrypt_stop(event, PERF_EF_UPDATE);
if (--cpump->active_events == 0) {
local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
WRITE_ONCE(S390_lowcore.ccd, 0);
@ -373,8 +372,7 @@ static void paicrypt_del(struct perf_event *event, int flags)
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
static size_t paicrypt_copy(struct pai_userdata *userdata,
struct paicrypt_map *cpump,
static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page,
bool exclude_user, bool exclude_kernel)
{
int i, outidx = 0;
@ -383,9 +381,9 @@ static size_t paicrypt_copy(struct pai_userdata *userdata,
u64 val = 0;
if (!exclude_kernel)
val += paicrypt_getctr(cpump, i, true);
val += paicrypt_getctr(page, i, true);
if (!exclude_user)
val += paicrypt_getctr(cpump, i, false);
val += paicrypt_getctr(page, i, false);
if (val) {
userdata[outidx].num = i;
userdata[outidx].value = val;
@ -395,25 +393,14 @@ static size_t paicrypt_copy(struct pai_userdata *userdata,
return outidx * sizeof(struct pai_userdata);
}
static int paicrypt_push_sample(void)
static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
struct perf_event *event)
{
struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
struct paicrypt_map *cpump = mp->mapptr;
struct perf_event *event = cpump->event;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
size_t rawsize;
int overflow;
if (!cpump->event) /* No event active */
return 0;
rawsize = paicrypt_copy(cpump->save, cpump,
cpump->event->attr.exclude_user,
cpump->event->attr.exclude_kernel);
if (!rawsize) /* No incremented counters */
return 0;
/* Setup perf sample */
memset(&regs, 0, sizeof(regs));
memset(&raw, 0, sizeof(raw));
@ -444,6 +431,25 @@ static int paicrypt_push_sample(void)
return overflow;
}
/* Check if there is data to be saved on schedule out of a task. */
static int paicrypt_have_sample(void)
{
struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
struct paicrypt_map *cpump = mp->mapptr;
struct perf_event *event = cpump->event;
size_t rawsize;
int rc = 0;
if (!event) /* No event active */
return 0;
rawsize = paicrypt_copy(cpump->save, cpump->page,
cpump->event->attr.exclude_user,
cpump->event->attr.exclude_kernel);
if (rawsize) /* No incremented counters */
rc = paicrypt_push_sample(rawsize, cpump, event);
return rc;
}
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event CRYPTO_ALL is allowed.
*/
@ -453,7 +459,7 @@ static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sch
* results on schedule_out and if page was dirty, clear values.
*/
if (!sched_in)
paicrypt_push_sample();
paicrypt_have_sample();
}
/* Attribute definitions for paicrypt interface. As with other CPU

View file

@ -276,9 +276,9 @@ static int paiext_event_init(struct perf_event *event)
return 0;
}
static u64 paiext_getctr(struct paiext_map *cpump, int nr)
static u64 paiext_getctr(unsigned long *area, int nr)
{
return cpump->area[nr];
return area[nr];
}
/* Read the counter values. Return value from location in buffer. For event
@ -292,10 +292,11 @@ static u64 paiext_getdata(struct perf_event *event)
int i;
if (event->attr.config != PAI_NNPA_BASE)
return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
return paiext_getctr(cpump->area,
event->attr.config - PAI_NNPA_BASE);
for (i = 1; i <= paiext_cnt; i++)
sum += paiext_getctr(cpump, i);
sum += paiext_getctr(cpump->area, i);
return sum;
}
@ -320,11 +321,15 @@ static void paiext_start(struct perf_event *event, int flags)
{
u64 sum;
if (event->hw.last_tag)
return;
event->hw.last_tag = 1;
sum = paiext_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
if (!event->attr.sample_period) { /* Counting */
if (!event->hw.last_tag) {
event->hw.last_tag = 1;
sum = paiext_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
}
} else { /* Sampling */
perf_sched_cb_inc(event->pmu);
}
}
static int paiext_add(struct perf_event *event, int flags)
@ -341,21 +346,19 @@ static int paiext_add(struct perf_event *event, int flags)
debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
__func__, S390_lowcore.aicd, pcb->acc);
}
if (flags & PERF_EF_START && !event->attr.sample_period) {
/* Only counting needs initial counter value */
cpump->event = event;
if (flags & PERF_EF_START)
paiext_start(event, PERF_EF_RELOAD);
}
event->hw.state = 0;
if (event->attr.sample_period) {
cpump->event = event;
perf_sched_cb_inc(event->pmu);
}
return 0;
}
static void paiext_stop(struct perf_event *event, int flags)
{
paiext_read(event);
if (!event->attr.sample_period) /* Counting */
paiext_read(event);
else /* Sampling */
perf_sched_cb_dec(event->pmu);
event->hw.state = PERF_HES_STOPPED;
}
@ -365,12 +368,7 @@ static void paiext_del(struct perf_event *event, int flags)
struct paiext_map *cpump = mp->mapptr;
struct paiext_cb *pcb = cpump->paiext_cb;
if (event->attr.sample_period)
perf_sched_cb_dec(event->pmu);
if (!event->attr.sample_period) {
/* Only counting needs to read counter */
paiext_stop(event, PERF_EF_UPDATE);
}
paiext_stop(event, PERF_EF_UPDATE);
if (--cpump->active_events == 0) {
/* Disable CPU instruction lookup for PAIE1 control block */
local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
@ -386,13 +384,12 @@ static void paiext_del(struct perf_event *event, int flags)
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
static size_t paiext_copy(struct paiext_map *cpump)
static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area)
{
struct pai_userdata *userdata = cpump->save;
int i, outidx = 0;
for (i = 1; i <= paiext_cnt; i++) {
u64 val = paiext_getctr(cpump, i);
u64 val = paiext_getctr(area, i);
if (val) {
userdata[outidx].num = i;
@ -418,21 +415,14 @@ static size_t paiext_copy(struct paiext_map *cpump)
* sched_task() callback. That callback is not active after paiext_del()
* returns and has deleted the event on that CPU.
*/
static int paiext_push_sample(void)
static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
struct perf_event *event)
{
struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
struct paiext_map *cpump = mp->mapptr;
struct perf_event *event = cpump->event;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
size_t rawsize;
int overflow;
rawsize = paiext_copy(cpump);
if (!rawsize) /* No incremented counters */
return 0;
/* Setup perf sample */
memset(&regs, 0, sizeof(regs));
memset(&raw, 0, sizeof(raw));
@ -461,6 +451,23 @@ static int paiext_push_sample(void)
return overflow;
}
/* Check if there is data to be saved on schedule out of a task. */
static int paiext_have_sample(void)
{
struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
struct paiext_map *cpump = mp->mapptr;
struct perf_event *event = cpump->event;
size_t rawsize;
int rc = 0;
if (!event)
return 0;
rawsize = paiext_copy(cpump->save, cpump->area);
if (rawsize) /* Incremented counters */
rc = paiext_push_sample(rawsize, cpump, event);
return rc;
}
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event NNPA_ALL is allowed.
*/
@ -470,7 +477,7 @@ static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched
* results on schedule_out and if page was dirty, clear values.
*/
if (!sched_in)
paiext_push_sample();
paiext_have_sample();
}
/* Attribute definitions for pai extension1 interface. As with other CPU

View file

@ -917,7 +917,6 @@ static int s390_fpregs_set(struct task_struct *target,
else
memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
/* If setting FPC, must validate it first. */
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,

View file

@ -280,7 +280,6 @@ static void do_sigbus(struct pt_regs *regs)
static void do_exception(struct pt_regs *regs, int access)
{
struct vm_area_struct *vma;
struct task_struct *tsk;
unsigned long address;
struct mm_struct *mm;
enum fault_type type;
@ -289,7 +288,6 @@ static void do_exception(struct pt_regs *regs, int access)
vm_fault_t fault;
bool is_write;
tsk = current;
/*
* The instruction that caused the program check has
* been nullified. Don't signal single step via SIGTRAP.
@ -297,7 +295,7 @@ static void do_exception(struct pt_regs *regs, int access)
clear_thread_flag(TIF_PER_TRAP);
if (kprobe_page_fault(regs, 14))
return;
mm = tsk->mm;
mm = current->mm;
address = get_fault_address(regs);
is_write = fault_is_write(regs);
type = get_fault_type(regs);

View file

@ -97,9 +97,9 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
return -EINVAL;
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) dst,
(u64 __force) src, n,
ZPCI_MAX_WRITE_SIZE);
size = zpci_get_max_io_size((u64 __force) dst,
(u64 __force) src, n,
ZPCI_MAX_WRITE_SIZE);
if (size > 8) /* main path */
rc = __pcistb_mio_inuser(dst, src, size, &status);
else
@ -242,9 +242,9 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
u8 status;
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) src,
(u64 __force) dst, n,
ZPCI_MAX_READ_SIZE);
size = zpci_get_max_io_size((u64 __force) src,
(u64 __force) dst, n,
ZPCI_MAX_READ_SIZE);
rc = __pcilg_mio_inuser(dst, src, size, &status);
if (rc)
break;

View file

@ -32,7 +32,8 @@
#define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */
static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
static int vfio_ap_mdev_reset_qlist(struct list_head *qlist);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
@ -665,17 +666,23 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
* device driver.
*
* @matrix_mdev: the matrix mdev whose matrix is to be filtered.
* @apm_filtered: a 256-bit bitmap for storing the APIDs filtered from the
* guest's AP configuration that are still in the host's AP
* configuration.
*
* Note: If an APQN referencing a queue device that is not bound to the vfio_ap
* driver, its APID will be filtered from the guest's APCB. The matrix
* structure precludes filtering an individual APQN, so its APID will be
* filtered.
* filtered. Consequently, all queues associated with the adapter that
* are in the host's AP configuration must be reset. If queues are
* subsequently made available again to the guest, they should re-appear
* in a reset state
*
* Return: a boolean value indicating whether the KVM guest's APCB was changed
* by the filtering or not.
*/
static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
struct ap_matrix_mdev *matrix_mdev)
static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
unsigned long *apm_filtered)
{
unsigned long apid, apqi, apqn;
DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
@ -685,6 +692,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES);
bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS);
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
bitmap_clear(apm_filtered, 0, AP_DEVICES);
/*
* Copy the adapters, domains and control domains to the shadow_apcb
@ -696,8 +704,9 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm,
(unsigned long *)matrix_dev->info.aqm, AP_DOMAINS);
for_each_set_bit_inv(apid, apm, AP_DEVICES) {
for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm, AP_DEVICES) {
for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm,
AP_DOMAINS) {
/*
* If the APQN is not bound to the vfio_ap device
* driver, then we can't assign it to the guest's
@ -709,8 +718,16 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
apqn = AP_MKQID(apid, apqi);
q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
if (!q || q->reset_status.response_code) {
clear_bit_inv(apid,
matrix_mdev->shadow_apcb.apm);
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
/*
* If the adapter was previously plugged into
* the guest, let's let the caller know that
* the APID was filtered.
*/
if (test_bit_inv(apid, prev_shadow_apm))
set_bit_inv(apid, apm_filtered);
break;
}
}
@ -812,7 +829,7 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev)
mutex_lock(&matrix_dev->guests_lock);
mutex_lock(&matrix_dev->mdevs_lock);
vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
vfio_ap_mdev_reset_queues(matrix_mdev);
vfio_ap_mdev_unlink_fr_queues(matrix_mdev);
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->mdevs_lock);
@ -922,6 +939,47 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev,
AP_MKQID(apid, apqi));
}
static void collect_queues_to_reset(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid,
struct list_head *qlist)
{
struct vfio_ap_queue *q;
unsigned long apqi;
for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS) {
q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi));
if (q)
list_add_tail(&q->reset_qnode, qlist);
}
}
static void reset_queues_for_apid(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid)
{
struct list_head qlist;
INIT_LIST_HEAD(&qlist);
collect_queues_to_reset(matrix_mdev, apid, &qlist);
vfio_ap_mdev_reset_qlist(&qlist);
}
static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev,
unsigned long *apm_reset)
{
struct list_head qlist;
unsigned long apid;
if (bitmap_empty(apm_reset, AP_DEVICES))
return 0;
INIT_LIST_HEAD(&qlist);
for_each_set_bit_inv(apid, apm_reset, AP_DEVICES)
collect_queues_to_reset(matrix_mdev, apid, &qlist);
return vfio_ap_mdev_reset_qlist(&qlist);
}
/**
* assign_adapter_store - parses the APID from @buf and sets the
* corresponding bit in the mediated matrix device's APM
@ -962,7 +1020,7 @@ static ssize_t assign_adapter_store(struct device *dev,
{
int ret;
unsigned long apid;
DECLARE_BITMAP(apm_delta, AP_DEVICES);
DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@ -991,12 +1049,11 @@ static ssize_t assign_adapter_store(struct device *dev,
}
vfio_ap_mdev_link_adapter(matrix_mdev, apid);
memset(apm_delta, 0, sizeof(apm_delta));
set_bit_inv(apid, apm_delta);
if (vfio_ap_mdev_filter_matrix(apm_delta,
matrix_mdev->matrix.aqm, matrix_mdev))
if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
reset_queues_for_apids(matrix_mdev, apm_filtered);
}
ret = count;
done:
@ -1027,11 +1084,12 @@ static struct vfio_ap_queue
* adapter was assigned.
* @matrix_mdev: the matrix mediated device to which the adapter was assigned.
* @apid: the APID of the unassigned adapter.
* @qtable: table for storing queues associated with unassigned adapter.
* @qlist: list for storing queues associated with unassigned adapter that
* need to be reset.
*/
static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid,
struct ap_queue_table *qtable)
struct list_head *qlist)
{
unsigned long apqi;
struct vfio_ap_queue *q;
@ -1039,11 +1097,10 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
if (q && qtable) {
if (q && qlist) {
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
hash_add(qtable->queues, &q->mdev_qnode,
q->apqn);
list_add_tail(&q->reset_qnode, qlist);
}
}
}
@ -1051,26 +1108,23 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid)
{
int loop_cursor;
struct vfio_ap_queue *q;
struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
struct vfio_ap_queue *q, *tmpq;
struct list_head qlist;
hash_init(qtable->queues);
vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable);
INIT_LIST_HEAD(&qlist);
vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, &qlist);
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) {
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
vfio_ap_mdev_reset_queues(qtable);
vfio_ap_mdev_reset_qlist(&qlist);
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) {
vfio_ap_unlink_mdev_fr_queue(q);
hash_del(&q->mdev_qnode);
list_del(&q->reset_qnode);
}
kfree(qtable);
}
/**
@ -1171,7 +1225,7 @@ static ssize_t assign_domain_store(struct device *dev,
{
int ret;
unsigned long apqi;
DECLARE_BITMAP(aqm_delta, AP_DOMAINS);
DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@ -1200,12 +1254,11 @@ static ssize_t assign_domain_store(struct device *dev,
}
vfio_ap_mdev_link_domain(matrix_mdev, apqi);
memset(aqm_delta, 0, sizeof(aqm_delta));
set_bit_inv(apqi, aqm_delta);
if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta,
matrix_mdev))
if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
reset_queues_for_apids(matrix_mdev, apm_filtered);
}
ret = count;
done:
@ -1218,7 +1271,7 @@ static DEVICE_ATTR_WO(assign_domain);
static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
unsigned long apqi,
struct ap_queue_table *qtable)
struct list_head *qlist)
{
unsigned long apid;
struct vfio_ap_queue *q;
@ -1226,11 +1279,10 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
if (q && qtable) {
if (q && qlist) {
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
hash_add(qtable->queues, &q->mdev_qnode,
q->apqn);
list_add_tail(&q->reset_qnode, qlist);
}
}
}
@ -1238,26 +1290,23 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev,
unsigned long apqi)
{
int loop_cursor;
struct vfio_ap_queue *q;
struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
struct vfio_ap_queue *q, *tmpq;
struct list_head qlist;
hash_init(qtable->queues);
vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable);
INIT_LIST_HEAD(&qlist);
vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, &qlist);
if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
vfio_ap_mdev_reset_queues(qtable);
vfio_ap_mdev_reset_qlist(&qlist);
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) {
vfio_ap_unlink_mdev_fr_queue(q);
hash_del(&q->mdev_qnode);
list_del(&q->reset_qnode);
}
kfree(qtable);
}
/**
@ -1612,7 +1661,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
get_update_locks_for_kvm(kvm);
kvm_arch_crypto_clear_masks(kvm);
vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
vfio_ap_mdev_reset_queues(matrix_mdev);
kvm_put_kvm(kvm);
matrix_mdev->kvm = NULL;
@ -1748,15 +1797,33 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
}
}
static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
{
int ret = 0, loop_cursor;
struct vfio_ap_queue *q;
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode)
vfio_ap_mdev_reset_queue(q);
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) {
flush_work(&q->reset_work);
if (q->reset_status.response_code)
ret = -EIO;
}
return ret;
}
static int vfio_ap_mdev_reset_qlist(struct list_head *qlist)
{
int ret = 0;
struct vfio_ap_queue *q;
list_for_each_entry(q, qlist, reset_qnode)
vfio_ap_mdev_reset_queue(q);
list_for_each_entry(q, qlist, reset_qnode) {
flush_work(&q->reset_work);
if (q->reset_status.response_code)
@ -1942,7 +2009,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
ret = vfio_ap_mdev_get_device_info(arg);
break;
case VFIO_DEVICE_RESET:
ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
ret = vfio_ap_mdev_reset_queues(matrix_mdev);
break;
case VFIO_DEVICE_GET_IRQ_INFO:
ret = vfio_ap_get_irq_info(arg);
@ -2088,6 +2155,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
{
int ret;
struct vfio_ap_queue *q;
DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev;
ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group);
@ -2109,15 +2177,28 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
if (matrix_mdev) {
vfio_ap_mdev_link_queue(matrix_mdev, q);
if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm,
matrix_mdev->matrix.aqm,
matrix_mdev))
/*
* If we're in the process of handling the adding of adapters or
* domains to the host's AP configuration, then let the
* vfio_ap device driver's on_scan_complete callback filter the
* matrix and update the guest's AP configuration after all of
* the new queue devices are probed.
*/
if (!bitmap_empty(matrix_mdev->apm_add, AP_DEVICES) ||
!bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS))
goto done;
if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
reset_queues_for_apids(matrix_mdev, apm_filtered);
}
}
done:
dev_set_drvdata(&apdev->device, q);
release_update_locks_for_mdev(matrix_mdev);
return 0;
return ret;
err_remove_group:
sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group);
@ -2134,26 +2215,40 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
q = dev_get_drvdata(&apdev->device);
get_update_locks_for_queue(q);
matrix_mdev = q->matrix_mdev;
apid = AP_QID_CARD(q->apqn);
apqi = AP_QID_QUEUE(q->apqn);
if (matrix_mdev) {
vfio_ap_unlink_queue_fr_mdev(q);
apid = AP_QID_CARD(q->apqn);
apqi = AP_QID_QUEUE(q->apqn);
/*
* If the queue is assigned to the guest's APCB, then remove
* the adapter's APID from the APCB and hot it into the guest.
*/
/* If the queue is assigned to the guest's AP configuration */
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
/*
* Since the queues are defined via a matrix of adapters
* and domains, it is not possible to hot unplug a
* single queue; so, let's unplug the adapter.
*/
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
reset_queues_for_apid(matrix_mdev, apid);
goto done;
}
}
vfio_ap_mdev_reset_queue(q);
flush_work(&q->reset_work);
/*
* If the queue is not in the host's AP configuration, then resetting
* it will fail with response code 01, (APQN not valid); so, let's make
* sure it is in the host's config.
*/
if (test_bit_inv(apid, (unsigned long *)matrix_dev->info.apm) &&
test_bit_inv(apqi, (unsigned long *)matrix_dev->info.aqm)) {
vfio_ap_mdev_reset_queue(q);
flush_work(&q->reset_work);
}
done:
if (matrix_mdev)
vfio_ap_unlink_queue_fr_mdev(q);
dev_set_drvdata(&apdev->device, NULL);
kfree(q);
release_update_locks_for_mdev(matrix_mdev);
@ -2461,39 +2556,30 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info,
static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
{
bool do_hotplug = false;
int filter_domains = 0;
int filter_adapters = 0;
DECLARE_BITMAP(apm, AP_DEVICES);
DECLARE_BITMAP(aqm, AP_DOMAINS);
DECLARE_BITMAP(apm_filtered, AP_DEVICES);
bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false;
mutex_lock(&matrix_mdev->kvm->lock);
mutex_lock(&matrix_dev->mdevs_lock);
filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm,
matrix_mdev->apm_add, AP_DEVICES);
filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm,
matrix_mdev->aqm_add, AP_DOMAINS);
filter_adapters = bitmap_intersects(matrix_mdev->matrix.apm,
matrix_mdev->apm_add, AP_DEVICES);
filter_domains = bitmap_intersects(matrix_mdev->matrix.aqm,
matrix_mdev->aqm_add, AP_DOMAINS);
filter_cdoms = bitmap_intersects(matrix_mdev->matrix.adm,
matrix_mdev->adm_add, AP_DOMAINS);
if (filter_adapters && filter_domains)
do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev);
else if (filter_adapters)
do_hotplug |=
vfio_ap_mdev_filter_matrix(apm,
matrix_mdev->shadow_apcb.aqm,
matrix_mdev);
else
do_hotplug |=
vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm,
aqm, matrix_mdev);
if (filter_adapters || filter_domains)
do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered);
if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add,
AP_DOMAINS))
if (filter_cdoms)
do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev);
if (do_hotplug)
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
reset_queues_for_apids(matrix_mdev, apm_filtered);
mutex_unlock(&matrix_dev->mdevs_lock);
mutex_unlock(&matrix_mdev->kvm->lock);
}

View file

@ -133,6 +133,8 @@ struct ap_matrix_mdev {
* @apqn: the APQN of the AP queue device
* @saved_isc: the guest ISC registered with the GIB interface
* @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
* @reset_qnode: allows the vfio_ap_queue struct to be added to a list of queues
* that need to be reset
* @reset_status: the status from the last reset of the queue
* @reset_work: work to wait for queue reset to complete
*/
@ -143,6 +145,7 @@ struct vfio_ap_queue {
#define VFIO_AP_ISC_INVALID 0xff
unsigned char saved_isc;
struct hlist_node mdev_qnode;
struct list_head reset_qnode;
struct ap_queue_status reset_status;
struct work_struct reset_work;
};