Merge branch 'vfio-ap' into features

Tony Krowiak says:

===================
This patch series is for the changes required in the vfio_ap device
driver to facilitate pass-through of crypto devices to a secure
execution guest. In particular, it is critical that no data from the
queues passed through to the SE guest is leaked when the guest is
destroyed. There are also some new response codes returned from the
PQAP(ZAPQ) and PQAP(TAPQ) commands that have been added to the
architecture in support of pass-through of crypto devices to SE guests;
these need to be accounted for when handling the reset of queues.
===================

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
This commit is contained in:
Heiko Carstens 2023-08-23 14:36:37 +02:00
commit 6daf5a6824
7 changed files with 139 additions and 77 deletions

View file

@ -1028,6 +1028,9 @@ static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
extern char sie_exit;
bool kvm_s390_pv_is_protected(struct kvm *kvm);
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);

View file

@ -463,6 +463,7 @@ static inline int is_prot_virt_host(void)
return prot_virt_host;
}
int uv_pin_shared(unsigned long paddr);
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_owned_page(unsigned long paddr);
@ -475,6 +476,11 @@ void setup_uv(void);
#define is_prot_virt_host() 0
static inline void setup_uv(void) {}
/*
 * No-op variant of uv_pin_shared(): ignores @paddr and always reports
 * success (0).
 * NOTE(review): presumably the fallback compiled when protected
 * virtualization host support is not built in - confirm against the
 * enclosing preprocessor conditional, which is not visible in this hunk.
 */
static inline int uv_pin_shared(unsigned long paddr)
{
	/* Nothing to pin in this configuration. */
	return 0;
}
static inline int uv_destroy_owned_page(unsigned long paddr)
{
return 0;

View file

@ -88,7 +88,7 @@ void __init setup_uv(void)
* Requests the Ultravisor to pin the page in the shared state. This will
* cause an intercept when the guest attempts to unshare the pinned page.
*/
static int uv_pin_shared(unsigned long paddr)
int uv_pin_shared(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
@ -100,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
return -EINVAL;
return 0;
}
EXPORT_SYMBOL_GPL(uv_pin_shared);
/*
* Requests the Ultravisor to destroy a guest page and make it

View file

@ -270,18 +270,6 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
return vcpu->arch.pv.handle;
}
/*
 * Report whether @kvm is a protected VM, i.e. whether
 * kvm_s390_pv_get_handle() yields a non-zero handle for it.
 * The caller must hold kvm->lock; lockdep asserts this.
 */
static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);

	return kvm_s390_pv_get_handle(kvm) != 0;
}
/*
 * Report whether @vcpu is a protected vCPU, i.e. whether
 * kvm_s390_pv_cpu_get_handle() yields a non-zero handle for it.
 * The caller must hold vcpu->mutex; lockdep asserts this.
 */
static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);

	return kvm_s390_pv_cpu_get_handle(vcpu) != 0;
}
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);

View file

@ -18,6 +18,20 @@
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
/**
 * kvm_s390_pv_is_protected - check whether a VM has a protected-VM handle
 * @kvm: the VM to query
 *
 * Context: the caller must hold kvm->lock (asserted via lockdep).
 *
 * Return: true if kvm_s390_pv_get_handle() returns a non-zero handle for
 * @kvm, false otherwise.
 */
bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);

	return kvm_s390_pv_get_handle(kvm) != 0;
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
/**
 * kvm_s390_pv_cpu_is_protected - check whether a vCPU has a protected handle
 * @vcpu: the vCPU to query
 *
 * Context: the caller must hold vcpu->mutex (asserted via lockdep).
 *
 * Return: true if kvm_s390_pv_cpu_get_handle() returns a non-zero handle
 * for @vcpu, false otherwise.
 */
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);

	return kvm_s390_pv_cpu_get_handle(vcpu) != 0;
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
/**
* struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
* be destroyed

View file

@ -30,13 +30,12 @@
#define AP_QUEUE_UNASSIGNED "unassigned"
#define AP_QUEUE_IN_USE "in use"
#define MAX_RESET_CHECK_WAIT 200 /* Sleep max 200ms for reset check */
#define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */
static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
/**
* get_update_locks_for_kvm: Acquire the locks required to dynamically update a
@ -360,6 +359,28 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib)
return 0;
}
/**
 * ensure_nib_shared - make sure the page holding the NIB is in shared storage
 * @addr: address of the page containing the NIB; handed to uv_pin_shared()
 * @gmap: guest address space used to re-import the page if the pin fails
 *
 * The NIB is accessed by both guest and host, so it has to be located in
 * shared storage. vfio_pin_pages() already attempts a pin shared and, if
 * that fails (possibly because the page is not shared), exports the page.
 * A second pin shared is issued here so that the UV reports an error when
 * the page is not a shared one. For a page that is already pinned shared
 * the UV returns success.
 *
 * Return: 0 if the pin shared succeeded, otherwise the non-zero result of
 * uv_pin_shared().
 */
static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
{
	int rc = uv_pin_shared(addr);

	if (!rc)
		return 0;

	/* vfio_pin_pages() likely exported the page so let's re-import */
	gmap_convert_to_secure(gmap, addr);
	return rc;
}
/**
* vfio_ap_irq_enable - Enable Interruption for an APQN
*
@ -423,6 +444,14 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK);
aqic_gisa.gisc = isc;
/* NIB in non-shared storage is a rc 6 for PV guests */
if (kvm_s390_pv_cpu_is_protected(vcpu) &&
ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) {
vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1);
status.response_code = AP_RESPONSE_INVALID_ADDRESS;
return status;
}
nisc = kvm_s390_gisc_register(kvm, isc);
if (nisc < 0) {
VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
@ -675,7 +704,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
*/
apqn = AP_MKQID(apid, apqi);
q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
if (!q || q->reset_rc) {
if (!q || q->reset_status.response_code) {
clear_bit_inv(apid,
matrix_mdev->shadow_apcb.apm);
break;
@ -1608,19 +1637,21 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
{
switch (status->response_code) {
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
if (status->queue_empty && !status->irq_enabled)
return 0;
return -EBUSY;
case AP_RESPONSE_DECONFIGURED:
/*
* If the AP queue is deconfigured, any subsequent AP command
* targeting the queue will fail with the same response code. On the
* other hand, when an AP adapter is deconfigured, the associated
* queues are reset, so let's return a value indicating the reset
* for which we're waiting completed successfully.
*/
return 0;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
return -EBUSY;
case AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE:
case AP_RESPONSE_ASSOC_FAILED:
/*
* These asynchronous response codes indicate a PQAP(AAPQ)
* instruction to associate a secret with the guest failed. All
* subsequent AP instructions will end with the asynchronous
* response code until the AP queue is reset; so, let's return
* a value indicating a reset needs to be performed again.
*/
return -EAGAIN;
default:
WARN(true,
"failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n",
@ -1630,91 +1661,105 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
}
}
static int apq_reset_check(struct vfio_ap_queue *q)
{
int ret;
int iters = MAX_RESET_CHECK_WAIT / AP_RESET_INTERVAL;
struct ap_queue_status status;
#define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)"
for (; iters > 0; iters--) {
static void apq_reset_check(struct work_struct *reset_work)
{
int ret = -EBUSY, elapsed = 0;
struct ap_queue_status status;
struct vfio_ap_queue *q;
q = container_of(reset_work, struct vfio_ap_queue, reset_work);
memcpy(&status, &q->reset_status, sizeof(status));
while (true) {
msleep(AP_RESET_INTERVAL);
elapsed += AP_RESET_INTERVAL;
status = ap_tapq(q->apqn, NULL);
ret = apq_status_check(q->apqn, &status);
if (ret != -EBUSY)
return ret;
if (ret == -EIO)
return;
if (ret == -EBUSY) {
pr_notice_ratelimited(WAIT_MSG, elapsed,
AP_QID_CARD(q->apqn),
AP_QID_QUEUE(q->apqn),
status.response_code,
status.queue_empty,
status.irq_enabled);
} else {
if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
q->reset_status.response_code == AP_RESPONSE_BUSY ||
q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS ||
ret == -EAGAIN) {
status = ap_zapq(q->apqn, 0);
memcpy(&q->reset_status, &status, sizeof(status));
continue;
}
/*
* When an AP adapter is deconfigured, the
* associated queues are reset, so let's set the
* status response code to 0 so the queue may be
* passed through (i.e., not filtered)
*/
if (status.response_code == AP_RESPONSE_DECONFIGURED)
q->reset_status.response_code = 0;
if (q->saved_isc != VFIO_AP_ISC_INVALID)
vfio_ap_free_aqic_resources(q);
break;
}
}
WARN_ONCE(iters <= 0,
"timeout verifying reset of queue %02x.%04x (%u, %u, %u)",
AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
status.queue_empty, status.irq_enabled, status.response_code);
return ret;
}
static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
{
struct ap_queue_status status;
int ret;
if (!q)
return 0;
retry_zapq:
return;
status = ap_zapq(q->apqn, 0);
q->reset_rc = status.response_code;
memcpy(&q->reset_status, &status, sizeof(status));
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
ret = 0;
/* if the reset has not completed, wait for it to take effect */
if (!status.queue_empty || status.irq_enabled)
ret = apq_reset_check(q);
break;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS:
/*
* There is a reset issued by another process in progress. Let's wait
* for that to complete. Since we have no idea whether it was a RAPQ or
* ZAPQ, then if it completes successfully, let's issue the ZAPQ.
* Let's verify whether the ZAPQ completed successfully on a work queue.
*/
ret = apq_reset_check(q);
if (ret)
break;
goto retry_zapq;
queue_work(system_long_wq, &q->reset_work);
break;
case AP_RESPONSE_DECONFIGURED:
/*
* When an AP adapter is deconfigured, the associated
* queues are reset, so let's return a value indicating the reset
* completed successfully.
* queues are reset, so let's set the status response code to 0
* so the queue may be passed through (i.e., not filtered).
*/
ret = 0;
q->reset_status.response_code = 0;
vfio_ap_free_aqic_resources(q);
break;
default:
WARN(true,
"PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n",
AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
status.response_code);
return -EIO;
}
vfio_ap_free_aqic_resources(q);
return ret;
}
static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
{
int ret, loop_cursor, rc = 0;
int ret = 0, loop_cursor;
struct vfio_ap_queue *q;
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
vfio_ap_mdev_reset_queue(q);
hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
ret = vfio_ap_mdev_reset_queue(q);
/*
* Regardless whether a queue turns out to be busy, or
* is not operational, we need to continue resetting
* the remaining queues.
*/
if (ret)
rc = ret;
flush_work(&q->reset_work);
if (q->reset_status.response_code)
ret = -EIO;
}
return rc;
return ret;
}
static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
@ -2038,6 +2083,8 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
q->apqn = to_ap_queue(&apdev->device)->qid;
q->saved_isc = VFIO_AP_ISC_INVALID;
memset(&q->reset_status, 0, sizeof(q->reset_status));
INIT_WORK(&q->reset_work, apq_reset_check);
matrix_mdev = get_update_locks_by_apqn(q->apqn);
if (matrix_mdev) {
@ -2087,6 +2134,7 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
}
vfio_ap_mdev_reset_queue(q);
flush_work(&q->reset_work);
dev_set_drvdata(&apdev->device, NULL);
kfree(q);
release_update_locks_for_mdev(matrix_mdev);

View file

@ -133,7 +133,8 @@ struct ap_matrix_mdev {
* @apqn: the APQN of the AP queue device
* @saved_isc: the guest ISC registered with the GIB interface
* @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
* @reset_rc: the status response code from the last reset of the queue
* @reset_status: the status from the last reset of the queue
* @reset_work: work to wait for queue reset to complete
*/
struct vfio_ap_queue {
struct ap_matrix_mdev *matrix_mdev;
@ -142,7 +143,8 @@ struct vfio_ap_queue {
#define VFIO_AP_ISC_INVALID 0xff
unsigned char saved_isc;
struct hlist_node mdev_qnode;
unsigned int reset_rc;
struct ap_queue_status reset_status;
struct work_struct reset_work;
};
int vfio_ap_mdev_register(void);