Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio, pc: fixes, features

New virtio iommu.
Unrealize memory leaks.
In-band kick/call support.
Bugfixes, documentation all over the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu 27 Feb 2020 08:46:33 GMT
# gpg:                using RSA key 5D09FD0871C8F85B94CA8A0D281F0DB8D28D5469
# gpg:                issuer "mst@redhat.com"
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (30 commits)
  Fixed assert in vhost_user_set_mem_table_postcopy
  vhost-user: only set slave channel for first vq
  acpi: cpuhp: document CPHP_GET_CPU_ID_CMD command
  libvhost-user: implement in-band notifications
  docs: vhost-user: add in-band kick/call messages
  libvhost-user: handle NOFD flag in call/kick/err better
  libvhost-user-glib: use g_main_context_get_thread_default()
  libvhost-user-glib: fix VugDev main fd cleanup
  libvhost-user: implement VHOST_USER_PROTOCOL_F_REPLY_ACK
  MAINTAINERS: add virtio-iommu related files
  hw/arm/virt: Add the virtio-iommu device tree mappings
  virtio-iommu-pci: Add virtio iommu pci support
  virtio-iommu: Support migration
  virtio-iommu: Implement fault reporting
  virtio-iommu: Implement translate
  virtio-iommu: Implement map/unmap
  virtio-iommu: Implement attach/detach command
  virtio-iommu: Decode the command payload
  virtio-iommu: Add skeleton
  virtio: gracefully handle invalid region caches
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Committed by Peter Maydell, 2020-02-27 19:15:14 +00:00, as commit 8b6b68e05b.
28 changed files with 1562 additions and 83 deletions.

MAINTAINERS

@@ -1639,6 +1639,12 @@ F: hw/input/virtio-input*.c
F: include/hw/virtio/virtio-input.h
F: contrib/vhost-user-input/*
virtio-iommu
M: Eric Auger <eric.auger@redhat.com>
S: Maintained
F: hw/virtio/virtio-iommu*.c
F: include/hw/virtio/virtio-iommu.h
virtio-serial
M: Laurent Vivier <lvivier@redhat.com>
R: Amit Shah <amit@kernel.org>

contrib/libvhost-user/libvhost-user-glib.c

@@ -89,9 +89,8 @@ vug_source_new(VugDev *gdev, int fd, GIOCondition cond,
src->gfd.events = cond;
g_source_add_poll(gsrc, &src->gfd);
id = g_source_attach(gsrc, NULL);
id = g_source_attach(gsrc, g_main_context_get_thread_default());
g_assert(id);
g_source_unref(gsrc);
return gsrc;
}
@@ -131,6 +130,16 @@ static void vug_watch(VuDev *dev, int condition, void *data)
}
}
void vug_source_destroy(GSource *src)
{
if (!src) {
return;
}
g_source_destroy(src);
g_source_unref(src);
}
bool
vug_init(VugDev *dev, uint16_t max_queues, int socket,
vu_panic_cb panic, const VuDevIface *iface)
@@ -144,7 +153,7 @@ vug_init(VugDev *dev, uint16_t max_queues, int socket,
}
dev->fdmap = g_hash_table_new_full(NULL, NULL, NULL,
(GDestroyNotify) g_source_destroy);
(GDestroyNotify) vug_source_destroy);
dev->src = vug_source_new(dev, socket, G_IO_IN, vug_watch, NULL);
@@ -157,5 +166,5 @@ vug_deinit(VugDev *dev)
g_assert(dev);
g_hash_table_unref(dev->fdmap);
g_source_unref(dev->src);
vug_source_destroy(dev->src);
}
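The new helper matters because a GSource attached to a GMainContext is kept alive by two references: the one vug_source_new() returns to the caller and the one the context holds until the source is destroyed. A minimal usage sketch, assuming an eventfd and callback set up elsewhere (watch_fd/unwatch_fd are hypothetical names, not part of this patch):

    /* Create a watch; vug_source_new() attaches it to the thread-default
     * main context and leaves the caller holding one reference. */
    static GSource *watch_fd(VugDev *gdev, int efd, vu_watch_cb cb)
    {
        return vug_source_new(gdev, efd, G_IO_IN, cb, NULL);
    }

    /* Tear it down; a bare g_source_unref() would leave the source attached
     * (and the fd polled), a bare g_source_destroy() would leak our ref. */
    static void unwatch_fd(GSource *src)
    {
        vug_source_destroy(src); /* g_source_destroy() + g_source_unref() */
    }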

contrib/libvhost-user/libvhost-user-glib.h

@@ -31,5 +31,6 @@ void vug_deinit(VugDev *dev);
GSource *vug_source_new(VugDev *dev, int fd, GIOCondition cond,
vu_watch_cb vu_cb, gpointer data);
void vug_source_destroy(GSource *src);
#endif /* LIBVHOST_USER_GLIB_H */

contrib/libvhost-user/libvhost-user.c

@@ -136,6 +136,7 @@ vu_request_to_string(unsigned int req)
REQ(VHOST_USER_GET_INFLIGHT_FD),
REQ(VHOST_USER_SET_INFLIGHT_FD),
REQ(VHOST_USER_GPU_SET_SOCKET),
REQ(VHOST_USER_VRING_KICK),
REQ(VHOST_USER_MAX),
};
#undef REQ
@@ -163,7 +164,10 @@ vu_panic(VuDev *dev, const char *msg, ...)
dev->panic(dev, buf);
free(buf);
/* FIXME: find a way to call virtio_error? */
/*
* FIXME:
* find a way to call virtio_error, or perhaps close the connection?
*/
}
/* Translate guest physical address to our virtual address. */
@@ -948,6 +952,7 @@ static bool
vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
if (index >= dev->max_queues) {
vmsg_close_fds(vmsg);
@@ -955,8 +960,12 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
return false;
}
if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
vmsg->fd_num != 1) {
if (nofd) {
vmsg_close_fds(vmsg);
return true;
}
if (vmsg->fd_num != 1) {
vmsg_close_fds(vmsg);
vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
return false;
@@ -1053,6 +1062,7 @@ static bool
vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -1066,8 +1076,8 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].kick_fd = -1;
}
dev->vq[index].kick_fd = vmsg->fds[0];
DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
dev->vq[index].kick_fd = nofd ? -1 : vmsg->fds[0];
DPRINT("Got kick_fd: %d for vq: %d\n", dev->vq[index].kick_fd, index);
dev->vq[index].started = true;
if (dev->iface->queue_set_started) {
@@ -1147,6 +1157,7 @@ static bool
vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -1159,14 +1170,14 @@ vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].call_fd = -1;
}
dev->vq[index].call_fd = vmsg->fds[0];
dev->vq[index].call_fd = nofd ? -1 : vmsg->fds[0];
/* in case of I/O hang after reconnecting */
if (eventfd_write(vmsg->fds[0], 1)) {
if (dev->vq[index].call_fd != -1 && eventfd_write(vmsg->fds[0], 1)) {
return -1;
}
DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
DPRINT("Got call_fd: %d for vq: %d\n", dev->vq[index].call_fd, index);
return false;
}
@@ -1175,6 +1186,7 @@ static bool
vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
bool nofd = vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
@@ -1187,7 +1199,7 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].err_fd = -1;
}
dev->vq[index].err_fd = vmsg->fds[0];
dev->vq[index].err_fd = nofd ? -1 : vmsg->fds[0];
return false;
}
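For reference, the nofd flag decoded in these handlers is set by the master when no file descriptor accompanies the request. A sketch of the master-side encoding (the helper name is hypothetical; the masks are the ones used throughout this file):

    /* Pack the vring index and NOFD flag into payload.u64 for the
     * VHOST_USER_SET_VRING_KICK/CALL/ERR messages. */
    static uint64_t vring_fd_payload(unsigned int index, bool have_fd)
    {
        uint64_t u64 = index & VHOST_USER_VRING_IDX_MASK; /* bits 0-7 */

        if (!have_fd) {
            u64 |= VHOST_USER_VRING_NOFD_MASK;            /* bit 8 */
        }
        return u64; /* the fd itself, if any, travels as ancillary data */
    }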
@@ -1195,11 +1207,20 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
static bool
vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
/*
* Note that we support, but intentionally do not set,
* VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. This means that
* a device implementation can return it in its callback
* (get_protocol_features) if it wants to use this for
* simulation, but it is otherwise not desirable (if even
* implemented by the master).
*/
uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ |
1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD |
1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ |
1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER |
1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD;
1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD |
1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK;
if (have_userfault()) {
features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT;
@@ -1226,6 +1247,25 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->protocol_features = vmsg->payload.u64;
if (vu_has_protocol_feature(dev,
VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
(!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ) ||
!vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
/*
* The use case for using messages for kick/call is simulation, to make
* the kick and call synchronous. To actually get that behaviour, both
* of the other features are required.
* Theoretically, one could use only kick messages, or use them without
* having F_REPLY_ACK, but too many (possibly pending) messages on the
* socket will eventually cause the master to hang. To avoid this in
* scenarios where it is not desired, enforce a combination of settings
* that actually enables the simulation case.
*/
vu_panic(dev,
"F_IN_BAND_NOTIFICATIONS requires F_SLAVE_REQ && F_REPLY_ACK");
return false;
}
if (dev->iface->set_protocol_features) {
dev->iface->set_protocol_features(dev, features);
}
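As the comment in vu_get_protocol_features_exec() above notes, libvhost-user deliberately leaves the in-band feature for the device to opt into. A sketch of such an opt-in through the VuDevIface callback (assuming the callback signature used in this file; the device names are hypothetical):

    /* Hypothetical simulation-oriented device opting in: */
    static uint64_t
    sim_get_protocol_features(VuDev *dev)
    {
        /* ORed into the features advertised to the master */
        return 1ULL << VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS;
    }

    static const VuDevIface sim_iface = {
        .get_protocol_features = sim_get_protocol_features,
        /* .process_msg, .queue_set_started, ... as usual */
    };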
@@ -1486,6 +1526,34 @@ vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
return false;
}
static bool
vu_handle_vring_kick(VuDev *dev, VhostUserMsg *vmsg)
{
unsigned int index = vmsg->payload.state.index;
if (index >= dev->max_queues) {
vu_panic(dev, "Invalid queue index: %u", index);
return false;
}
DPRINT("Got kick message: handler:%p idx:%d\n",
dev->vq[index].handler, index);
if (!dev->vq[index].started) {
dev->vq[index].started = true;
if (dev->iface->queue_set_started) {
dev->iface->queue_set_started(dev, index, true);
}
}
if (dev->vq[index].handler) {
dev->vq[index].handler(dev, index);
}
return false;
}
static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
@@ -1568,6 +1636,8 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
return vu_get_inflight_fd(dev, vmsg);
case VHOST_USER_SET_INFLIGHT_FD:
return vu_set_inflight_fd(dev, vmsg);
case VHOST_USER_VRING_KICK:
return vu_handle_vring_kick(dev, vmsg);
default:
vmsg_close_fds(vmsg);
vu_panic(dev, "Unhandled request: %d", vmsg->request);
@@ -1581,13 +1651,20 @@ vu_dispatch(VuDev *dev)
{
VhostUserMsg vmsg = { 0, };
int reply_requested;
bool success = false;
bool need_reply, success = false;
if (!vu_message_read(dev, dev->sock, &vmsg)) {
goto end;
}
need_reply = vmsg.flags & VHOST_USER_NEED_REPLY_MASK;
reply_requested = vu_process_message(dev, &vmsg);
if (!reply_requested && need_reply) {
vmsg_set_reply_u64(&vmsg, 0);
reply_requested = 1;
}
if (!reply_requested) {
success = true;
goto end;
@@ -2022,8 +2099,7 @@ vring_notify(VuDev *dev, VuVirtq *vq)
return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
void
vu_queue_notify(VuDev *dev, VuVirtq *vq)
static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
{
if (unlikely(dev->broken) ||
unlikely(!vq->vring.avail)) {
@@ -2035,11 +2111,48 @@ vu_queue_notify(VuDev *dev, VuVirtq *vq)
return;
}
if (vq->call_fd < 0 &&
vu_has_protocol_feature(dev,
VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
vu_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
VhostUserMsg vmsg = {
.request = VHOST_USER_SLAVE_VRING_CALL,
.flags = VHOST_USER_VERSION,
.size = sizeof(vmsg.payload.state),
.payload.state = {
.index = vq - dev->vq,
},
};
bool ack = sync &&
vu_has_protocol_feature(dev,
VHOST_USER_PROTOCOL_F_REPLY_ACK);
if (ack) {
vmsg.flags |= VHOST_USER_NEED_REPLY_MASK;
}
vu_message_write(dev, dev->slave_fd, &vmsg);
if (ack) {
vu_message_read(dev, dev->slave_fd, &vmsg);
}
return;
}
if (eventfd_write(vq->call_fd, 1) < 0) {
vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
}
}
void vu_queue_notify(VuDev *dev, VuVirtq *vq)
{
_vu_queue_notify(dev, vq, false);
}
void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
{
_vu_queue_notify(dev, vq, true);
}
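Taken together, a backend that wants the synchronous, simulation-style completion path pushes a used element and then calls the new entry point. A sketch (assuming elem came from vu_queue_pop() and len is the number of bytes written; complete_request is a hypothetical name):

    static void complete_request(VuDev *dev, VuVirtq *vq,
                                 VuVirtqElement *elem, size_t len)
    {
        vu_queue_push(dev, vq, elem, len);
        /*
         * With no call_fd and F_INBAND_NOTIFICATIONS + F_SLAVE_REQ
         * negotiated, this sends VHOST_USER_SLAVE_VRING_CALL and, if
         * F_REPLY_ACK is available, blocks until the master acks it;
         * otherwise it behaves exactly like vu_queue_notify().
         */
        vu_queue_notify_sync(dev, vq);
        free(elem);
    }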
static inline void
vring_used_flags_set_bit(VuVirtq *vq, int mask)
{

contrib/libvhost-user/libvhost-user.h

@@ -54,6 +54,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -95,6 +96,7 @@ typedef enum VhostUserRequest {
VHOST_USER_GET_INFLIGHT_FD = 31,
VHOST_USER_SET_INFLIGHT_FD = 32,
VHOST_USER_GPU_SET_SOCKET = 33,
VHOST_USER_VRING_KICK = 35,
VHOST_USER_MAX
} VhostUserRequest;
@@ -103,6 +105,8 @@ typedef enum VhostUserSlaveRequest {
VHOST_USER_SLAVE_IOTLB_MSG = 1,
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
VHOST_USER_SLAVE_VRING_CALL = 4,
VHOST_USER_SLAVE_VRING_ERR = 5,
VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
@@ -528,6 +532,16 @@ bool vu_queue_empty(VuDev *dev, VuVirtq *vq);
*/
void vu_queue_notify(VuDev *dev, VuVirtq *vq);
/**
* vu_queue_notify_sync:
* @dev: a VuDev context
* @vq: a VuVirtq queue
*
* Request to notify the queue via callfd (skipped if unnecessary)
* or sync message if possible.
*/
void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq);
/**
* vu_queue_pop:
* @dev: a VuDev context

contrib/vhost-user-input/main.c

@@ -187,7 +187,7 @@ vi_queue_set_started(VuDev *dev, int qidx, bool started)
}
if (!started && vi->evsrc) {
g_source_destroy(vi->evsrc);
vug_source_destroy(vi->evsrc);
vi->evsrc = NULL;
}
}
@@ -401,9 +401,7 @@ main(int argc, char *argv[])
vug_deinit(&vi.dev);
if (vi.evsrc) {
g_source_unref(vi.evsrc);
}
vug_source_destroy(vi.evsrc);
g_array_free(vi.config, TRUE);
g_free(vi.queue);
return 0;

docs/interop/vhost-user.rst

@@ -2,6 +2,7 @@
Vhost-user Protocol
===================
:Copyright: 2014 Virtual Open Systems Sarl.
:Copyright: 2019 Intel Corporation
:Licence: This work is licensed under the terms of the GNU GPL,
version 2 or later. See the COPYING file in the top-level
directory.
@@ -279,6 +280,9 @@ If *master* is unable to send the full message or receives a wrong
reply it will close the connection. An optional reconnection mechanism
can be implemented.
If *slave* detects some error such as incompatible features, it may also
close the connection. This should only happen in exceptional circumstances.
Any protocol extensions are gated by protocol feature bits, which
allows full backwards compatibility on both master and slave. As
older slaves don't support negotiating protocol features, a feature
@@ -315,7 +319,8 @@ it until ring is started, or after it has been stopped.
Client must start ring upon receiving a kick (that is, detecting that
file descriptor is readable) on the descriptor specified by
``VHOST_USER_SET_VRING_KICK``, and stop ring upon receiving
``VHOST_USER_SET_VRING_KICK`` or receiving the in-band message
``VHOST_USER_VRING_KICK`` if negotiated, and stop ring upon receiving
``VHOST_USER_GET_VRING_BASE``.
While processing the rings (whether they are enabled or not), client
@@ -767,25 +772,49 @@ When reconnecting:
#. Resubmit inflight ``DescStatePacked`` entries in order of their
counter value
In-band notifications
---------------------
In some limited situations (e.g. for simulation) it is desirable to
have the kick, call and error (if used) signals done via in-band
messages instead of asynchronous eventfd notifications. This can be
done by negotiating the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS``
protocol feature.
Note that because too many messages on the socket can cause the
sending application(s) to block, it is not advised to use this feature
unless absolutely necessary. It is also considered an error to
negotiate this feature without also negotiating
``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` and ``VHOST_USER_PROTOCOL_F_REPLY_ACK``:
the former is necessary for getting a message channel from the slave
to the master, while the latter must be used with the in-band
notification messages to block until they are processed, both to avoid
blocking later and for proper processing (at least in the simulation
use case). As it has no other way of signalling this error, the slave
should close the connection in response to a
``VHOST_USER_SET_PROTOCOL_FEATURES`` message that sets the in-band
notifications feature flag without the other two.
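Expressed as a check, the rule the slave enforces amounts to the
following sketch (in C; ``features`` is the value received in
``VHOST_USER_SET_PROTOCOL_FEATURES``):

.. code:: c

  uint64_t required =
      (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) |
      (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK);

  if ((features & (1ULL << VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) &&
      (features & required) != required) {
      /* no error channel exists at this point: close the connection */
  }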
Protocol features
-----------------
.. code:: c
#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_USER_PROTOCOL_F_RARP 2
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
#define VHOST_USER_PROTOCOL_F_MTU 4
#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
#define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6
#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
#define VHOST_USER_PROTOCOL_F_CONFIG 9
#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
#define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13
#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_USER_PROTOCOL_F_RARP 2
#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
#define VHOST_USER_PROTOCOL_F_MTU 4
#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
#define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6
#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
#define VHOST_USER_PROTOCOL_F_CONFIG 9
#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
#define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13
#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
Master message types
--------------------
@@ -947,7 +976,12 @@ Master message types
Bits (0-7) of the payload contain the vring index. Bit 8 is the
invalid FD flag. This flag is set when there is no file descriptor
in the ancillary data. This signals that polling should be used
instead of waiting for a kick.
instead of waiting for the kick. Note that if the protocol feature
``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` has been negotiated,
this message isn't necessary as the ring is also started on the
``VHOST_USER_VRING_KICK`` message; it may, however, still be used to
set an event file descriptor (which will be preferred over the
message) or to enable polling.
``VHOST_USER_SET_VRING_CALL``
:id: 13
@@ -960,7 +994,12 @@ Master message types
Bits (0-7) of the payload contain the vring index. Bit 8 is the
invalid FD flag. This flag is set when there is no file descriptor
in the ancillary data. This signals that polling will be used
instead of waiting for the call.
instead of waiting for the call. Note that if the protocol features
``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` and
``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` have been negotiated, this message
isn't necessary as the ``VHOST_USER_SLAVE_VRING_CALL`` message can be
used instead; it may, however, still be used to set an event file
descriptor or to enable polling.
``VHOST_USER_SET_VRING_ERR``
:id: 14
@@ -972,7 +1011,12 @@ Master message types
Bits (0-7) of the payload contain the vring index. Bit 8 is the
invalid FD flag. This flag is set when there is no file descriptor
in the ancillary data.
in the ancillary data. Note that if the protocol features
``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` and
``VHOST_USER_PROTOCOL_F_SLAVE_REQ`` have been negotiated, this message
isn't necessary as the ``VHOST_USER_SLAVE_VRING_ERR`` message can be
used instead; it may, however, still be used to set an event file
descriptor (which will be preferred over the message).
``VHOST_USER_GET_QUEUE_NUM``
:id: 17
@@ -1205,6 +1249,20 @@ Master message types
Only valid if the ``VHOST_USER_PROTOCOL_F_RESET_DEVICE`` protocol
feature is set by the backend.
``VHOST_USER_VRING_KICK``
:id: 35
:equivalent ioctl: N/A
:slave payload: vring state description
:master payload: N/A
When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol
feature has been successfully negotiated, this message may be
submitted by the master to indicate that a buffer was added to
the vring instead of signalling it using the vring's kick file
descriptor or having the slave rely on polling.
The state.num field is currently reserved and must be set to 0.
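For illustration, a sketch of how a master might construct this
message (the payload layout is the vring state description defined
earlier in this document; ``send_vhost_user_msg()`` is a hypothetical
helper, not part of any real API):

.. code:: c

  struct vhost_vring_state_payload {
      uint32_t index; /* vring index to kick */
      uint32_t num;   /* reserved, must be 0 */
  };

  struct vhost_vring_state_payload kick = { .index = vq_index, .num = 0 };
  /* wrapped in a message header with request id 35; no ancillary
   * file descriptor accompanies this message */
  send_vhost_user_msg(sock, VHOST_USER_VRING_KICK, &kick, sizeof(kick));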
Slave message types
-------------------
@@ -1261,6 +1319,34 @@ Slave message types
``VHOST_USER_PROTOCOL_F_HOST_NOTIFIER`` protocol feature has been
successfully negotiated.
``VHOST_USER_SLAVE_VRING_CALL``
:id: 4
:equivalent ioctl: N/A
:slave payload: vring state description
:master payload: N/A
When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol
feature has been successfully negotiated, this message may be
submitted by the slave to indicate that a buffer was used from
the vring instead of signalling this using the vring's call file
descriptor or having the master rely on polling.
The state.num field is currently reserved and must be set to 0.
``VHOST_USER_SLAVE_VRING_ERR``
:id: 5
:equivalent ioctl: N/A
:slave payload: vring state description
:master payload: N/A
When the ``VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS`` protocol
feature has been successfully negotiated, this message may be
submitted by the slave to indicate that an error occurred on the
specific vring, instead of signalling the error file descriptor
set by the master via ``VHOST_USER_SET_VRING_ERR``.
The state.num field is currently reserved and must be set to 0.
.. _reply_ack:
VHOST_USER_PROTOCOL_F_REPLY_ACK

docs/specs/acpi_cpu_hotplug.txt

@@ -94,6 +94,8 @@ write access:
register in QEMU
2: following writes to 'Command data' register set OST status
register in QEMU
3: following reads from 'Command data' and 'Command data 2' return
the architecture-specific CPU ID value for the currently selected CPU.
other values: reserved
[0x6-0x7] reserved
[0x8] Command data: (DWORD access)
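A guest-side sequence for command 3 might look like the following sketch.
The selector and command-field offsets (0x0 and 0x5) are assumptions based
on the register layout documented in this file, and writel/writeb/readl
stand in for the guest's register accessors:

    uint64_t cphp_get_cpu_id(uint32_t cpu_index)
    {
        writel(cphp_base + 0x0, cpu_index);   /* select a CPU */
        writeb(cphp_base + 0x5, 3);           /* CPHP_GET_CPU_ID_CMD */
        uint64_t lo = readl(cphp_base + 0x8); /* 'Command data' */
        uint64_t hi = readl(cphp_base + 0x0); /* 'Command data 2' */
        return (hi << 32) | lo;
    }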

hw/arm/virt.c

@@ -32,6 +32,7 @@
#include "qemu-common.h"
#include "qemu/units.h"
#include "qemu/option.h"
#include "monitor/qdev.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/boards.h"
@@ -54,6 +55,7 @@
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "hw/pci-host/gpex.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/arm/sysbus-fdt.h"
#include "hw/platform-bus.h"
#include "hw/qdev-properties.h"
@@ -71,6 +73,7 @@
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
#include "hw/acpi/generic_event_device.h"
#include "hw/virtio/virtio-iommu.h"
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -1180,6 +1183,30 @@ static void create_smmu(const VirtMachineState *vms,
g_free(node);
}
static void create_virtio_iommu_dt_bindings(VirtMachineState *vms, Error **errp)
{
const char compat[] = "virtio,pci-iommu";
uint16_t bdf = vms->virtio_iommu_bdf;
char *node;
vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt);
node = g_strdup_printf("%s/virtio_iommu@%d", vms->pciehb_nodename, bdf);
qemu_fdt_add_subnode(vms->fdt, node);
qemu_fdt_setprop(vms->fdt, node, "compatible", compat, sizeof(compat));
qemu_fdt_setprop_sized_cells(vms->fdt, node, "reg",
1, bdf << 8, 1, 0, 1, 0,
1, 0, 1, 0);
qemu_fdt_setprop_cell(vms->fdt, node, "#iommu-cells", 1);
qemu_fdt_setprop_cell(vms->fdt, node, "phandle", vms->iommu_phandle);
g_free(node);
qemu_fdt_setprop_cells(vms->fdt, vms->pciehb_nodename, "iommu-map",
0x0, vms->iommu_phandle, 0x0, bdf,
bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf);
}
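To make the iommu-map arithmetic above concrete, a worked example with a hypothetical BDF (device at 00:01.0, so bdf = 0x0008):

    /*
     * iommu-map entries are <rid-base iommu-phandle iommu-base length>.
     * With bdf = 0x0008 the two ranges set above expand to:
     *
     *   <0x0000 &viommu 0x0000 0x0008>  RIDs 0x0000..0x0007 -> translated
     *   <0x0009 &viommu 0x0009 0xfff7>  RIDs 0x0009..0xffff -> translated
     *
     * i.e. every requester ID except the IOMMU's own (0x0008) is routed
     * through the virtio-iommu, so the IOMMU does not translate itself.
     */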
static void create_pcie(VirtMachineState *vms)
{
hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base;
@@ -1258,7 +1285,7 @@ static void create_pcie(VirtMachineState *vms)
}
}
nodename = g_strdup_printf("/pcie@%" PRIx64, base);
nodename = vms->pciehb_nodename = g_strdup_printf("/pcie@%" PRIx64, base);
qemu_fdt_add_subnode(vms->fdt, nodename);
qemu_fdt_setprop_string(vms->fdt, nodename,
"compatible", "pci-host-ecam-generic");
@@ -1301,13 +1328,16 @@ static void create_pcie(VirtMachineState *vms)
if (vms->iommu) {
vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt);
create_smmu(vms, pci->bus);
qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map",
0x0, vms->iommu_phandle, 0x0, 0x10000);
switch (vms->iommu) {
case VIRT_IOMMU_SMMUV3:
create_smmu(vms, pci->bus);
qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map",
0x0, vms->iommu_phandle, 0x0, 0x10000);
break;
default:
g_assert_not_reached();
}
}
g_free(nodename);
}
static void create_platform_bus(VirtMachineState *vms)
@@ -1974,6 +2004,13 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
virt_memory_plug(hotplug_dev, dev, errp);
}
if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
PCIDevice *pdev = PCI_DEVICE(dev);
vms->iommu = VIRT_IOMMU_VIRTIO;
vms->virtio_iommu_bdf = pci_get_bdf(pdev);
create_virtio_iommu_dt_bindings(vms, errp);
}
}
static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
@@ -1990,7 +2027,13 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
(object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM))) {
return HOTPLUG_HANDLER(machine);
}
if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
VirtMachineState *vms = VIRT_MACHINE(machine);
if (!vms->bootinfo.firmware_loaded || !acpi_enabled) {
return HOTPLUG_HANDLER(machine);
}
}
return NULL;
}

hw/block/vhost-user-blk.c

@@ -306,7 +306,7 @@ static int vhost_user_blk_connect(DeviceState *dev)
s->connected = true;
s->dev.nvqs = s->num_queues;
s->dev.vqs = s->vqs;
s->dev.vqs = s->vhost_vqs;
s->dev.vq_index = 0;
s->dev.backend_features = 0;
@@ -420,13 +420,14 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
sizeof(struct virtio_blk_config));
s->virtqs = g_new(VirtQueue *, s->num_queues);
for (i = 0; i < s->num_queues; i++) {
virtio_add_queue(vdev, s->queue_size,
vhost_user_blk_handle_output);
s->virtqs[i] = virtio_add_queue(vdev, s->queue_size,
vhost_user_blk_handle_output);
}
s->inflight = g_new0(struct vhost_inflight, 1);
s->vqs = g_new0(struct vhost_virtqueue, s->num_queues);
s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
s->watch = 0;
s->connected = false;
@@ -458,8 +459,12 @@ reconnect:
return;
virtio_err:
g_free(s->vqs);
g_free(s->vhost_vqs);
g_free(s->inflight);
for (i = 0; i < s->num_queues; i++) {
virtio_delete_queue(s->virtqs[i]);
}
g_free(s->virtqs);
virtio_cleanup(vdev);
vhost_user_cleanup(&s->vhost_user);
}
@@ -468,14 +473,20 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(dev);
int i;
virtio_set_status(vdev, 0);
qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL,
NULL, NULL, NULL, false);
vhost_dev_cleanup(&s->dev);
vhost_dev_free_inflight(s->inflight);
g_free(s->vqs);
g_free(s->vhost_vqs);
g_free(s->inflight);
for (i = 0; i < s->num_queues; i++) {
virtio_delete_queue(s->virtqs[i]);
}
g_free(s->virtqs);
virtio_cleanup(vdev);
vhost_user_cleanup(&s->vhost_user);
}
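This mirrors the rule the "unrealize memory leaks" patches apply across devices: every virtio_add_queue() needs a matching virtio_delete_queue() before virtio_cleanup(), in both the realize error path and unrealize. A condensed sketch of the contract (device names hypothetical):

    static void my_virtio_unrealize(DeviceState *dev, Error **errp)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(dev);
        MyDev *s = MY_DEV(dev);
        int i;

        for (i = 0; i < s->num_queues; i++) {
            virtio_delete_queue(s->virtqs[i]); /* frees each VirtQueue */
        }
        g_free(s->virtqs);
        virtio_cleanup(vdev); /* then tears down the common virtio state */
    }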

hw/virtio/Kconfig

@@ -9,6 +9,11 @@ config VIRTIO_RNG
default y
depends on VIRTIO
config VIRTIO_IOMMU
bool
default y
depends on VIRTIO
config VIRTIO_PCI
bool
default y if PCI_DEVICES

hw/virtio/Makefile.objs

@@ -16,6 +16,7 @@ obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += virtio-crypto-p
obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o
common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o
obj-$(call land,$(CONFIG_VHOST_USER_FS),$(CONFIG_VIRTIO_PCI)) += vhost-user-fs-pci.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o
ifeq ($(CONFIG_VIRTIO_PCI),y)
@@ -28,6 +29,7 @@ obj-$(CONFIG_VIRTIO_INPUT_HOST) += virtio-input-host-pci.o
obj-$(CONFIG_VIRTIO_INPUT) += virtio-input-pci.o
obj-$(CONFIG_VIRTIO_RNG) += virtio-rng-pci.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio-balloon-pci.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu-pci.o
obj-$(CONFIG_VIRTIO_9P) += virtio-9p-pci.o
obj-$(CONFIG_VIRTIO_SCSI) += virtio-scsi-pci.o
obj-$(CONFIG_VIRTIO_BLK) += virtio-blk-pci.o

hw/virtio/trace-events

@@ -53,3 +53,23 @@ virtio_mmio_write_offset(uint64_t offset, uint64_t value) "virtio_mmio_write off
virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 " shift %d"
virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" PRIx64 " max %d"
virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d"
# hw/virtio/virtio-iommu.c
virtio_iommu_device_reset(void) "reset!"
virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64
virtio_iommu_device_status(uint8_t status) "driver status = %d"
virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" start=0x%"PRIx64" end=0x%"PRIx64" domain_range=%d probe_size=0x%x"
virtio_iommu_set_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" start=0x%"PRIx64" end=0x%"PRIx64" domain_bits=%d probe_size=0x%x"
virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d"
virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d"
virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
virtio_iommu_unmap_done(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64
virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d"
virtio_iommu_init_iommu_mr(char *iommu_mr) "init %s"
virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d"
virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d"
virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d"
virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
virtio_iommu_report_fault(uint8_t reason, uint32_t flags, uint32_t endpoint, uint64_t addr) "FAULT reason=%d flags=%d endpoint=%d address =0x%"PRIx64

hw/virtio/vhost-user-fs.c

@@ -209,11 +209,12 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
sizeof(struct virtio_fs_config));
/* Hiprio queue */
virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
fs->hiprio_vq = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
/* Request queues */
fs->req_vqs = g_new(VirtQueue *, fs->conf.num_request_queues);
for (i = 0; i < fs->conf.num_request_queues; i++) {
virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
fs->req_vqs[i] = virtio_add_queue(vdev, fs->conf.queue_size, vuf_handle_output);
}
/* 1 high prio queue, plus the number configured */
@@ -230,6 +231,11 @@ static void vuf_device_realize(DeviceState *dev, Error **errp)
err_virtio:
vhost_user_cleanup(&fs->vhost_user);
virtio_delete_queue(fs->hiprio_vq);
for (i = 0; i < fs->conf.num_request_queues; i++) {
virtio_delete_queue(fs->req_vqs[i]);
}
g_free(fs->req_vqs);
virtio_cleanup(vdev);
g_free(fs->vhost_dev.vqs);
return;
@@ -239,6 +245,7 @@ static void vuf_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserFS *fs = VHOST_USER_FS(dev);
int i;
/* This will stop vhost backend if appropriate. */
vuf_set_status(vdev, 0);
@@ -247,6 +254,11 @@ static void vuf_device_unrealize(DeviceState *dev, Error **errp)
vhost_user_cleanup(&fs->vhost_user);
virtio_delete_queue(fs->hiprio_vq);
for (i = 0; i < fs->conf.num_request_queues; i++) {
virtio_delete_queue(fs->req_vqs[i]);
}
g_free(fs->req_vqs);
virtio_cleanup(vdev);
g_free(fs->vhost_dev.vqs);
fs->vhost_dev.vqs = NULL;

View file

@@ -443,6 +443,7 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
&offset);
fd = memory_region_get_fd(mr);
if (fd > 0) {
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
reg->memory_size,
reg->guest_phys_addr,
@@ -455,7 +456,6 @@ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
msg.payload.memory.regions[fd_num].guest_phys_addr =
reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = offset;
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
} else {
u->region_rb_offset[i] = 0;
@@ -1458,9 +1458,11 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
"VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
}
err = vhost_setup_slave_channel(dev);
if (err < 0) {
return err;
if (dev->vq_index == 0) {
err = vhost_setup_slave_channel(dev);
if (err < 0) {
return err;
}
}
u->postcopy_notifier.notify = vhost_user_postcopy_notifier;

hw/virtio/virtio-crypto.c

@@ -831,12 +831,13 @@ static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp)
max_queues = vcrypto->multiqueue ? vcrypto->max_queues : 1;
for (i = 0; i < max_queues; i++) {
virtio_del_queue(vdev, i);
virtio_delete_queue(vcrypto->vqs[i].dataq);
q = &vcrypto->vqs[i];
qemu_bh_delete(q->dataq_bh);
}
g_free(vcrypto->vqs);
virtio_delete_queue(vcrypto->ctrl_vq);
virtio_cleanup(vdev);
cryptodev_backend_set_used(vcrypto->cryptodev, false);

hw/virtio/virtio-iommu-pci.c (new file)

@@ -0,0 +1,104 @@
/*
* Virtio IOMMU PCI Bindings
*
* Copyright (c) 2019 Red Hat, Inc.
* Written by Eric Auger
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 or
* (at your option) any later version.
*/
#include "qemu/osdep.h"
#include "virtio-pci.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"
#include "hw/boards.h"
typedef struct VirtIOIOMMUPCI VirtIOIOMMUPCI;
/*
* virtio-iommu-pci: This extends VirtioPCIProxy.
*
*/
#define VIRTIO_IOMMU_PCI(obj) \
OBJECT_CHECK(VirtIOIOMMUPCI, (obj), TYPE_VIRTIO_IOMMU_PCI)
struct VirtIOIOMMUPCI {
VirtIOPCIProxy parent_obj;
VirtIOIOMMU vdev;
};
static Property virtio_iommu_pci_properties[] = {
DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
DEFINE_PROP_END_OF_LIST(),
};
static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
{
VirtIOIOMMUPCI *dev = VIRTIO_IOMMU_PCI(vpci_dev);
DeviceState *vdev = DEVICE(&dev->vdev);
if (!qdev_get_machine_hotplug_handler(DEVICE(vpci_dev))) {
MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
error_setg(errp,
"%s machine fails to create iommu-map device tree bindings",
mc->name);
error_append_hint(errp,
"Check you machine implements a hotplug handler "
"for the virtio-iommu-pci device\n");
error_append_hint(errp, "Check the guest is booted without FW or with "
"-no-acpi\n");
return;
}
qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
object_property_set_link(OBJECT(dev),
OBJECT(pci_get_bus(&vpci_dev->pci_dev)),
"primary-bus", errp);
object_property_set_bool(OBJECT(vdev), true, "realized", errp);
}
static void virtio_iommu_pci_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
k->realize = virtio_iommu_pci_realize;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
device_class_set_props(dc, virtio_iommu_pci_properties);
pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_IOMMU;
pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
pcidev_k->class_id = PCI_CLASS_OTHERS;
dc->hotpluggable = false;
}
static void virtio_iommu_pci_instance_init(Object *obj)
{
VirtIOIOMMUPCI *dev = VIRTIO_IOMMU_PCI(obj);
virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
TYPE_VIRTIO_IOMMU);
}
static const VirtioPCIDeviceTypeInfo virtio_iommu_pci_info = {
.base_name = TYPE_VIRTIO_IOMMU_PCI,
.generic_name = "virtio-iommu-pci",
.transitional_name = "virtio-iommu-pci-transitional",
.non_transitional_name = "virtio-iommu-pci-non-transitional",
.instance_size = sizeof(VirtIOIOMMUPCI),
.instance_init = virtio_iommu_pci_instance_init,
.class_init = virtio_iommu_pci_class_init,
};
static void virtio_iommu_pci_register(void)
{
virtio_pci_types_register(&virtio_iommu_pci_info);
}
type_init(virtio_iommu_pci_register)

hw/virtio/virtio-iommu.c (new file, 890 lines)

@@ -0,0 +1,890 @@
/*
* virtio-iommu device
*
* Copyright (c) 2020 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2 or later, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
#include "qemu-common.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "standard-headers/linux/virtio_ids.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"
/* Max size */
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
typedef struct VirtIOIOMMUDomain {
uint32_t id;
GTree *mappings;
QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;
typedef struct VirtIOIOMMUEndpoint {
uint32_t id;
VirtIOIOMMUDomain *domain;
QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;
typedef struct VirtIOIOMMUInterval {
uint64_t low;
uint64_t high;
} VirtIOIOMMUInterval;
typedef struct VirtIOIOMMUMapping {
uint64_t phys_addr;
uint32_t flags;
} VirtIOIOMMUMapping;
static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}
/**
* The bus number is used for lookup when SID based operations occur.
* In that case we lazily populate the IOMMUPciBus array from the bus hash
* table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
* numbers may not all be initialized yet.
*/
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];
if (!iommu_pci_bus) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, s->as_by_busptr);
while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
return iommu_pci_bus;
}
}
return NULL;
}
return iommu_pci_bus;
}
static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
uint8_t bus_n, devfn;
IOMMUPciBus *iommu_pci_bus;
IOMMUDevice *dev;
bus_n = PCI_BUS_NUM(sid);
iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
if (iommu_pci_bus) {
devfn = sid & PCI_DEVFN_MAX;
dev = iommu_pci_bus->pbdev[devfn];
if (dev) {
return &dev->iommu_mr;
}
}
return NULL;
}
static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;
if (inta->high < intb->low) {
return -1;
} else if (intb->high < inta->low) {
return 1;
} else {
return 0;
}
}
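Note that this comparator reports any two overlapping intervals as equal, which is what lets a g_tree_lookup() with a degenerate [addr, addr + 1] key find the mapping covering an address. A sketch of a lookup built on it (hypothetical helper; compare the translate path further down):

    static VirtIOIOMMUMapping *find_mapping(VirtIOIOMMUDomain *domain,
                                            uint64_t addr)
    {
        VirtIOIOMMUInterval key = { .low = addr, .high = addr + 1 };

        /* interval_cmp() returns 0 for any stored interval overlapping
         * the key, so this yields the covering mapping, if any */
        return g_tree_lookup(domain->mappings, &key);
    }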
static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
if (!ep->domain) {
return;
}
QLIST_REMOVE(ep, next);
ep->domain = NULL;
}
static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
uint32_t ep_id)
{
VirtIOIOMMUEndpoint *ep;
ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
if (ep) {
return ep;
}
if (!virtio_iommu_mr(s, ep_id)) {
return NULL;
}
ep = g_malloc0(sizeof(*ep));
ep->id = ep_id;
trace_virtio_iommu_get_endpoint(ep_id);
g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
return ep;
}
static void virtio_iommu_put_endpoint(gpointer data)
{
VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;
if (ep->domain) {
virtio_iommu_detach_endpoint_from_domain(ep);
}
trace_virtio_iommu_put_endpoint(ep->id);
g_free(ep);
}
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
uint32_t domain_id)
{
VirtIOIOMMUDomain *domain;
domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
if (domain) {
return domain;
}
domain = g_malloc0(sizeof(*domain));
domain->id = domain_id;
domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
NULL, (GDestroyNotify)g_free,
(GDestroyNotify)g_free);
g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
QLIST_INIT(&domain->endpoint_list);
trace_virtio_iommu_get_domain(domain_id);
return domain;
}
static void virtio_iommu_put_domain(gpointer data)
{
VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
VirtIOIOMMUEndpoint *iter, *tmp;
QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
virtio_iommu_detach_endpoint_from_domain(iter);
}
g_tree_destroy(domain->mappings);
trace_virtio_iommu_put_domain(domain->id);
g_free(domain);
}
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
int devfn)
{
VirtIOIOMMU *s = opaque;
IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
static uint32_t mr_index;
IOMMUDevice *sdev;
if (!sbus) {
sbus = g_malloc0(sizeof(IOMMUPciBus) +
sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
sbus->bus = bus;
g_hash_table_insert(s->as_by_busptr, bus, sbus);
}
sdev = sbus->pbdev[devfn];
if (!sdev) {
char *name = g_strdup_printf("%s-%d-%d",
TYPE_VIRTIO_IOMMU_MEMORY_REGION,
mr_index++, devfn);
sdev = sbus->pbdev[devfn] = g_malloc0(sizeof(IOMMUDevice));
sdev->viommu = s;
sdev->bus = bus;
sdev->devfn = devfn;
trace_virtio_iommu_init_iommu_mr(name);
memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
TYPE_VIRTIO_IOMMU_MEMORY_REGION,
OBJECT(s), name,
UINT64_MAX);
address_space_init(&sdev->as,
MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
g_free(name);
}
return &sdev->as;
}
static int virtio_iommu_attach(VirtIOIOMMU *s,
struct virtio_iommu_req_attach *req)
{
uint32_t domain_id = le32_to_cpu(req->domain);
uint32_t ep_id = le32_to_cpu(req->endpoint);
VirtIOIOMMUDomain *domain;
VirtIOIOMMUEndpoint *ep;
trace_virtio_iommu_attach(domain_id, ep_id);
ep = virtio_iommu_get_endpoint(s, ep_id);
if (!ep) {
return VIRTIO_IOMMU_S_NOENT;
}
if (ep->domain) {
VirtIOIOMMUDomain *previous_domain = ep->domain;
/*
* the device is already attached to a domain,
* detach it first
*/
virtio_iommu_detach_endpoint_from_domain(ep);
if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
}
}
domain = virtio_iommu_get_domain(s, domain_id);
QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
ep->domain = domain;
return VIRTIO_IOMMU_S_OK;
}
static int virtio_iommu_detach(VirtIOIOMMU *s,
struct virtio_iommu_req_detach *req)
{
uint32_t domain_id = le32_to_cpu(req->domain);
uint32_t ep_id = le32_to_cpu(req->endpoint);
VirtIOIOMMUDomain *domain;
VirtIOIOMMUEndpoint *ep;
trace_virtio_iommu_detach(domain_id, ep_id);
ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
if (!ep) {
return VIRTIO_IOMMU_S_NOENT;
}
domain = ep->domain;
if (!domain || domain->id != domain_id) {
return VIRTIO_IOMMU_S_INVAL;
}
virtio_iommu_detach_endpoint_from_domain(ep);
if (QLIST_EMPTY(&domain->endpoint_list)) {
g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
}
return VIRTIO_IOMMU_S_OK;
}
static int virtio_iommu_map(VirtIOIOMMU *s,
struct virtio_iommu_req_map *req)
{
uint32_t domain_id = le32_to_cpu(req->domain);
uint64_t phys_start = le64_to_cpu(req->phys_start);
uint64_t virt_start = le64_to_cpu(req->virt_start);
uint64_t virt_end = le64_to_cpu(req->virt_end);
uint32_t flags = le32_to_cpu(req->flags);
VirtIOIOMMUDomain *domain;
VirtIOIOMMUInterval *interval;
VirtIOIOMMUMapping *mapping;
if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
return VIRTIO_IOMMU_S_INVAL;
}
domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
if (!domain) {
return VIRTIO_IOMMU_S_NOENT;
}
interval = g_malloc0(sizeof(*interval));
interval->low = virt_start;
interval->high = virt_end;
mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
if (mapping) {
g_free(interval);
return VIRTIO_IOMMU_S_INVAL;
}
trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);
mapping = g_malloc0(sizeof(*mapping));
mapping->phys_addr = phys_start;
mapping->flags = flags;
g_tree_insert(domain->mappings, interval, mapping);
return VIRTIO_IOMMU_S_OK;
}
static int virtio_iommu_unmap(VirtIOIOMMU *s,
struct virtio_iommu_req_unmap *req)
{
uint32_t domain_id = le32_to_cpu(req->domain);
uint64_t virt_start = le64_to_cpu(req->virt_start);
uint64_t virt_end = le64_to_cpu(req->virt_end);
VirtIOIOMMUMapping *iter_val;
VirtIOIOMMUInterval interval, *iter_key;
VirtIOIOMMUDomain *domain;
int ret = VIRTIO_IOMMU_S_OK;
trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);
domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
if (!domain) {
return VIRTIO_IOMMU_S_NOENT;
}
interval.low = virt_start;
interval.high = virt_end;
while (g_tree_lookup_extended(domain->mappings, &interval,
(void **)&iter_key, (void**)&iter_val)) {
uint64_t current_low = iter_key->low;
uint64_t current_high = iter_key->high;
if (interval.low <= current_low && interval.high >= current_high) {
g_tree_remove(domain->mappings, iter_key);
trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
} else {
ret = VIRTIO_IOMMU_S_RANGE;
break;
}
}
return ret;
}
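The loop above removes whole mappings only; a request that partially covers a mapping fails without splitting it. For instance (hypothetical values):

    /*
     * after: map  domain=1 virt=[0x10000, 0x1ffff] -> phys=0x800000
     *
     * unmap domain=1 [0x10000, 0x1ffff] -> removed, VIRTIO_IOMMU_S_OK
     * unmap domain=1 [0x00000, 0xfffff] -> removed, VIRTIO_IOMMU_S_OK
     *                                      (over-covering is fine)
     * unmap domain=1 [0x10000, 0x17fff] -> VIRTIO_IOMMU_S_RANGE,
     *                                      mapping left in place
     */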
static int virtio_iommu_iov_to_req(struct iovec *iov,
unsigned int iov_cnt,
void *req, size_t req_sz)
{
size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail);
sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
if (unlikely(sz != payload_sz)) {
return VIRTIO_IOMMU_S_INVAL;
}
return 0;
}
#define virtio_iommu_handle_req(__req) \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \
struct iovec *iov, \
unsigned int iov_cnt) \
{ \
struct virtio_iommu_req_ ## __req req; \
int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \
\
return ret ? ret : virtio_iommu_ ## __req(s, &req); \
}
virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
struct virtio_iommu_req_head head;
struct virtio_iommu_req_tail tail = {};
VirtQueueElement *elem;
unsigned int iov_cnt;
struct iovec *iov;
size_t sz;
for (;;) {
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
return;
}
if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
virtio_error(vdev, "virtio-iommu bad head/tail size");
virtqueue_detach_element(vq, elem, 0);
g_free(elem);
break;
}
iov_cnt = elem->out_num;
iov = elem->out_sg;
sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
if (unlikely(sz != sizeof(head))) {
tail.status = VIRTIO_IOMMU_S_DEVERR;
goto out;
}
qemu_mutex_lock(&s->mutex);
switch (head.type) {
case VIRTIO_IOMMU_T_ATTACH:
tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
break;
case VIRTIO_IOMMU_T_DETACH:
tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
break;
case VIRTIO_IOMMU_T_MAP:
tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
break;
case VIRTIO_IOMMU_T_UNMAP:
tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
break;
default:
tail.status = VIRTIO_IOMMU_S_UNSUPP;
}
qemu_mutex_unlock(&s->mutex);
out:
sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
&tail, sizeof(tail));
assert(sz == sizeof(tail));
virtqueue_push(vq, elem, sizeof(tail));
virtio_notify(vdev, vq);
g_free(elem);
}
}
static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
int flags, uint32_t endpoint,
uint64_t address)
{
VirtIODevice *vdev = &viommu->parent_obj;
VirtQueue *vq = viommu->event_vq;
struct virtio_iommu_fault fault;
VirtQueueElement *elem;
size_t sz;
memset(&fault, 0, sizeof(fault));
fault.reason = reason;
fault.flags = cpu_to_le32(flags);
fault.endpoint = cpu_to_le32(endpoint);
fault.address = cpu_to_le64(address);
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
error_report_once(
"no buffer available in event queue to report event");
return;
}
if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
virtio_error(vdev, "error buffer of wrong size");
virtqueue_detach_element(vq, elem, 0);
g_free(elem);
return;
}
sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
&fault, sizeof(fault));
assert(sz == sizeof(fault));
trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
virtqueue_push(vq, elem, sz);
virtio_notify(vdev, vq);
g_free(elem);
}
static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
IOMMUAccessFlags flag,
int iommu_idx)
{
IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
VirtIOIOMMUInterval interval, *mapping_key;
VirtIOIOMMUMapping *mapping_value;
VirtIOIOMMU *s = sdev->viommu;
bool read_fault, write_fault;
VirtIOIOMMUEndpoint *ep;
uint32_t sid, flags;
bool bypass_allowed;
bool found;
interval.low = addr;
interval.high = addr + 1;
IOMMUTLBEntry entry = {
.target_as = &address_space_memory,
.iova = addr,
.translated_addr = addr,
.addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
.perm = IOMMU_NONE,
};
bypass_allowed = virtio_vdev_has_feature(&s->parent_obj,
VIRTIO_IOMMU_F_BYPASS);
sid = virtio_iommu_get_bdf(sdev);
trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
qemu_mutex_lock(&s->mutex);
ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
if (!ep) {
if (!bypass_allowed) {
error_report_once("%s sid=%d is not known!!", __func__, sid);
virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
VIRTIO_IOMMU_FAULT_F_ADDRESS,
sid, addr);
} else {
entry.perm = flag;
}
goto unlock;
}
if (!ep->domain) {
if (!bypass_allowed) {
error_report_once("%s %02x:%02x.%01x not attached to any domain",
__func__, PCI_BUS_NUM(sid),
PCI_SLOT(sid), PCI_FUNC(sid));
virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
VIRTIO_IOMMU_FAULT_F_ADDRESS,
sid, addr);
} else {
entry.perm = flag;
}
goto unlock;
}
found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
(void **)&mapping_key,
(void **)&mapping_value);
if (!found) {
error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
__func__, addr, sid);
virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
VIRTIO_IOMMU_FAULT_F_ADDRESS,
sid, addr);
goto unlock;
}
read_fault = (flag & IOMMU_RO) &&
!(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
write_fault = (flag & IOMMU_WO) &&
!(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);
flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
if (flags) {
error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
__func__, addr, flag, mapping_value->flags);
flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
sid, addr);
goto unlock;
}
entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
entry.perm = flag;
trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
unlock:
qemu_mutex_unlock(&s->mutex);
return entry;
}
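The successful path is plain offset arithmetic within the matched mapping; a worked example (hypothetical mapping):

    /*
     * mapping: virt [0x100000, 0x1fffff] -> phys_addr 0x800000
     * access:  addr = 0x100040
     *
     * translated_addr = addr - mapping_key->low + mapping_value->phys_addr
     *                 = 0x100040 - 0x100000 + 0x800000
     *                 = 0x800040
     */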
static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
struct virtio_iommu_config *config = &dev->config;
trace_virtio_iommu_get_config(config->page_size_mask,
config->input_range.start,
config->input_range.end,
config->domain_range.end,
config->probe_size);
memcpy(config_data, &dev->config, sizeof(struct virtio_iommu_config));
}
static void virtio_iommu_set_config(VirtIODevice *vdev,
const uint8_t *config_data)
{
struct virtio_iommu_config config;
memcpy(&config, config_data, sizeof(struct virtio_iommu_config));
trace_virtio_iommu_set_config(config.page_size_mask,
config.input_range.start,
config.input_range.end,
config.domain_range.end,
config.probe_size);
}
static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
Error **errp)
{
VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
f |= dev->features;
trace_virtio_iommu_get_features(f);
return f;
}
static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
guint ua = GPOINTER_TO_UINT(a);
guint ub = GPOINTER_TO_UINT(b);
return (ua > ub) - (ua < ub);
}
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
sizeof(struct virtio_iommu_config));
memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
virtio_iommu_handle_command);
s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
s->config.page_size_mask = TARGET_PAGE_MASK;
s->config.input_range.end = -1UL;
s->config.domain_range.end = 32;
virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS);
virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
qemu_mutex_init(&s->mutex);
s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
if (s->primary_bus) {
pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
} else {
error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
}
}
static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
g_tree_destroy(s->domains);
g_tree_destroy(s->endpoints);
virtio_cleanup(vdev);
}
static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
trace_virtio_iommu_device_reset();
if (s->domains) {
g_tree_destroy(s->domains);
}
if (s->endpoints) {
g_tree_destroy(s->endpoints);
}
s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
NULL, NULL, virtio_iommu_put_domain);
s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
NULL, NULL, virtio_iommu_put_endpoint);
}
static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
trace_virtio_iommu_device_status(status);
}
static void virtio_iommu_instance_init(Object *obj)
{
}
#define VMSTATE_INTERVAL \
{ \
.name = "interval", \
.version_id = 1, \
.minimum_version_id = 1, \
.fields = (VMStateField[]) { \
VMSTATE_UINT64(low, VirtIOIOMMUInterval), \
VMSTATE_UINT64(high, VirtIOIOMMUInterval), \
VMSTATE_END_OF_LIST() \
} \
}
#define VMSTATE_MAPPING \
{ \
.name = "mapping", \
.version_id = 1, \
.minimum_version_id = 1, \
.fields = (VMStateField[]) { \
VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
VMSTATE_UINT32(flags, VirtIOIOMMUMapping), \
VMSTATE_END_OF_LIST() \
}, \
}
static const VMStateDescription vmstate_interval_mapping[2] = {
VMSTATE_MAPPING, /* value */
VMSTATE_INTERVAL /* key */
};
static int domain_preload(void *opaque)
{
VirtIOIOMMUDomain *domain = opaque;
domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
NULL, g_free, g_free);
return 0;
}
static const VMStateDescription vmstate_endpoint = {
.name = "endpoint",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_domain = {
.name = "domain",
.version_id = 1,
.minimum_version_id = 1,
.pre_load = domain_preload,
.fields = (VMStateField[]) {
VMSTATE_UINT32(id, VirtIOIOMMUDomain),
VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
vmstate_interval_mapping,
VirtIOIOMMUInterval, VirtIOIOMMUMapping),
VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
vmstate_endpoint, VirtIOIOMMUEndpoint, next),
VMSTATE_END_OF_LIST()
}
};
static gboolean reconstruct_endpoints(gpointer key, gpointer value,
gpointer data)
{
VirtIOIOMMU *s = (VirtIOIOMMU *)data;
VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
VirtIOIOMMUEndpoint *iter;
QLIST_FOREACH(iter, &d->endpoint_list, next) {
iter->domain = d;
g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
}
return false; /* continue the domain traversal */
}
static int iommu_post_load(void *opaque, int version_id)
{
VirtIOIOMMU *s = opaque;
g_tree_foreach(s->domains, reconstruct_endpoints, s);
return 0;
}
static const VMStateDescription vmstate_virtio_iommu_device = {
.name = "virtio-iommu-device",
.minimum_version_id = 1,
.version_id = 1,
.post_load = iommu_post_load,
.fields = (VMStateField[]) {
VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 1,
&vmstate_domain, VirtIOIOMMUDomain),
VMSTATE_END_OF_LIST()
},
};
static const VMStateDescription vmstate_virtio_iommu = {
.name = "virtio-iommu",
.minimum_version_id = 1,
.priority = MIG_PRI_IOMMU,
.version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_VIRTIO_DEVICE,
VMSTATE_END_OF_LIST()
},
};
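Two descriptions are deliberate here: vmstate_virtio_iommu is the outer vmsd registered on the DeviceClass and triggers the common virtio save/load path via VMSTATE_VIRTIO_DEVICE, which in turn applies vmstate_virtio_iommu_device, the per-device state registered on the VirtioDeviceClass below.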
static Property virtio_iommu_properties[] = {
DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
DEFINE_PROP_END_OF_LIST(),
};
static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
device_class_set_props(dc, virtio_iommu_properties);
dc->vmsd = &vmstate_virtio_iommu;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
vdc->realize = virtio_iommu_device_realize;
vdc->unrealize = virtio_iommu_device_unrealize;
vdc->reset = virtio_iommu_device_reset;
vdc->get_config = virtio_iommu_get_config;
vdc->set_config = virtio_iommu_set_config;
vdc->get_features = virtio_iommu_get_features;
vdc->set_status = virtio_iommu_set_status;
vdc->vmsd = &vmstate_virtio_iommu_device;
}
static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
void *data)
{
IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
imrc->translate = virtio_iommu_translate;
}
static const TypeInfo virtio_iommu_info = {
.name = TYPE_VIRTIO_IOMMU,
.parent = TYPE_VIRTIO_DEVICE,
.instance_size = sizeof(VirtIOIOMMU),
.instance_init = virtio_iommu_instance_init,
.class_init = virtio_iommu_class_init,
};
static const TypeInfo virtio_iommu_memory_region_info = {
.parent = TYPE_IOMMU_MEMORY_REGION,
.name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
.class_init = virtio_iommu_memory_region_class_init,
};
static void virtio_register_types(void)
{
type_register_static(&virtio_iommu_info);
type_register_static(&virtio_iommu_memory_region_info);
}
type_init(virtio_register_types)
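With the types registered, the device is normally consumed through the virtio-iommu-pci proxy added elsewhere in this series (see the qdev alias hunk further down). As a rough usage sketch, assuming the ARM virt wiring from this series and with all other options elided:

    qemu-system-aarch64 -M virt -device virtio-iommu-pci ...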

View file

@@ -130,6 +130,7 @@ static void virtio_pmem_unrealize(DeviceState *dev, Error **errp)
VirtIOPMEM *pmem = VIRTIO_PMEM(dev);
host_memory_backend_set_mapped(pmem->memdev, false);
virtio_delete_queue(pmem->rq_vq);
virtio_cleanup(vdev);
}

View file

@@ -282,15 +282,19 @@ static void vring_packed_flags_write(VirtIODevice *vdev,
/* Called within rcu_read_lock(). */
static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
{
return atomic_rcu_read(&vq->vring.caches);
}
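The helper above now forwards the RCU-protected pointer instead of asserting it non-NULL: the caches can be legitimately absent, e.g. after the guest resets the device while requests are in flight, so the old assert was guest-triggerable. Every caller below is updated to check the result, along the lines of this minimal sketch (hypothetical caller, not part of the patch):

    static inline uint16_t vring_used_flags_sketch(VirtQueue *vq)
    {
        VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);

        if (!caches) {
            /* ring no longer mapped: report a harmless default */
            return 0;
        }
        return virtio_lduw_phys_cached(vq->vdev, &caches->used,
                                       offsetof(VRingUsed, flags));
    }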
/* Called within rcu_read_lock(). */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingAvail, flags);
if (!caches) {
return 0;
}
return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}
@@ -299,6 +303,11 @@ static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingAvail, idx);
if (!caches) {
return 0;
}
vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
return vq->shadow_avail_idx;
}
@@ -308,6 +317,11 @@ static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingAvail, ring[i]);
if (!caches) {
return 0;
}
return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}
@@ -323,6 +337,11 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingUsed, ring[i]);
if (!caches) {
return;
}
virtio_tswap32s(vq->vdev, &uelem->id);
virtio_tswap32s(vq->vdev, &uelem->len);
address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
@@ -334,6 +353,11 @@ static uint16_t vring_used_idx(VirtQueue *vq)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingUsed, idx);
if (!caches) {
return 0;
}
return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}
@@ -342,8 +366,12 @@ static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
hwaddr pa = offsetof(VRingUsed, idx);
if (caches) {
virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}
vq->used_idx = val;
}
@@ -353,8 +381,13 @@ static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
VirtIODevice *vdev = vq->vdev;
hwaddr pa = offsetof(VRingUsed, flags);
uint16_t flags;
if (!caches) {
return;
}
flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
@@ -365,8 +398,13 @@ static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
VirtIODevice *vdev = vq->vdev;
hwaddr pa = offsetof(VRingUsed, flags);
uint16_t flags;
if (!caches) {
return;
}
flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
@@ -381,6 +419,10 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
}
caches = vring_get_region_caches(vq);
if (!caches) {
return;
}
pa = offsetof(VRingUsed, ring[vq->vring.num]);
virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
address_space_cache_invalidate(&caches->used, pa, sizeof(val));
@@ -410,7 +452,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
VRingMemoryRegionCaches *caches;
RCU_READ_LOCK_GUARD();
caches = vring_get_region_caches(vq);
if (!caches) {
return;
}
vring_packed_event_read(vq->vdev, &caches->used, &e);
if (!enable) {
@@ -597,6 +643,10 @@ static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
}
cache = vring_get_region_caches(vq);
if (!cache) {
return 1;
}
vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
vq->last_avail_idx);
@@ -777,6 +827,10 @@ static void virtqueue_packed_fill_desc(VirtQueue *vq,
}
caches = vring_get_region_caches(vq);
if (!caches) {
return;
}
vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
}
@@ -949,6 +1003,10 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
max = vq->vring.num;
caches = vring_get_region_caches(vq);
if (!caches) {
goto err;
}
while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
MemoryRegionCache *desc_cache = &caches->desc;
unsigned int num_bufs;
@@ -1089,6 +1147,9 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
max = vq->vring.num;
caches = vring_get_region_caches(vq);
if (!caches) {
goto err;
}
for (;;) {
unsigned int num_bufs = total_bufs;
@@ -1194,6 +1255,10 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
}
caches = vring_get_region_caches(vq);
if (!caches) {
goto err;
}
desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
sizeof(VRingPackedDesc) : sizeof(VRingDesc);
if (caches->desc.len < vq->vring.num * desc_size) {
@@ -1388,6 +1453,11 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
i = head;
caches = vring_get_region_caches(vq);
if (!caches) {
virtio_error(vdev, "Region caches not initialized");
goto done;
}
if (caches->desc.len < max * sizeof(VRingDesc)) {
virtio_error(vdev, "Cannot map descriptor ring");
goto done;
@@ -1510,6 +1580,11 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
i = vq->last_avail_idx;
caches = vring_get_region_caches(vq);
if (!caches) {
virtio_error(vdev, "Region caches not initialized");
goto done;
}
if (caches->desc.len < max * sizeof(VRingDesc)) {
virtio_error(vdev, "Cannot map descriptor ring");
goto done;
@@ -1629,6 +1704,10 @@ static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
VRingPackedDesc desc;
caches = vring_get_region_caches(vq);
if (!caches) {
return 0;
}
desc_cache = &caches->desc;
virtio_queue_set_notification(vq, 0);
@@ -2413,6 +2492,10 @@ static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
VRingMemoryRegionCaches *caches;
caches = vring_get_region_caches(vq);
if (!caches) {
return false;
}
vring_packed_event_read(vdev, &caches->avail, &e);
old = vq->signalled_used;

View file

@@ -125,8 +125,10 @@ typedef struct {
bool virt;
int32_t gic_version;
VirtIOMMUType iommu;
uint16_t virtio_iommu_bdf;
struct arm_boot_info bootinfo;
MemMapEntry *memmap;
char *pciehb_nodename;
const int *irqmap;
int smp_cpus;
void *fdt;

View file

@@ -86,6 +86,7 @@ extern bool pci_available;
#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
#define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012
#define PCI_DEVICE_ID_VIRTIO_PMEM 0x1013
#define PCI_DEVICE_ID_VIRTIO_IOMMU 0x1014
#define PCI_VENDOR_ID_REDHAT 0x1b36
#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001

View file

@@ -36,7 +36,8 @@ typedef struct VHostUserBlk {
struct vhost_dev dev;
struct vhost_inflight *inflight;
VhostUserState vhost_user;
struct vhost_virtqueue *vhost_vqs;
VirtQueue **virtqs;
guint watch;
bool connected;
} VHostUserBlk;
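The rename from vqs to vhost_vqs keeps the vhost-side struct vhost_virtqueue array clearly distinct from the new virtqs array of guest-visible VirtQueue pointers; recording the latter is what allows unrealize to tear each queue down explicitly. A sketch of the assumed cleanup shape (field names as above; this loop is not part of the hunk):

    for (int i = 0; i < s->num_queues; i++) {
        virtio_delete_queue(s->virtqs[i]);
    }
    g_free(s->virtqs);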

View file

@@ -37,6 +37,8 @@ typedef struct {
struct vhost_virtqueue *vhost_vqs;
struct vhost_dev vhost_dev;
VhostUserState vhost_user;
VirtQueue **req_vqs;
VirtQueue *hiprio_vq;
/*< public >*/
} VHostUserFS;

View file

@@ -0,0 +1,61 @@
/*
* virtio-iommu device
*
* Copyright (c) 2020 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2 or later, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef QEMU_VIRTIO_IOMMU_H
#define QEMU_VIRTIO_IOMMU_H
#include "standard-headers/linux/virtio_iommu.h"
#include "hw/virtio/virtio.h"
#include "hw/pci/pci.h"
#define TYPE_VIRTIO_IOMMU "virtio-iommu-device"
#define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-device-base"
#define VIRTIO_IOMMU(obj) \
OBJECT_CHECK(VirtIOIOMMU, (obj), TYPE_VIRTIO_IOMMU)
#define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region"
typedef struct IOMMUDevice {
void *viommu;
PCIBus *bus;
int devfn;
IOMMUMemoryRegion iommu_mr;
AddressSpace as;
} IOMMUDevice;
typedef struct IOMMUPciBus {
PCIBus *bus;
IOMMUDevice *pbdev[0]; /* Parent array is sparse, so dynamically alloc */
} IOMMUPciBus;
typedef struct VirtIOIOMMU {
VirtIODevice parent_obj;
VirtQueue *req_vq;
VirtQueue *event_vq;
struct virtio_iommu_config config;
uint64_t features;
GHashTable *as_by_busptr;
IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
PCIBus *primary_bus;
GTree *domains;
QemuMutex mutex;
GTree *endpoints;
} VirtIOIOMMU;
#endif
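The two lookup structures above cooperate: as_by_busptr maps a PCIBus pointer to its IOMMUPciBus wrapper, whose pbdev array is indexed by devfn and stays NULL for functions that never requested an address space. A hypothetical helper illustrating the intended lookup path (the real logic lives in virtio_iommu_find_add_as):

    static IOMMUDevice *viommu_lookup_sketch(VirtIOIOMMU *s, PCIBus *bus, int devfn)
    {
        IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);

        if (!sbus) {
            /* no endpoint on this bus has been registered yet */
            return NULL;
        }
        /* sparse array: NULL for devfns that were never seen */
        return sbus->pbdev[devfn];
    }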

View file

@@ -67,6 +67,7 @@ static const QDevAlias qdev_alias_table[] = {
{ "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_S390X },
{ "virtio-input-host-pci", "virtio-input-host",
QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
{ "virtio-iommu-pci", "virtio-iommu", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
{ "virtio-keyboard-ccw", "virtio-keyboard", QEMU_ARCH_S390X },
{ "virtio-keyboard-pci", "virtio-keyboard",
QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },

View file

@@ -31,6 +31,13 @@
eval `grep SRC_PATH= config-host.mak`
old_allowed_dif=`grep -v -e 'List of comma-separated changed AML files to ignore' ${SRC_PATH}/tests/qtest/bios-tables-test-allowed-diff.h`
echo '/* List of comma-separated changed AML files to ignore */' > ${SRC_PATH}/tests/qtest/bios-tables-test-allowed-diff.h
echo "The files were rebuilt and can be added to git."
if [ -z "$old_allowed_dif" ]; then
echo "Note! Please do not commit expected files with source changes"
echo "Note! Please follow the process documented in ${SRC_PATH}/tests/qtest/bios-tables-test.c"
fi
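Roughly, the intended flow (assuming the script's usual location in the tree) is: list the changed tables in tests/qtest/bios-tables-test-allowed-diff.h, regenerate, then re-run the qtest before committing the rebuilt files:

    ./tests/data/acpi/rebuild-expected-aml.sh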

View file

@@ -426,7 +426,9 @@ static void test_acpi_asl(test_data *data)
fprintf(stderr,
"acpi-test: Warning! %.4s binary file mismatch. "
"Actual [aml:%s], Expected [aml:%s].\n",
"Actual [aml:%s], Expected [aml:%s].\n"
"See source file tests/qtest/bios-tables-test.c "
"for instructions on how to update expected files.\n",
exp_sdt->aml, sdt->aml_file, exp_sdt->aml_file);
all_tables_match = all_tables_match &&
@@ -461,21 +463,20 @@
"Actual [asl:%s, aml:%s], Expected [asl:%s, aml:%s].\n",
exp_sdt->aml, sdt->asl_file, sdt->aml_file,
exp_sdt->asl_file, exp_sdt->aml_file);
const char *diff_env = getenv("DIFF");
const char *diff_cmd = diff_env ? diff_env : "diff -u";
char *diff = g_strdup_printf("%s %s %s", diff_cmd,
exp_sdt->asl_file, sdt->asl_file);
int out = dup(STDOUT_FILENO);
int ret G_GNUC_UNUSED;
dup2(STDERR_FILENO, STDOUT_FILENO);
ret = system(diff) ;
dup2(out, STDOUT_FILENO);
close(out);
g_free(diff);
}
}
}
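Net effect of this last hunk: the ASL difference is now printed unconditionally, falling back to diff -u when no DIFF utility is set, and the dup()/dup2() pair temporarily points the child's stdout at stderr so the diff output lands in the test log before stdout is restored.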