virtio,vdpa: features, cleanups, fixes

vdpa sim refactoring
 virtio mem: Big Block Mode support
 misc cleanups, fixes
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAl/gznEPHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRpu/cIAJSVWVCs/5KVfeOg6NQ5WRK48g58eZoaIS6z
 jr5iyCRfoQs3tQgcX0W02X3QwVwesnpepF9FChFwexlh+Te3tWXKaDj3eWBmlJVh
 Hg8bMOOiOqY7qh47LsGbmb2pnJ3Tg8uwuTz+w/6VDc43CQa7ganwSl0owqye3ecm
 IdGbIIXZQs55FCzM8hwOWWpjsp1C2lRtjefsOc5AbtFjzGk+7767YT+C73UgwcSi
 peHbD8YFJTInQj6JCbF7uYYAWHrOFAOssWE3OwKtZJdTdJvE7bMgSZaYvUgHMvFR
 gRycqxpLAg6vcuns4qjiYafrywvYwEvTkPIXmMG6IAgNYIPAxK0=
 =SmPb
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - vdpa sim refactoring

 - virtio mem: Big Block Mode support

 - misc cleanups, fixes

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (61 commits)
  vdpa: Use simpler version of ida allocation
  vdpa: Add missing comment for virtqueue count
  uapi: virtio_ids: add missing device type IDs from OASIS spec
  uapi: virtio_ids.h: consistent indentions
  vhost scsi: fix error return code in vhost_scsi_set_endpoint()
  virtio_ring: Fix two use after free bugs
  virtio_net: Fix error code in probe()
  virtio_ring: Cut and paste bugs in vring_create_virtqueue_packed()
  tools/virtio: add barrier for aarch64
  tools/virtio: add krealloc_array
  tools/virtio: include asm/bug.h
  vdpa/mlx5: Use write memory barrier after updating CQ index
  vdpa: split vdpasim to core and net modules
  vdpa_sim: split vdpasim_virtqueue's iov field in out_iov and in_iov
  vdpa_sim: make vdpasim->buffer size configurable
  vdpa_sim: use kvmalloc to allocate vdpasim->buffer
  vdpa_sim: set vringh notify callback
  vdpa_sim: add set_config callback in vdpasim_dev_attr
  vdpa_sim: add get_config callback in vdpasim_dev_attr
  vdpa_sim: make 'config' generic and usable for any device type
  ...
Linus Torvalds 2020-12-24 12:06:46 -08:00
commit 64145482d3
19 changed files with 1843 additions and 810 deletions


@@ -3072,6 +3072,7 @@ static int virtnet_probe(struct virtio_device *vdev)
dev_err(&vdev->dev,
"device MTU appears to have changed it is now %d < %d",
mtu, dev->min_mtu);
err = -EINVAL;
goto free;
}


@@ -9,21 +9,24 @@ menuconfig VDPA
if VDPA
config VDPA_SIM
tristate "vDPA device simulator"
tristate "vDPA device simulator core"
depends on RUNTIME_TESTING_MENU && HAS_DMA
select DMA_OPS
select VHOST_RING
select GENERIC_NET_UTILS
default n
help
vDPA networking device simulator which loop TX traffic back
to RX. This device is used for testing, prototyping and
development of vDPA.
Enable this module to support vDPA device simulators. These devices
are used for testing, prototyping and development of vDPA.
config VDPA_SIM_NET
tristate "vDPA simulator for networking device"
depends on VDPA_SIM
select GENERIC_NET_UTILS
help
vDPA networking device simulator which loops TX traffic back to RX.
config IFCVF
tristate "Intel IFC VF vDPA driver"
depends on PCI_MSI
default n
help
This kernel module can drive Intel IFC VF NIC to offload
virtio dataplane traffic to hardware.
@@ -42,7 +45,6 @@ config MLX5_VDPA_NET
tristate "vDPA driver for ConnectX devices"
select MLX5_VDPA
depends on MLX5_CORE
default n
help
VDPA network driver for ConnectX6 and newer. Provides offloading
of virtio net datapath such that descriptors put on the ring will


@@ -417,16 +417,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return ret;
}
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret) {
IFCVF_ERR(pdev, "No usable DMA confiugration\n");
return ret;
}
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (ret) {
IFCVF_ERR(pdev,
"No usable coherent DMA confiugration\n");
IFCVF_ERR(pdev, "No usable DMA configuration\n");
return ret;
}
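
dma_set_mask_and_coherent() sets the streaming and the coherent DMA mask in one call, which is why the second pci_set_consistent_dma_mask() block and its duplicated error handling can go away. The resulting probe-time shape, as a sketch (example_probe() is a hypothetical driver probe, not part of this diff):

#include <linux/pci.h>
#include <linux/dma-mapping.h>

static int example_probe(struct pci_dev *pdev)
{
        int ret;

        /* Covers both streaming and coherent mappings in one call. */
        ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (ret)
                dev_err(&pdev->dev, "No usable DMA configuration\n");
        return ret;
}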


@@ -479,6 +479,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
mlx5_cq_set_ci(&mvq->cq.mcq);
/* make sure CQ cosumer update is visible to the hardware before updating
* RX doorbell record.
*/
dma_wmb();
rx_post(&mvq->vqqp, num);
if (mvq->event_cb.callback)
mvq->event_cb.callback(mvq->event_cb.private);
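
dma_wmb() orders CPU writes to device-visible memory against later CPU writes, so here the hardware cannot observe the RX doorbell record before the CQ consumer-index update it depends on; it is cheaper than a full wmb() because only CPU-to-device ordering is needed. A generic sketch of the pattern (example_ring and its fields are hypothetical):

struct example_ring {
        __le32 *consumed;       /* consumer index, read by the device */
        __le32 *db_rec;         /* doorbell record, read by the device */
};

static void example_arm(struct example_ring *r, u32 ci)
{
        *r->consumed = cpu_to_le32(ci);

        /* The consumer-index update must be device-visible before the
         * doorbell record update; dma_wmb() is sufficient for that and
         * cheaper than wmb().
         */
        dma_wmb();

        *r->db_rec = cpu_to_le32(ci);
}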


@@ -89,7 +89,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
if (!vdev)
goto err;
err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL);
err = ida_alloc(&vdpa_index_ida, GFP_KERNEL);
if (err < 0)
goto err_ida;
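
For context: ida_alloc(ida, gfp) allocates the smallest free ID starting at 0, which is exactly what the removed ida_simple_get(ida, 0, 0, gfp) call did with its unused range arguments. A minimal sketch of the new pairing (example_ida and the helpers are hypothetical names):

#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int example_get_index(void)
{
        /* Old style: ida_simple_get(&example_ida, 0, 0, GFP_KERNEL),
         * where (0, 0) meant "start at 0, no upper bound" -- the exact
         * semantics ida_alloc() has built in.
         */
        return ida_alloc(&example_ida, GFP_KERNEL);     /* ID or -errno */
}

static void example_put_index(int id)
{
        ida_free(&example_ida, id);
}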


@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o


@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* VDPA networking device simulator.
* VDPA device simulator core.
*
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
* Author: Jason Wang <jasowang@redhat.com>
@@ -11,97 +11,32 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/iommu.h>
#include <linux/dma-map-ops.h>
#include <linux/sysfs.h>
#include <linux/file.h>
#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/virtio_byteorder.h>
#include <linux/vhost_iotlb.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_net.h>
#include "vdpa_sim.h"
#define DRV_VERSION "0.1"
#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
#define DRV_DESC "vDPA Device Simulator"
#define DRV_DESC "vDPA Device Simulator core"
#define DRV_LICENSE "GPL v2"
static int batch_mapping = 1;
module_param(batch_mapping, int, 0444);
MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
struct vdpasim_virtqueue {
struct vringh vring;
struct vringh_kiov iov;
unsigned short head;
bool ready;
u64 desc_addr;
u64 device_addr;
u64 driver_addr;
u32 num;
void *private;
irqreturn_t (*cb)(void *data);
};
static int max_iotlb_entries = 2048;
module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
"Maximum number of iotlb entries. 0 means unlimited. (default: 2048)");
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256
#define VDPASIM_DEVICE_ID 0x1
#define VDPASIM_VENDOR_ID 0
#define VDPASIM_VQ_NUM 0x2
#define VDPASIM_NAME "vdpasim-netdev"
static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
(1ULL << VIRTIO_F_VERSION_1) |
(1ULL << VIRTIO_F_ACCESS_PLATFORM) |
(1ULL << VIRTIO_NET_F_MAC);
/* State of each vdpasim device */
struct vdpasim {
struct vdpa_device vdpa;
struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM];
struct work_struct work;
/* spinlock to synchronize virtqueue state */
spinlock_t lock;
struct virtio_net_config config;
struct vhost_iotlb *iommu;
void *buffer;
u32 status;
u32 generation;
u64 features;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
};
/* TODO: cross-endian support */
static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
{
return virtio_legacy_is_little_endian() ||
(vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
}
static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
{
return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val);
}
static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
{
return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val);
}
static struct vdpasim *vdpasim_dev;
static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
{
@@ -115,20 +50,34 @@ static struct vdpasim *dev_to_sim(struct device *dev)
return vdpa_to_sim(vdpa);
}
static void vdpasim_vq_notify(struct vringh *vring)
{
struct vdpasim_virtqueue *vq =
container_of(vring, struct vdpasim_virtqueue, vring);
if (!vq->cb)
return;
vq->cb(vq->private);
}
static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
{
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
vringh_init_iotlb(&vq->vring, vdpasim_features,
vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
VDPASIM_QUEUE_MAX, false,
(struct vring_desc *)(uintptr_t)vq->desc_addr,
(struct vring_avail *)
(uintptr_t)vq->driver_addr,
(struct vring_used *)
(uintptr_t)vq->device_addr);
vq->vring.notify = vdpasim_vq_notify;
}
static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq)
static void vdpasim_vq_reset(struct vdpasim *vdpasim,
struct vdpasim_virtqueue *vq)
{
vq->ready = false;
vq->desc_addr = 0;
@@ -136,16 +85,18 @@ static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq)
vq->device_addr = 0;
vq->cb = NULL;
vq->private = NULL;
vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX,
false, NULL, NULL, NULL);
vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
vq->vring.notify = NULL;
}
static void vdpasim_reset(struct vdpasim *vdpasim)
{
int i;
for (i = 0; i < VDPASIM_VQ_NUM; i++)
vdpasim_vq_reset(&vdpasim->vqs[i]);
for (i = 0; i < vdpasim->dev_attr.nvqs; i++)
vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]);
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_reset(vdpasim->iommu);
@@ -156,80 +107,6 @@ static void vdpasim_reset(struct vdpasim *vdpasim)
++vdpasim->generation;
}
static void vdpasim_work(struct work_struct *work)
{
struct vdpasim *vdpasim = container_of(work, struct
vdpasim, work);
struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
ssize_t read, write;
size_t total_write;
int pkts = 0;
int err;
spin_lock(&vdpasim->lock);
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out;
if (!txq->ready || !rxq->ready)
goto out;
while (true) {
total_write = 0;
err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL,
&txq->head, GFP_ATOMIC);
if (err <= 0)
break;
err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov,
&rxq->head, GFP_ATOMIC);
if (err <= 0) {
vringh_complete_iotlb(&txq->vring, txq->head, 0);
break;
}
while (true) {
read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov,
vdpasim->buffer,
PAGE_SIZE);
if (read <= 0)
break;
write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov,
vdpasim->buffer, read);
if (write <= 0)
break;
total_write += write;
}
/* Make sure data is wrote before advancing index */
smp_wmb();
vringh_complete_iotlb(&txq->vring, txq->head, 0);
vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);
/* Make sure used is visible before rasing the interrupt. */
smp_wmb();
local_bh_disable();
if (txq->cb)
txq->cb(txq->private);
if (rxq->cb)
rxq->cb(rxq->private);
local_bh_enable();
if (++pkts > 4) {
schedule_work(&vdpasim->work);
goto out;
}
}
out:
spin_unlock(&vdpasim->lock);
}
static int dir_to_perm(enum dma_data_direction dir)
{
int perm = -EFAULT;
@@ -342,26 +219,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
.free = vdpasim_free_coherent,
};
static const struct vdpa_config_ops vdpasim_net_config_ops;
static const struct vdpa_config_ops vdpasim_net_batch_config_ops;
static const struct vdpa_config_ops vdpasim_config_ops;
static const struct vdpa_config_ops vdpasim_batch_config_ops;
static struct vdpasim *vdpasim_create(void)
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
{
const struct vdpa_config_ops *ops;
struct vdpasim *vdpasim;
struct device *dev;
int ret = -ENOMEM;
int i, ret = -ENOMEM;
if (batch_mapping)
ops = &vdpasim_net_batch_config_ops;
ops = &vdpasim_batch_config_ops;
else
ops = &vdpasim_net_config_ops;
ops = &vdpasim_config_ops;
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM);
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
dev_attr->nvqs);
if (!vdpasim)
goto err_alloc;
INIT_WORK(&vdpasim->work, vdpasim_work);
vdpasim->dev_attr = *dev_attr;
INIT_WORK(&vdpasim->work, dev_attr->work_fn);
spin_lock_init(&vdpasim->lock);
spin_lock_init(&vdpasim->iommu_lock);
@@ -371,31 +250,27 @@ static struct vdpasim *vdpasim_create(void)
goto err_iommu;
set_dma_ops(dev, &vdpasim_dma_ops);
vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
if (!vdpasim->config)
goto err_iommu;
vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue),
GFP_KERNEL);
if (!vdpasim->vqs)
goto err_iommu;
vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0);
if (!vdpasim->iommu)
goto err_iommu;
vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
if (!vdpasim->buffer)
goto err_iommu;
if (macaddr) {
mac_pton(macaddr, vdpasim->config.mac);
if (!is_valid_ether_addr(vdpasim->config.mac)) {
ret = -EADDRNOTAVAIL;
goto err_iommu;
}
} else {
eth_random_addr(vdpasim->config.mac);
}
vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
for (i = 0; i < dev_attr->nvqs; i++)
vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu);
vdpasim->vdpa.dma_dev = dev;
ret = vdpa_register_device(&vdpasim->vdpa);
if (ret)
goto err_iommu;
return vdpasim;
@@ -404,6 +279,7 @@ static struct vdpasim *vdpasim_create(void)
err_alloc:
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(vdpasim_create);
static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx,
u64 desc_area, u64 driver_area,
@@ -498,28 +374,21 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa)
static u64 vdpasim_get_features(struct vdpa_device *vdpa)
{
return vdpasim_features;
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
return vdpasim->dev_attr.supported_features;
}
static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
struct virtio_net_config *config = &vdpasim->config;
/* DMA mapping must be done by driver */
if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
return -EINVAL;
vdpasim->features = features & vdpasim_features;
vdpasim->features = features & vdpasim->dev_attr.supported_features;
/* We generally only know whether guest is using the legacy interface
* here, so generally that's the earliest we can set config fields.
* Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which
* implies VIRTIO_F_VERSION_1, but let's not try to be clever here.
*/
config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
return 0;
}
@@ -536,7 +405,9 @@ static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa)
static u32 vdpasim_get_device_id(struct vdpa_device *vdpa)
{
return VDPASIM_DEVICE_ID;
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
return vdpasim->dev_attr.id;
}
static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa)
@@ -572,14 +443,27 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
if (offset + len < sizeof(struct virtio_net_config))
memcpy(buf, (u8 *)&vdpasim->config + offset, len);
if (offset + len > vdpasim->dev_attr.config_size)
return;
if (vdpasim->dev_attr.get_config)
vdpasim->dev_attr.get_config(vdpasim, vdpasim->config);
memcpy(buf, vdpasim->config + offset, len);
}
static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset,
const void *buf, unsigned int len)
{
/* No writable config supportted by vdpasim */
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
if (offset + len > vdpasim->dev_attr.config_size)
return;
memcpy(vdpasim->config + offset, buf, len);
if (vdpasim->dev_attr.set_config)
vdpasim->dev_attr.set_config(vdpasim, vdpasim->config);
}
static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
@@ -656,12 +540,14 @@ static void vdpasim_free(struct vdpa_device *vdpa)
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
cancel_work_sync(&vdpasim->work);
kfree(vdpasim->buffer);
kvfree(vdpasim->buffer);
if (vdpasim->iommu)
vhost_iotlb_free(vdpasim->iommu);
kfree(vdpasim->vqs);
kfree(vdpasim->config);
}
static const struct vdpa_config_ops vdpasim_net_config_ops = {
static const struct vdpa_config_ops vdpasim_config_ops = {
.set_vq_address = vdpasim_set_vq_address,
.set_vq_num = vdpasim_set_vq_num,
.kick_vq = vdpasim_kick_vq,
@@ -688,7 +574,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = {
.free = vdpasim_free,
};
static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.set_vq_address = vdpasim_set_vq_address,
.set_vq_num = vdpasim_set_vq_num,
.kick_vq = vdpasim_kick_vq,
@@ -714,26 +600,6 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
.free = vdpasim_free,
};
static int __init vdpasim_dev_init(void)
{
vdpasim_dev = vdpasim_create();
if (!IS_ERR(vdpasim_dev))
return 0;
return PTR_ERR(vdpasim_dev);
}
static void __exit vdpasim_dev_exit(void)
{
struct vdpa_device *vdpa = &vdpasim_dev->vdpa;
vdpa_unregister_device(vdpa);
}
module_init(vdpasim_dev_init)
module_exit(vdpasim_dev_exit)
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);

View file

@@ -0,0 +1,105 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
*/
#ifndef _VDPA_SIM_H
#define _VDPA_SIM_H
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/virtio_byteorder.h>
#include <linux/vhost_iotlb.h>
#include <uapi/linux/virtio_config.h>
#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \
(1ULL << VIRTIO_F_VERSION_1) | \
(1ULL << VIRTIO_F_ACCESS_PLATFORM))
struct vdpasim;
struct vdpasim_virtqueue {
struct vringh vring;
struct vringh_kiov in_iov;
struct vringh_kiov out_iov;
unsigned short head;
bool ready;
u64 desc_addr;
u64 device_addr;
u64 driver_addr;
u32 num;
void *private;
irqreturn_t (*cb)(void *data);
};
struct vdpasim_dev_attr {
u64 supported_features;
size_t config_size;
size_t buffer_size;
int nvqs;
u32 id;
work_func_t work_fn;
void (*get_config)(struct vdpasim *vdpasim, void *config);
void (*set_config)(struct vdpasim *vdpasim, const void *config);
};
/* State of each vdpasim device */
struct vdpasim {
struct vdpa_device vdpa;
struct vdpasim_virtqueue *vqs;
struct work_struct work;
struct vdpasim_dev_attr dev_attr;
/* spinlock to synchronize virtqueue state */
spinlock_t lock;
/* virtio config according to device type */
void *config;
struct vhost_iotlb *iommu;
void *buffer;
u32 status;
u32 generation;
u64 features;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
};
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr);
/* TODO: cross-endian support */
static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
{
return virtio_legacy_is_little_endian() ||
(vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
}
static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
{
return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val);
}
static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
{
return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val);
}
static inline u32 vdpasim32_to_cpu(struct vdpasim *vdpasim, __virtio32 val)
{
return __virtio32_to_cpu(vdpasim_is_little_endian(vdpasim), val);
}
static inline __virtio32 cpu_to_vdpasim32(struct vdpasim *vdpasim, u32 val)
{
return __cpu_to_virtio32(vdpasim_is_little_endian(vdpasim), val);
}
static inline u64 vdpasim64_to_cpu(struct vdpasim *vdpasim, __virtio64 val)
{
return __virtio64_to_cpu(vdpasim_is_little_endian(vdpasim), val);
}
static inline __virtio64 cpu_to_vdpasim64(struct vdpasim *vdpasim, u64 val)
{
return __cpu_to_virtio64(vdpasim_is_little_endian(vdpasim), val);
}
#endif
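
With feature bits, config layout, queue count, and datapath all carried by vdpasim_dev_attr, adding a new simulated device type reduces to filling in the attributes and calling vdpasim_create(). A hypothetical block-device simulator (illustrative only, not part of this series) might look like this sketch:

#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include "vdpa_sim.h"

static void vdpasim_blk_work(struct work_struct *work); /* hypothetical datapath */

static struct vdpasim *vdpasim_blk_dev;

static int __init vdpasim_blk_init(void)
{
        struct vdpasim_dev_attr dev_attr = {};
        int ret;

        dev_attr.id = VIRTIO_ID_BLOCK;
        dev_attr.supported_features = VDPASIM_FEATURES;
        dev_attr.nvqs = 1;
        dev_attr.config_size = sizeof(struct virtio_blk_config);
        dev_attr.work_fn = vdpasim_blk_work;
        dev_attr.buffer_size = PAGE_SIZE;

        vdpasim_blk_dev = vdpasim_create(&dev_attr);
        if (IS_ERR(vdpasim_blk_dev))
                return PTR_ERR(vdpasim_blk_dev);

        ret = vdpa_register_device(&vdpasim_blk_dev->vdpa);
        if (ret)
                put_device(&vdpasim_blk_dev->vdpa.dev);
        return ret;
}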


@@ -0,0 +1,177 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* VDPA simulator for networking device.
*
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
* Author: Jason Wang <jasowang@redhat.com>
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_net.h>
#include "vdpa_sim.h"
#define DRV_VERSION "0.1"
#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>"
#define DRV_DESC "vDPA Device Simulator for networking device"
#define DRV_LICENSE "GPL v2"
#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \
(1ULL << VIRTIO_NET_F_MAC))
#define VDPASIM_NET_VQ_NUM 2
static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
u8 macaddr_buf[ETH_ALEN];
static struct vdpasim *vdpasim_net_dev;
static void vdpasim_net_work(struct work_struct *work)
{
struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
ssize_t read, write;
size_t total_write;
int pkts = 0;
int err;
spin_lock(&vdpasim->lock);
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out;
if (!txq->ready || !rxq->ready)
goto out;
while (true) {
total_write = 0;
err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL,
&txq->head, GFP_ATOMIC);
if (err <= 0)
break;
err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov,
&rxq->head, GFP_ATOMIC);
if (err <= 0) {
vringh_complete_iotlb(&txq->vring, txq->head, 0);
break;
}
while (true) {
read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
vdpasim->buffer,
PAGE_SIZE);
if (read <= 0)
break;
write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
vdpasim->buffer, read);
if (write <= 0)
break;
total_write += write;
}
/* Make sure data is wrote before advancing index */
smp_wmb();
vringh_complete_iotlb(&txq->vring, txq->head, 0);
vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);
/* Make sure used is visible before rasing the interrupt. */
smp_wmb();
local_bh_disable();
if (vringh_need_notify_iotlb(&txq->vring) > 0)
vringh_notify(&txq->vring);
if (vringh_need_notify_iotlb(&rxq->vring) > 0)
vringh_notify(&rxq->vring);
local_bh_enable();
if (++pkts > 4) {
schedule_work(&vdpasim->work);
goto out;
}
}
out:
spin_unlock(&vdpasim->lock);
}
static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
{
struct virtio_net_config *net_config =
(struct virtio_net_config *)config;
net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
}
static int __init vdpasim_net_init(void)
{
struct vdpasim_dev_attr dev_attr = {};
int ret;
if (macaddr) {
mac_pton(macaddr, macaddr_buf);
if (!is_valid_ether_addr(macaddr_buf)) {
ret = -EADDRNOTAVAIL;
goto out;
}
} else {
eth_random_addr(macaddr_buf);
}
dev_attr.id = VIRTIO_ID_NET;
dev_attr.supported_features = VDPASIM_NET_FEATURES;
dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
dev_attr.config_size = sizeof(struct virtio_net_config);
dev_attr.get_config = vdpasim_net_get_config;
dev_attr.work_fn = vdpasim_net_work;
dev_attr.buffer_size = PAGE_SIZE;
vdpasim_net_dev = vdpasim_create(&dev_attr);
if (IS_ERR(vdpasim_net_dev)) {
ret = PTR_ERR(vdpasim_net_dev);
goto out;
}
ret = vdpa_register_device(&vdpasim_net_dev->vdpa);
if (ret)
goto put_dev;
return 0;
put_dev:
put_device(&vdpasim_net_dev->vdpa.dev);
out:
return ret;
}
static void __exit vdpasim_net_exit(void)
{
struct vdpa_device *vdpa = &vdpasim_net_dev->vdpa;
vdpa_unregister_device(vdpa);
}
module_init(vdpasim_net_init);
module_exit(vdpasim_net_exit);
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);


@@ -1643,7 +1643,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
if (!vhost_vq_is_setup(vq))
continue;
if (vhost_scsi_setup_vq_cmds(vq, vq->num))
ret = vhost_scsi_setup_vq_cmds(vq, vq->num);
if (ret)
goto destroy_vq_cmds;
}


@@ -245,14 +245,10 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
return -EFAULT;
if (vhost_vdpa_config_validate(v, &config))
return -EINVAL;
buf = kvzalloc(config.len, GFP_KERNEL);
if (!buf)
return -ENOMEM;
if (copy_from_user(buf, c->buf, config.len)) {
kvfree(buf);
return -EFAULT;
}
buf = vmemdup_user(c->buf, config.len);
if (IS_ERR(buf))
return PTR_ERR(buf);
ops->set_config(vdpa, config.off, buf, config.len);
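
vmemdup_user() folds the kvzalloc() + copy_from_user() pair into a single call: it kvmalloc()s a buffer, copies len bytes from userspace, and returns an ERR_PTR on failure, so callers propagate PTR_ERR() and free with kvfree(). The general shape, sketched with a hypothetical example_set_blob():

#include <linux/err.h>
#include <linux/mm.h>           /* kvfree() */
#include <linux/string.h>       /* vmemdup_user() */

static long example_set_blob(void __user *ubuf, size_t len)
{
        void *buf = vmemdup_user(ubuf, len);

        if (IS_ERR(buf))
                return PTR_ERR(buf);    /* -ENOMEM or -EFAULT */

        /* ... consume buf ... */
        kvfree(buf);
        return 0;
}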

File diff suppressed because it is too large.


@@ -1608,7 +1608,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
vq->num_added = 0;
vq->packed_ring = true;
vq->use_dma_api = vring_use_dma_api(vdev);
list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
vq->in_use = false;
vq->last_add_time_valid = false;
@@ -1669,6 +1668,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
cpu_to_le16(vq->packed.event_flags_shadow);
}
list_add_tail(&vq->vq.list, &vdev->vqs);
return &vq->vq;
err_desc_extra:
@@ -1676,9 +1676,9 @@ static struct virtqueue *vring_create_virtqueue_packed(
err_desc_state:
kfree(vq);
err_vq:
vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr);
vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
err_device:
vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr);
vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
err_driver:
vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
err_ring:
@@ -2085,7 +2085,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->last_used_idx = 0;
vq->num_added = 0;
vq->use_dma_api = vring_use_dma_api(vdev);
list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
vq->in_use = false;
vq->last_add_time_valid = false;
@@ -2127,6 +2126,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
memset(vq->split.desc_state, 0, vring.num *
sizeof(struct vring_desc_state_split));
list_add_tail(&vq->vq.list, &vdev->vqs);
return &vq->vq;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
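
Both hunks fix the same class of bug: the virtqueue was added to vdev->vqs before allocations that can still fail, so the error paths kfree()d an object already reachable through the device's list. Moving list_add_tail() after the last failure point restores the usual publish-last rule, sketched generically below (example_obj and parent are hypothetical):

#include <linux/list.h>
#include <linux/slab.h>

struct example_obj {
        struct list_head node;
        void *buf;
};

static struct example_obj *example_create(struct list_head *parent, size_t len)
{
        struct example_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

        if (!obj)
                return NULL;

        /* Buggy ordering would publish here:
         *      list_add_tail(&obj->node, parent);
         * and a later failure path would then kfree() obj while it is
         * still reachable through the list -- a use-after-free for
         * anyone walking parent.
         */

        obj->buf = kmalloc(len, GFP_KERNEL);
        if (!obj->buf) {
                kfree(obj);                     /* safe: not yet published */
                return NULL;
        }

        list_add_tail(&obj->node, parent);      /* publish last */
        return obj;
}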


@@ -42,6 +42,7 @@ struct vdpa_vq_state {
* @config: the configuration ops for this device.
* @index: device index
* @features_valid: were features initialized? for legacy guests
* @nvqs: maximum number of supported virtqueues
*/
struct vdpa_device {
struct device dev;


@@ -29,24 +29,30 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. */
#define VIRTIO_ID_NET 1 /* virtio net */
#define VIRTIO_ID_BLOCK 2 /* virtio block */
#define VIRTIO_ID_CONSOLE 3 /* virtio console */
#define VIRTIO_ID_RNG 4 /* virtio rng */
#define VIRTIO_ID_BALLOON 5 /* virtio balloon */
#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */
#define VIRTIO_ID_SCSI 8 /* virtio scsi */
#define VIRTIO_ID_9P 9 /* 9p virtio console */
#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#define VIRTIO_ID_MEM 24 /* virtio mem */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
#define VIRTIO_ID_NET 1 /* virtio net */
#define VIRTIO_ID_BLOCK 2 /* virtio block */
#define VIRTIO_ID_CONSOLE 3 /* virtio console */
#define VIRTIO_ID_RNG 4 /* virtio rng */
#define VIRTIO_ID_BALLOON 5 /* virtio balloon */
#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */
#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */
#define VIRTIO_ID_SCSI 8 /* virtio scsi */
#define VIRTIO_ID_9P 9 /* 9p virtio console */
#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */
#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */
#define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */
#define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */
#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#define VIRTIO_ID_MEM 24 /* virtio mem */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
#endif /* _LINUX_VIRTIO_IDS_H */


@@ -1784,39 +1784,112 @@ int remove_memory(int nid, u64 start, u64 size)
}
EXPORT_SYMBOL_GPL(remove_memory);
static int try_offline_memory_block(struct memory_block *mem, void *arg)
{
uint8_t online_type = MMOP_ONLINE_KERNEL;
uint8_t **online_types = arg;
struct page *page;
int rc;
/*
* Sense the online_type via the zone of the memory block. Offlining
* with multiple zones within one memory block will be rejected
* by offlining code ... so we don't care about that.
*/
page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr));
if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE)
online_type = MMOP_ONLINE_MOVABLE;
rc = device_offline(&mem->dev);
/*
* Default is MMOP_OFFLINE - change it only if offlining succeeded,
* so try_reonline_memory_block() can do the right thing.
*/
if (!rc)
**online_types = online_type;
(*online_types)++;
/* Ignore if already offline. */
return rc < 0 ? rc : 0;
}
static int try_reonline_memory_block(struct memory_block *mem, void *arg)
{
uint8_t **online_types = arg;
int rc;
if (**online_types != MMOP_OFFLINE) {
mem->online_type = **online_types;
rc = device_online(&mem->dev);
if (rc < 0)
pr_warn("%s: Failed to re-online memory: %d",
__func__, rc);
}
/* Continue processing all remaining memory blocks. */
(*online_types)++;
return 0;
}
/*
* Try to offline and remove a memory block. Might take a long time to
* finish in case memory is still in use. Primarily useful for memory devices
* that logically unplugged all memory (so it's no longer in use) and want to
* offline + remove the memory block.
* Try to offline and remove memory. Might take a long time to finish in case
* memory is still in use. Primarily useful for memory devices that logically
* unplugged all memory (so it's no longer in use) and want to offline + remove
* that memory.
*/
int offline_and_remove_memory(int nid, u64 start, u64 size)
{
struct memory_block *mem;
int rc = -EINVAL;
const unsigned long mb_count = size / memory_block_size_bytes();
uint8_t *online_types, *tmp;
int rc;
if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
size != memory_block_size_bytes())
return rc;
lock_device_hotplug();
mem = find_memory_block(__pfn_to_section(PFN_DOWN(start)));
if (mem)
rc = device_offline(&mem->dev);
/* Ignore if the device is already offline. */
if (rc > 0)
rc = 0;
!IS_ALIGNED(size, memory_block_size_bytes()) || !size)
return -EINVAL;
/*
* In case we succeeded to offline the memory block, remove it.
* We'll remember the old online type of each memory block, so we can
* try to revert whatever we did when offlining one memory block fails
* after offlining some others succeeded.
*/
online_types = kmalloc_array(mb_count, sizeof(*online_types),
GFP_KERNEL);
if (!online_types)
return -ENOMEM;
/*
* Initialize all states to MMOP_OFFLINE, so when we abort processing in
* try_offline_memory_block(), we'll skip all unprocessed blocks in
* try_reonline_memory_block().
*/
memset(online_types, MMOP_OFFLINE, mb_count);
lock_device_hotplug();
tmp = online_types;
rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block);
/*
* In case we succeeded to offline all memory, remove it.
* This cannot fail as it cannot get onlined in the meantime.
*/
if (!rc) {
rc = try_remove_memory(nid, start, size);
WARN_ON_ONCE(rc);
if (rc)
pr_err("%s: Failed to remove memory: %d", __func__, rc);
}
/*
* Rollback what we did. While memory onlining might theoretically fail
* (nacked by a notifier), it barely ever happens.
*/
if (rc) {
tmp = online_types;
walk_memory_blocks(start, size, &tmp,
try_reonline_memory_block);
}
unlock_device_hotplug();
kfree(online_types);
return rc;
}
EXPORT_SYMBOL_GPL(offline_and_remove_memory);
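
This rewrite is what lets virtio-mem's Big Block Mode unplug spans covering several Linux memory blocks in one call: walk_memory_blocks() offlines the blocks one by one, the old online type of each is recorded, and on failure the already-offlined blocks are re-onlined instead of being lost. From the caller's side only the size constraint changes; a hypothetical virtio-mem style caller:

/* Sketch: the span may now cover several Linux memory blocks, as long
 * as start and size stay memory-block aligned. bb_start and the factor
 * of 4 are made-up example values.
 */
static int example_unplug_big_block(int nid, u64 bb_start)
{
        const u64 bb_size = 4 * memory_block_size_bytes();

        return offline_and_remove_memory(nid, bb_start, bb_size);
}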


@@ -16,6 +16,16 @@
# define mb() abort()
# define dma_rmb() abort()
# define dma_wmb() abort()
#elif defined(__aarch64__)
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define virt_mb() __sync_synchronize()
#define virt_rmb() dmb(ishld)
#define virt_wmb() dmb(ishst)
#define virt_store_mb(var, value) do { WRITE_ONCE(var, value); dmb(ish); } while (0)
/* Weak barriers should be used. If not - it's a bug */
# define mb() abort()
# define dma_rmb() abort()
# define dma_wmb() abort()
#else
#error Please fill in barrier macros
#endif
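
On arm64, "dmb ishld" orders prior loads and "dmb ishst" orders prior stores within the inner-shareable domain, which is the strength the virtio guest/host shared-memory protocol needs; the strong mb()/dma_*mb() macros stay abort() on purpose, since the userspace tools must only ever use the weak virt_* variants. A sketch of the intended pairing (all names hypothetical):

static unsigned int ring[64], avail_idx, used_idx;      /* shared with peer */

static void example_publish(unsigned int idx, unsigned int value)
{
        ring[idx] = value;              /* payload first */
        virt_wmb();                     /* dmb ishst: payload before index */
        WRITE_ONCE(avail_idx, idx + 1);
}

static unsigned int example_consume(unsigned int last)
{
        while (READ_ONCE(used_idx) == last)
                ;                       /* poll for the peer to advance */
        virt_rmb();                     /* dmb ishld: index before payload */
        return ring[last];
}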


@@ -2,6 +2,8 @@
#ifndef BUG_H
#define BUG_H
#include <asm/bug.h>
#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
#define BUILD_BUG_ON(x)


@@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/overflow.h>
#include <linux/list.h>
#include <linux/printk.h>
#include <linux/bug.h>
@@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr)
# define unlikely(x) (__builtin_expect(!!(x), 0))
# endif
static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp)
{
size_t bytes;
if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
return NULL;
return krealloc(p, bytes, gfp);
}
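
Like the kernel helper it mirrors, this computes new_n * new_size via check_mul_overflow() and refuses to call krealloc() on overflow, so test code can grow arrays without open-coding the check. Usage sketch (example_grow() is a hypothetical caller):

static int example_grow(struct vring_desc **descs, size_t new_n)
{
        struct vring_desc *tmp;

        tmp = krealloc_array(*descs, new_n, sizeof(**descs), GFP_KERNEL);
        if (!tmp)
                return -ENOMEM;         /* old allocation is still valid */

        *descs = tmp;
        return 0;
}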
#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#ifdef DEBUG
#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
@@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr)
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0)
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \