virtio: features, fixes

new vdpa features to allow creation and deletion of new devices
 virtio-blk support per-device queue depth
 fixes, cleanups all over the place
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmA3+oYPHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRpyXgIAL71dM1GjVwnJC/hZHRPeRKBLUVzj7bAILaO
 i4TKQj0rs5OjJPrbGJVrbTpiUXfef+D75lzKYmOnfk+f2UeYSR6XecnlWbLddI16
 RcMHQW6lt/M5WiyQjt71VH+gqtKIJLHDt3Ek1C0g8BjbFEWnpElAqdd/AWkzg9B9
 ibCVPQq9dk+A8ZtfZpFB7/ykykHY8ndNQS9RJQLtE8fLNifN3Cir+uUf+pFzjjbs
 PvukiN7BNqHXOCeoMpMttEuYGNR29jgZHbEm1hdnSQ55NIYqLMuhoD8eO114/CBz
 p4clSmzhVoSU0sfc3igcyCZoVtjRcebOAaep7OoaIBRlQ1MXht8=
 =YFEf
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - new vdpa features to allow creation and deletion of new devices

 - virtio-blk support per-device queue depth

 - fixes, cleanups all over the place

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (31 commits)
  virtio-input: add multi-touch support
  virtio_mmio: fix one typo
  vdpa/mlx5: fix param validation in mlx5_vdpa_get_config()
  virtio_net: Fix fall-through warnings for Clang
  virtio_input: Prevent EV_MSC/MSC_TIMESTAMP loop storm for MT.
  virtio-blk: support per-device queue depth
  virtio_vdpa: don't warn when fail to disable vq
  virtio-pci: introduce modern device module
  virtio-pci-modern: rename map_capability() to vp_modern_map_capability()
  virtio-pci-modern: introduce helper to get notification offset
  virtio-pci-modern: introduce helper for getting queue nums
  virtio-pci-modern: introduce helper for setting/getting queue size
  virtio-pci-modern: introduce helper to set/get queue_enable
  virtio-pci-modern: introduce vp_modern_queue_address()
  virtio-pci-modern: introduce vp_modern_set_queue_vector()
  virtio-pci-modern: introduce vp_modern_generation()
  virtio-pci-modern: introduce helpers for setting and getting features
  virtio-pci-modern: introduce helpers for setting and getting status
  virtio-pci-modern: introduce helper to set config vector
  virtio-pci-modern: introduce vp_modern_remove()
  ...
Linus Torvalds 2021-02-25 12:21:08 -08:00
commit ffc1759676
22 changed files with 1492 additions and 507 deletions

drivers/block/virtio_blk.c

@@ -705,6 +705,7 @@ static int virtblk_probe(struct virtio_device *vdev)
u32 v, blk_size, max_size, sg_elems, opt_io_size;
u16 min_io_size;
u8 physical_block_exp, alignment_offset;
unsigned int queue_depth;
if (!vdev->config->get) {
dev_err(&vdev->dev, "%s failure: config access disabled\n",
@@ -756,16 +757,18 @@ static int virtblk_probe(struct virtio_device *vdev)
}
/* Default queue sizing is to fill the ring. */
if (!virtblk_queue_depth) {
virtblk_queue_depth = vblk->vqs[0].vq->num_free;
if (likely(!virtblk_queue_depth)) {
queue_depth = vblk->vqs[0].vq->num_free;
/* ... but without indirect descs, we use 2 descs per req */
if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
virtblk_queue_depth /= 2;
queue_depth /= 2;
} else {
queue_depth = virtblk_queue_depth;
}
memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
vblk->tag_set.ops = &virtio_mq_ops;
vblk->tag_set.queue_depth = virtblk_queue_depth;
vblk->tag_set.queue_depth = queue_depth;
vblk->tag_set.numa_node = NUMA_NO_NODE;
vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
vblk->tag_set.cmd_size =
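
The hunk above also fixes a subtle bug: the old code wrote the computed default back into the global virtblk_queue_depth module parameter, so the ring size of the first probed device leaked into every device probed after it. A minimal sketch of the new per-device rule, with virtblk_pick_queue_depth() as a hypothetical stand-alone helper (not part of the patch):

#include <stdbool.h>

/* Hypothetical restatement of the selection rule above: if the queue_depth
 * module parameter is unset (0), size the queue from the device's own ring;
 * otherwise honor the user's value. The global is never written, so each
 * device computes its own depth. */
static unsigned int virtblk_pick_queue_depth(unsigned int module_param,
					     unsigned int ring_num_free,
					     bool has_indirect_desc)
{
	unsigned int queue_depth;

	if (!module_param) {
		queue_depth = ring_num_free;
		/* without indirect descriptors, each request uses two descriptors */
		if (!has_indirect_desc)
			queue_depth /= 2;
	} else {
		queue_depth = module_param;
	}
	return queue_depth;
}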

drivers/net/virtio_net.c

@@ -729,6 +729,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp;
case XDP_DROP:
goto err_xdp;
}

drivers/vdpa/Kconfig

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
menuconfig VDPA
tristate "vDPA drivers"
depends on NET
help
Enable this module to support vDPA devices that use a
datapath which complies with virtio specifications with

drivers/vdpa/ifcvf/ifcvf_main.c

@@ -432,7 +432,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
dev, &ifc_vdpa_ops,
IFCVF_MAX_QUEUE_PAIRS * 2);
IFCVF_MAX_QUEUE_PAIRS * 2, NULL);
if (adapter == NULL) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
return -ENOMEM;

drivers/vdpa/mlx5/net/mlx5_vnet.c

@@ -1820,7 +1820,7 @@ static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
if (offset + len < sizeof(struct virtio_net_config))
if (offset + len <= sizeof(struct virtio_net_config))
memcpy(buf, (u8 *)&ndev->config + offset, len);
}
@@ -1982,7 +1982,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2 * mlx5_vdpa_max_qps(max_vqs));
2 * mlx5_vdpa_max_qps(max_vqs), NULL);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
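
The first hunk above is an off-by-one fix: a config read that ends exactly at the end of struct virtio_net_config has offset + len equal to the structure size, which the old '<' comparison wrongly rejected. A small self-contained illustration (struct cfg is a stand-in, not the real layout):

#include <assert.h>
#include <stddef.h>

struct cfg { unsigned char bytes[12]; };	/* stand-in for virtio_net_config */

/* the corrected bounds check: a read ending exactly at the end of the
 * structure is valid */
static int read_in_bounds(size_t offset, size_t len)
{
	return offset + len <= sizeof(struct cfg);
}

int main(void)
{
	/* reading the final two bytes: '<' rejected this, '<=' accepts it */
	assert(read_in_bounds(sizeof(struct cfg) - 2, 2));
	/* a read running one byte past the end is still rejected */
	assert(!read_in_bounds(sizeof(struct cfg) - 1, 2));
	return 0;
}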

drivers/vdpa/vdpa.c

@@ -11,9 +11,17 @@
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/vdpa.h>
#include <uapi/linux/vdpa.h>
#include <net/genetlink.h>
#include <linux/mod_devicetable.h>
static LIST_HEAD(mdev_head);
/* A global mutex that protects vdpa management device and device level operations. */
static DEFINE_MUTEX(vdpa_dev_mutex);
static DEFINE_IDA(vdpa_index_ida);
static struct genl_family vdpa_nl_family;
static int vdpa_dev_probe(struct device *d)
{
struct vdpa_device *vdev = dev_to_vdpa(d);
@@ -63,6 +71,7 @@ static void vdpa_release_dev(struct device *d)
* @config: the bus operations that is supported by this device
* @nvqs: number of virtqueues supported by this device
* @size: size of the parent structure that contains private data
* @name: name of the vdpa device; optional.
*
* Driver should use vdpa_alloc_device() wrapper macro instead of
* using this directly.
@@ -72,8 +81,7 @@ static void vdpa_release_dev(struct device *d)
*/
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
int nvqs,
size_t size)
int nvqs, size_t size, const char *name)
{
struct vdpa_device *vdev;
int err = -EINVAL;
@@ -101,7 +109,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
vdev->features_valid = false;
vdev->nvqs = nvqs;
err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
if (name)
err = dev_set_name(&vdev->dev, "%s", name);
else
err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
if (err)
goto err_name;
@@ -118,6 +129,44 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
}
EXPORT_SYMBOL_GPL(__vdpa_alloc_device);
static int vdpa_name_match(struct device *dev, const void *data)
{
struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
return (strcmp(dev_name(&vdev->dev), data) == 0);
}
static int __vdpa_register_device(struct vdpa_device *vdev)
{
struct device *dev;
lockdep_assert_held(&vdpa_dev_mutex);
dev = bus_find_device(&vdpa_bus, NULL, dev_name(&vdev->dev), vdpa_name_match);
if (dev) {
put_device(dev);
return -EEXIST;
}
return device_add(&vdev->dev);
}
/**
* _vdpa_register_device - register a vDPA device with vdpa lock held
* Caller must have made a successful call to vdpa_alloc_device() beforehand.
* Caller must invoke this routine in the management device dev_add()
* callback after setting up valid mgmtdev for this vdpa device.
* @vdev: the vdpa device to be registered to vDPA bus
*
* Returns an error when it fails to add the device to the vDPA bus
*/
int _vdpa_register_device(struct vdpa_device *vdev)
{
if (!vdev->mdev)
return -EINVAL;
return __vdpa_register_device(vdev);
}
EXPORT_SYMBOL_GPL(_vdpa_register_device);
/**
* vdpa_register_device - register a vDPA device
* Callers must have made a successful call to vdpa_alloc_device() beforehand.
@@ -127,17 +176,38 @@ EXPORT_SYMBOL_GPL(__vdpa_alloc_device);
*/
int vdpa_register_device(struct vdpa_device *vdev)
{
return device_add(&vdev->dev);
int err;
mutex_lock(&vdpa_dev_mutex);
err = __vdpa_register_device(vdev);
mutex_unlock(&vdpa_dev_mutex);
return err;
}
EXPORT_SYMBOL_GPL(vdpa_register_device);
/**
* _vdpa_unregister_device - unregister a vDPA device
* Caller must invoke this routine as part of management device dev_del()
* callback.
* @vdev: the vdpa device to be unregistered from the vDPA bus
*/
void _vdpa_unregister_device(struct vdpa_device *vdev)
{
lockdep_assert_held(&vdpa_dev_mutex);
WARN_ON(!vdev->mdev);
device_unregister(&vdev->dev);
}
EXPORT_SYMBOL_GPL(_vdpa_unregister_device);
/**
* vdpa_unregister_device - unregister a vDPA device
* @vdev: the vdpa device to be unregistered from the vDPA bus
*/
void vdpa_unregister_device(struct vdpa_device *vdev)
{
mutex_lock(&vdpa_dev_mutex);
device_unregister(&vdev->dev);
mutex_unlock(&vdpa_dev_mutex);
}
EXPORT_SYMBOL_GPL(vdpa_unregister_device);
@@ -167,13 +237,436 @@ void vdpa_unregister_driver(struct vdpa_driver *drv)
}
EXPORT_SYMBOL_GPL(vdpa_unregister_driver);
/**
* vdpa_mgmtdev_register - register a vdpa management device
*
* @mdev: Pointer to vdpa management device
* vdpa_mgmtdev_register() registers a vdpa management device which supports
* vdpa device management.
*/
int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev)
{
if (!mdev->device || !mdev->ops || !mdev->ops->dev_add || !mdev->ops->dev_del)
return -EINVAL;
INIT_LIST_HEAD(&mdev->list);
mutex_lock(&vdpa_dev_mutex);
list_add_tail(&mdev->list, &mdev_head);
mutex_unlock(&vdpa_dev_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(vdpa_mgmtdev_register);
static int vdpa_match_remove(struct device *dev, void *data)
{
struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
struct vdpa_mgmt_dev *mdev = vdev->mdev;
if (mdev == data)
mdev->ops->dev_del(mdev, vdev);
return 0;
}
void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
{
mutex_lock(&vdpa_dev_mutex);
list_del(&mdev->list);
/* Find all the entries that belong to this management device and delete them. */
bus_for_each_dev(&vdpa_bus, NULL, mdev, vdpa_match_remove);
mutex_unlock(&vdpa_dev_mutex);
}
EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev,
const char *busname, const char *devname)
{
/* The bus name is optional (e.g. for the simulated management device):
 * match bus-less devices only when no bus attribute is given, and
 * devices with a bus only when one is provided.
 */
if ((busname && !mdev->device->bus) || (!busname && mdev->device->bus))
return false;
if (!busname && strcmp(dev_name(mdev->device), devname) == 0)
return true;
if (busname && (strcmp(mdev->device->bus->name, busname) == 0) &&
(strcmp(dev_name(mdev->device), devname) == 0))
return true;
return false;
}
static struct vdpa_mgmt_dev *vdpa_mgmtdev_get_from_attr(struct nlattr **attrs)
{
struct vdpa_mgmt_dev *mdev;
const char *busname = NULL;
const char *devname;
if (!attrs[VDPA_ATTR_MGMTDEV_DEV_NAME])
return ERR_PTR(-EINVAL);
devname = nla_data(attrs[VDPA_ATTR_MGMTDEV_DEV_NAME]);
if (attrs[VDPA_ATTR_MGMTDEV_BUS_NAME])
busname = nla_data(attrs[VDPA_ATTR_MGMTDEV_BUS_NAME]);
list_for_each_entry(mdev, &mdev_head, list) {
if (mgmtdev_handle_match(mdev, busname, devname))
return mdev;
}
return ERR_PTR(-ENODEV);
}
static int vdpa_nl_mgmtdev_handle_fill(struct sk_buff *msg, const struct vdpa_mgmt_dev *mdev)
{
if (mdev->device->bus &&
nla_put_string(msg, VDPA_ATTR_MGMTDEV_BUS_NAME, mdev->device->bus->name))
return -EMSGSIZE;
if (nla_put_string(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, dev_name(mdev->device)))
return -EMSGSIZE;
return 0;
}
static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *msg,
u32 portid, u32 seq, int flags)
{
u64 supported_classes = 0;
void *hdr;
int i = 0;
int err;
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_MGMTDEV_NEW);
if (!hdr)
return -EMSGSIZE;
err = vdpa_nl_mgmtdev_handle_fill(msg, mdev);
if (err)
goto msg_err;
while (mdev->id_table[i].device) {
supported_classes |= BIT(mdev->id_table[i].device);
i++;
}
if (nla_put_u64_64bit(msg, VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES,
supported_classes, VDPA_ATTR_UNSPEC)) {
err = -EMSGSIZE;
goto msg_err;
}
genlmsg_end(msg, hdr);
return 0;
msg_err:
genlmsg_cancel(msg, hdr);
return err;
}
static int vdpa_nl_cmd_mgmtdev_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_mgmt_dev *mdev;
struct sk_buff *msg;
int err;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
mutex_lock(&vdpa_dev_mutex);
mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
if (IS_ERR(mdev)) {
mutex_unlock(&vdpa_dev_mutex);
NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified mgmt device");
err = PTR_ERR(mdev);
goto out;
}
err = vdpa_mgmtdev_fill(mdev, msg, info->snd_portid, info->snd_seq, 0);
mutex_unlock(&vdpa_dev_mutex);
if (err)
goto out;
err = genlmsg_reply(msg, info);
return err;
out:
nlmsg_free(msg);
return err;
}
static int
vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
{
struct vdpa_mgmt_dev *mdev;
int start = cb->args[0];
int idx = 0;
int err;
mutex_lock(&vdpa_dev_mutex);
list_for_each_entry(mdev, &mdev_head, list) {
if (idx < start) {
idx++;
continue;
}
err = vdpa_mgmtdev_fill(mdev, msg, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI);
if (err)
goto out;
idx++;
}
out:
mutex_unlock(&vdpa_dev_mutex);
cb->args[0] = idx;
return msg->len;
}
static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_mgmt_dev *mdev;
const char *name;
int err = 0;
if (!info->attrs[VDPA_ATTR_DEV_NAME])
return -EINVAL;
name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
mutex_lock(&vdpa_dev_mutex);
mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
if (IS_ERR(mdev)) {
NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified management device");
err = PTR_ERR(mdev);
goto err;
}
err = mdev->ops->dev_add(mdev, name);
err:
mutex_unlock(&vdpa_dev_mutex);
return err;
}
static int vdpa_nl_cmd_dev_del_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_mgmt_dev *mdev;
struct vdpa_device *vdev;
struct device *dev;
const char *name;
int err = 0;
if (!info->attrs[VDPA_ATTR_DEV_NAME])
return -EINVAL;
name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
mutex_lock(&vdpa_dev_mutex);
dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match);
if (!dev) {
NL_SET_ERR_MSG_MOD(info->extack, "device not found");
err = -ENODEV;
goto dev_err;
}
vdev = container_of(dev, struct vdpa_device, dev);
if (!vdev->mdev) {
NL_SET_ERR_MSG_MOD(info->extack, "Only user created device can be deleted by user");
err = -EINVAL;
goto mdev_err;
}
mdev = vdev->mdev;
mdev->ops->dev_del(mdev, vdev);
mdev_err:
put_device(dev);
dev_err:
mutex_unlock(&vdpa_dev_mutex);
return err;
}
static int
vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
int flags, struct netlink_ext_ack *extack)
{
u16 max_vq_size;
u32 device_id;
u32 vendor_id;
void *hdr;
int err;
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_NEW);
if (!hdr)
return -EMSGSIZE;
err = vdpa_nl_mgmtdev_handle_fill(msg, vdev->mdev);
if (err)
goto msg_err;
device_id = vdev->config->get_device_id(vdev);
vendor_id = vdev->config->get_vendor_id(vdev);
max_vq_size = vdev->config->get_vq_num_max(vdev);
err = -EMSGSIZE;
if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev)))
goto msg_err;
if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id))
goto msg_err;
if (nla_put_u32(msg, VDPA_ATTR_DEV_VENDOR_ID, vendor_id))
goto msg_err;
if (nla_put_u32(msg, VDPA_ATTR_DEV_MAX_VQS, vdev->nvqs))
goto msg_err;
if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size))
goto msg_err;
genlmsg_end(msg, hdr);
return 0;
msg_err:
genlmsg_cancel(msg, hdr);
return err;
}
static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_device *vdev;
struct sk_buff *msg;
const char *devname;
struct device *dev;
int err;
if (!info->attrs[VDPA_ATTR_DEV_NAME])
return -EINVAL;
devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
mutex_lock(&vdpa_dev_mutex);
dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
if (!dev) {
NL_SET_ERR_MSG_MOD(info->extack, "device not found");
err = -ENODEV;
goto err;
}
vdev = container_of(dev, struct vdpa_device, dev);
if (!vdev->mdev) {
err = -EINVAL;
goto mdev_err;
}
err = vdpa_dev_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack);
if (!err)
err = genlmsg_reply(msg, info);
mdev_err:
put_device(dev);
err:
mutex_unlock(&vdpa_dev_mutex);
if (err)
nlmsg_free(msg);
return err;
}
struct vdpa_dev_dump_info {
struct sk_buff *msg;
struct netlink_callback *cb;
int start_idx;
int idx;
};
static int vdpa_dev_dump(struct device *dev, void *data)
{
struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
struct vdpa_dev_dump_info *info = data;
int err;
if (!vdev->mdev)
return 0;
if (info->idx < info->start_idx) {
info->idx++;
return 0;
}
err = vdpa_dev_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid,
info->cb->nlh->nlmsg_seq, NLM_F_MULTI, info->cb->extack);
if (err)
return err;
info->idx++;
return 0;
}
static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
{
struct vdpa_dev_dump_info info;
info.msg = msg;
info.cb = cb;
info.start_idx = cb->args[0];
info.idx = 0;
mutex_lock(&vdpa_dev_mutex);
bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_dump);
mutex_unlock(&vdpa_dev_mutex);
cb->args[0] = info.idx;
return msg->len;
}
static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
[VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
};
static const struct genl_ops vdpa_nl_ops[] = {
{
.cmd = VDPA_CMD_MGMTDEV_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_mgmtdev_get_doit,
.dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit,
},
{
.cmd = VDPA_CMD_DEV_NEW,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_add_set_doit,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = VDPA_CMD_DEV_DEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_del_set_doit,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = VDPA_CMD_DEV_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_get_doit,
.dumpit = vdpa_nl_cmd_dev_get_dumpit,
},
};
static struct genl_family vdpa_nl_family __ro_after_init = {
.name = VDPA_GENL_NAME,
.version = VDPA_GENL_VERSION,
.maxattr = VDPA_ATTR_MAX,
.policy = vdpa_nl_policy,
.netnsok = false,
.module = THIS_MODULE,
.ops = vdpa_nl_ops,
.n_ops = ARRAY_SIZE(vdpa_nl_ops),
};
static int vdpa_init(void)
{
return bus_register(&vdpa_bus);
int err;
err = bus_register(&vdpa_bus);
if (err)
return err;
err = genl_register_family(&vdpa_nl_family);
if (err)
goto err;
return 0;
err:
bus_unregister(&vdpa_bus);
return err;
}
static void __exit vdpa_exit(void)
{
genl_unregister_family(&vdpa_nl_family);
bus_unregister(&vdpa_bus);
ida_destroy(&vdpa_index_ida);
}
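
The new generic netlink family is the userspace entry point for the dev_add()/dev_del() callbacks above (the iproute2 'vdpa' utility speaks this family). As a rough sketch, a hypothetical libnl-3 client could create a device like this; the management device name "vdpasim_net" and the device name "vdpa0" are assumptions, error handling is minimal, and VDPA_CMD_DEV_NEW requires CAP_NET_ADMIN because of the GENL_ADMIN_PERM flag:

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/vdpa.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family, err;

	if (!sk || genl_connect(sk))
		return 1;
	family = genl_ctrl_resolve(sk, VDPA_GENL_NAME);	/* "vdpa" */
	if (family < 0)
		return 1;

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    VDPA_CMD_DEV_NEW, VDPA_GENL_VERSION);
	/* which management device creates the device, and the new name */
	nla_put_string(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, "vdpasim_net");
	nla_put_string(msg, VDPA_ATTR_DEV_NAME, "vdpa0");

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
	nl_socket_free(sk);
	return err < 0;
}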

drivers/vdpa/vdpa_sim/vdpa_sim.c

@@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
ops = &vdpasim_config_ops;
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
dev_attr->nvqs);
dev_attr->nvqs, dev_attr->name);
if (!vdpasim)
goto err_alloc;
@@ -249,6 +249,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
goto err_iommu;
set_dma_ops(dev, &vdpasim_dma_ops);
vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
if (!vdpasim->config)

drivers/vdpa/vdpa_sim/vdpa_sim.h

@@ -33,6 +33,8 @@ struct vdpasim_virtqueue {
};
struct vdpasim_dev_attr {
struct vdpa_mgmt_dev *mgmt_dev;
const char *name;
u64 supported_features;
size_t config_size;
size_t buffer_size;

drivers/vdpa/vdpa_sim/vdpa_sim_net.c

@@ -33,9 +33,7 @@ static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
u8 macaddr_buf[ETH_ALEN];
static struct vdpasim *vdpasim_net_dev;
static u8 macaddr_buf[ETH_ALEN];
static void vdpasim_net_work(struct work_struct *work)
{
@@ -120,21 +118,23 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
}
static int __init vdpasim_net_init(void)
static void vdpasim_net_mgmtdev_release(struct device *dev)
{
}
static struct device vdpasim_net_mgmtdev = {
.init_name = "vdpasim_net",
.release = vdpasim_net_mgmtdev_release,
};
static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
{
struct vdpasim_dev_attr dev_attr = {};
struct vdpasim *simdev;
int ret;
if (macaddr) {
mac_pton(macaddr, macaddr_buf);
if (!is_valid_ether_addr(macaddr_buf)) {
ret = -EADDRNOTAVAIL;
goto out;
}
} else {
eth_random_addr(macaddr_buf);
}
dev_attr.mgmt_dev = mdev;
dev_attr.name = name;
dev_attr.id = VIRTIO_ID_NET;
dev_attr.supported_features = VDPASIM_NET_FEATURES;
dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
@@ -143,29 +143,75 @@ static int __init vdpasim_net_init(void)
dev_attr.work_fn = vdpasim_net_work;
dev_attr.buffer_size = PAGE_SIZE;
vdpasim_net_dev = vdpasim_create(&dev_attr);
if (IS_ERR(vdpasim_net_dev)) {
ret = PTR_ERR(vdpasim_net_dev);
goto out;
}
simdev = vdpasim_create(&dev_attr);
if (IS_ERR(simdev))
return PTR_ERR(simdev);
ret = vdpa_register_device(&vdpasim_net_dev->vdpa);
ret = _vdpa_register_device(&simdev->vdpa);
if (ret)
goto put_dev;
goto reg_err;
return 0;
put_dev:
put_device(&vdpasim_net_dev->vdpa.dev);
out:
reg_err:
put_device(&simdev->vdpa.dev);
return ret;
}
static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev,
struct vdpa_device *dev)
{
struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);
_vdpa_unregister_device(&simdev->vdpa);
}
static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = {
.dev_add = vdpasim_net_dev_add,
.dev_del = vdpasim_net_dev_del
};
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
{ 0 },
};
static struct vdpa_mgmt_dev mgmt_dev = {
.device = &vdpasim_net_mgmtdev,
.id_table = id_table,
.ops = &vdpasim_net_mgmtdev_ops,
};
static int __init vdpasim_net_init(void)
{
int ret;
if (macaddr) {
mac_pton(macaddr, macaddr_buf);
if (!is_valid_ether_addr(macaddr_buf))
return -EADDRNOTAVAIL;
} else {
eth_random_addr(macaddr_buf);
}
ret = device_register(&vdpasim_net_mgmtdev);
if (ret)
return ret;
ret = vdpa_mgmtdev_register(&mgmt_dev);
if (ret)
goto parent_err;
return 0;
parent_err:
device_unregister(&vdpasim_net_mgmtdev);
return ret;
}
static void __exit vdpasim_net_exit(void)
{
struct vdpa_device *vdpa = &vdpasim_net_dev->vdpa;
vdpa_unregister_device(vdpa);
vdpa_mgmtdev_unregister(&mgmt_dev);
device_unregister(&vdpasim_net_mgmtdev);
}
module_init(vdpasim_net_init);

drivers/vhost/scsi.c

@@ -1814,12 +1814,9 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
struct vhost_virtqueue **vqs;
int r = -ENOMEM, i;
vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
if (!vs) {
vs = vzalloc(sizeof(*vs));
if (!vs)
goto err_vs;
}
vs = kvzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs)
goto err_vs;
vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL);
if (!vqs)

drivers/virtio/Kconfig

@@ -12,6 +12,14 @@ config ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
This option is selected if the architecture may need to enforce
VIRTIO_F_ACCESS_PLATFORM
config VIRTIO_PCI_LIB
tristate
help
Modern PCI device implementation. This module implements the
basic probe and control for devices which are based on the
modern PCI device layout, with possible vendor-specific
extensions. Any module that selects this module must depend
on PCI.
menuconfig VIRTIO_MENU
bool "Virtio drivers"
default y
@@ -21,6 +29,7 @@ if VIRTIO_MENU
config VIRTIO_PCI
tristate "PCI driver for virtio devices"
depends on PCI
select VIRTIO_PCI_LIB
select VIRTIO
help
This driver provides support for virtio based paravirtual device

drivers/virtio/Makefile

@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o
obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o

drivers/virtio/virtio_input.c

@@ -7,6 +7,7 @@
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_input.h>
#include <linux/input/mt.h>
struct virtio_input {
struct virtio_device *vdev;
@@ -64,6 +65,21 @@ static int virtinput_send_status(struct virtio_input *vi,
unsigned long flags;
int rc;
/*
* Since 29cc309d8bf1 (HID: hid-multitouch: forward MSC_TIMESTAMP),
* an EV_MSC/MSC_TIMESTAMP event is added before each EV_SYN event.
* EV_MSC is configured as INPUT_PASS_TO_ALL.
* For a touch device this creates a feedback loop:
* the BE passes EV_MSC/MSC_TIMESTAMP to the FE on receiving an event from evdev,
* the FE passes EV_MSC/MSC_TIMESTAMP back to the BE,
* the BE writes EV_MSC/MSC_TIMESTAMP to evdev due to INPUT_PASS_TO_ALL,
* and the BE receives the extra EV_MSC/MSC_TIMESTAMP and passes it to the FE.
* >>> Each new frame becomes larger and larger.
* So disable EV_MSC/MSC_TIMESTAMP forwarding for MT devices.
*/
if (vi->idev->mt && type == EV_MSC && code == MSC_TIMESTAMP)
return 0;
stsbuf = kzalloc(sizeof(*stsbuf), GFP_ATOMIC);
if (!stsbuf)
return -ENOMEM;
@@ -204,7 +220,7 @@ static int virtinput_probe(struct virtio_device *vdev)
struct virtio_input *vi;
unsigned long flags;
size_t size;
int abs, err;
int abs, err, nslots;
if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
return -ENODEV;
@@ -289,6 +305,13 @@ static int virtinput_probe(struct virtio_device *vdev)
continue;
virtinput_cfg_abs(vi, abs);
}
if (test_bit(ABS_MT_SLOT, vi->idev->absbit)) {
nslots = input_abs_get_max(vi->idev, ABS_MT_SLOT) + 1;
err = input_mt_init_slots(vi->idev, nslots, 0);
if (err)
goto err_mt_init_slots;
}
}
virtio_device_ready(vdev);
@@ -304,6 +327,7 @@ static int virtinput_probe(struct virtio_device *vdev)
spin_lock_irqsave(&vi->lock, flags);
vi->ready = false;
spin_unlock_irqrestore(&vi->lock, flags);
err_mt_init_slots:
input_free_device(vi->idev);
err_input_alloc:
vdev->config->del_vqs(vdev);
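
The forwarding filter added in virtinput_send_status() reduces to a single predicate; a condensed restatement with a hypothetical helper (not in the patch):

#include <linux/input.h>

/* drop only EV_MSC/MSC_TIMESTAMP, and only for multi-touch devices,
 * breaking the BE<->FE echo loop described in the comment above */
static bool virtinput_drop_timestamp(const struct input_dev *idev,
				     u16 type, u16 code)
{
	return idev->mt && type == EV_MSC && code == MSC_TIMESTAMP;
}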

drivers/virtio/virtio_mem.c

@@ -2577,7 +2577,7 @@ static int virtio_mem_probe(struct virtio_device *vdev)
* actually in use (e.g., trying to reload the driver).
*/
if (vm->plugged_size) {
vm->unplug_all_required = 1;
vm->unplug_all_required = true;
dev_info(&vm->vdev->dev, "unplugging all memory is required\n");
}

drivers/virtio/virtio_mmio.c

@@ -126,7 +126,7 @@ static int vm_finalize_features(struct virtio_device *vdev)
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
/* Make sure there is are no mixed devices */
/* Make sure there are no mixed devices */
if (vm_dev->version == 2 &&
!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
dev_err(&vdev->dev, "New virtio-mmio devices (version 2) must provide VIRTIO_F_VERSION_1 feature!\n");

drivers/virtio/virtio_pci_common.h

@@ -25,6 +25,7 @@
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_pci_modern.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
@@ -43,31 +44,12 @@ struct virtio_pci_vq_info {
struct virtio_pci_device {
struct virtio_device vdev;
struct pci_dev *pci_dev;
struct virtio_pci_modern_device mdev;
/* In legacy mode, these two point to within ->legacy. */
/* Where to read and clear interrupt */
u8 __iomem *isr;
/* Modern only fields */
/* The IO mapping for the PCI config space (non-legacy mode) */
struct virtio_pci_common_cfg __iomem *common;
/* Device-specific data (non-legacy mode) */
void __iomem *device;
/* Base of vq notifications (non-legacy mode). */
void __iomem *notify_base;
/* So we can sanity-check accesses. */
size_t notify_len;
size_t device_len;
/* Capability for when we need to map notifications per-vq. */
int notify_map_cap;
/* Multiply queue_notify_off by this value. (non-legacy mode). */
u32 notify_offset_multiplier;
int modern_bars;
/* Legacy only field */
/* the IO mapping for the PCI config space */
void __iomem *ioaddr;

drivers/virtio/virtio_pci_modern.c

@@ -19,136 +19,11 @@
#define VIRTIO_RING_NO_LEGACY
#include "virtio_pci_common.h"
/*
* Type-safe wrappers for io accesses.
* Use these to enforce at compile time the following spec requirement:
*
* The driver MUST access each field using the natural access
* method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
* for 16-bit fields and 8-bit accesses for 8-bit fields.
*/
static inline u8 vp_ioread8(const u8 __iomem *addr)
{
return ioread8(addr);
}
static inline u16 vp_ioread16 (const __le16 __iomem *addr)
{
return ioread16(addr);
}
static inline u32 vp_ioread32(const __le32 __iomem *addr)
{
return ioread32(addr);
}
static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
{
iowrite8(value, addr);
}
static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
{
iowrite16(value, addr);
}
static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
{
iowrite32(value, addr);
}
static void vp_iowrite64_twopart(u64 val,
__le32 __iomem *lo, __le32 __iomem *hi)
{
vp_iowrite32((u32)val, lo);
vp_iowrite32(val >> 32, hi);
}
static void __iomem *map_capability(struct pci_dev *dev, int off,
size_t minlen,
u32 align,
u32 start, u32 size,
size_t *len)
{
u8 bar;
u32 offset, length;
void __iomem *p;
pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
bar),
&bar);
pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
&offset);
pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
&length);
if (length <= start) {
dev_err(&dev->dev,
"virtio_pci: bad capability len %u (>%u expected)\n",
length, start);
return NULL;
}
if (length - start < minlen) {
dev_err(&dev->dev,
"virtio_pci: bad capability len %u (>=%zu expected)\n",
length, minlen);
return NULL;
}
length -= start;
if (start + offset < offset) {
dev_err(&dev->dev,
"virtio_pci: map wrap-around %u+%u\n",
start, offset);
return NULL;
}
offset += start;
if (offset & (align - 1)) {
dev_err(&dev->dev,
"virtio_pci: offset %u not aligned to %u\n",
offset, align);
return NULL;
}
if (length > size)
length = size;
if (len)
*len = length;
if (minlen + offset < minlen ||
minlen + offset > pci_resource_len(dev, bar)) {
dev_err(&dev->dev,
"virtio_pci: map virtio %zu@%u "
"out of range on bar %i length %lu\n",
minlen, offset,
bar, (unsigned long)pci_resource_len(dev, bar));
return NULL;
}
p = pci_iomap_range(dev, bar, offset, length);
if (!p)
dev_err(&dev->dev,
"virtio_pci: unable to map virtio %u@%u on bar %i\n",
length, offset, bar);
return p;
}
/* virtio config->get_features() implementation */
static u64 vp_get_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u64 features;
vp_iowrite32(0, &vp_dev->common->device_feature_select);
features = vp_ioread32(&vp_dev->common->device_feature);
vp_iowrite32(1, &vp_dev->common->device_feature_select);
features |= ((u64)vp_ioread32(&vp_dev->common->device_feature) << 32);
return features;
return vp_modern_get_features(&vp_dev->mdev);
}
static void vp_transport_features(struct virtio_device *vdev, u64 features)
@@ -179,10 +54,7 @@ static int vp_finalize_features(struct virtio_device *vdev)
return -EINVAL;
}
vp_iowrite32(0, &vp_dev->common->guest_feature_select);
vp_iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
vp_iowrite32(1, &vp_dev->common->guest_feature_select);
vp_iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
vp_modern_set_features(&vp_dev->mdev, vdev->features);
return 0;
}
@@ -192,29 +64,31 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
void __iomem *device = mdev->device;
u8 b;
__le16 w;
__le32 l;
BUG_ON(offset + len > vp_dev->device_len);
BUG_ON(offset + len > mdev->device_len);
switch (len) {
case 1:
b = ioread8(vp_dev->device + offset);
b = ioread8(device + offset);
memcpy(buf, &b, sizeof b);
break;
case 2:
w = cpu_to_le16(ioread16(vp_dev->device + offset));
w = cpu_to_le16(ioread16(device + offset));
memcpy(buf, &w, sizeof w);
break;
case 4:
l = cpu_to_le32(ioread32(vp_dev->device + offset));
l = cpu_to_le32(ioread32(device + offset));
memcpy(buf, &l, sizeof l);
break;
case 8:
l = cpu_to_le32(ioread32(vp_dev->device + offset));
l = cpu_to_le32(ioread32(device + offset));
memcpy(buf, &l, sizeof l);
l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
l = cpu_to_le32(ioread32(device + offset + sizeof l));
memcpy(buf + sizeof l, &l, sizeof l);
break;
default:
@@ -228,30 +102,32 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
void __iomem *device = mdev->device;
u8 b;
__le16 w;
__le32 l;
BUG_ON(offset + len > vp_dev->device_len);
BUG_ON(offset + len > mdev->device_len);
switch (len) {
case 1:
memcpy(&b, buf, sizeof b);
iowrite8(b, vp_dev->device + offset);
iowrite8(b, device + offset);
break;
case 2:
memcpy(&w, buf, sizeof w);
iowrite16(le16_to_cpu(w), vp_dev->device + offset);
iowrite16(le16_to_cpu(w), device + offset);
break;
case 4:
memcpy(&l, buf, sizeof l);
iowrite32(le32_to_cpu(l), vp_dev->device + offset);
iowrite32(le32_to_cpu(l), device + offset);
break;
case 8:
memcpy(&l, buf, sizeof l);
iowrite32(le32_to_cpu(l), vp_dev->device + offset);
iowrite32(le32_to_cpu(l), device + offset);
memcpy(&l, buf + sizeof l, sizeof l);
iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
iowrite32(le32_to_cpu(l), device + offset + sizeof l);
break;
default:
BUG();
@@ -261,35 +137,40 @@ static void vp_set(struct virtio_device *vdev,
static u32 vp_generation(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return vp_ioread8(&vp_dev->common->config_generation);
return vp_modern_generation(&vp_dev->mdev);
}
/* config->{get,set}_status() implementations */
static u8 vp_get_status(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return vp_ioread8(&vp_dev->common->device_status);
return vp_modern_get_status(&vp_dev->mdev);
}
static void vp_set_status(struct virtio_device *vdev, u8 status)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* We should never be setting status to 0. */
BUG_ON(status == 0);
vp_iowrite8(status, &vp_dev->common->device_status);
vp_modern_set_status(&vp_dev->mdev, status);
}
static void vp_reset(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
/* 0 status means a reset. */
vp_iowrite8(0, &vp_dev->common->device_status);
vp_modern_set_status(mdev, 0);
/* After writing 0 to device_status, the driver MUST wait for a read of
* device_status to return 0 before reinitializing the device.
* This will flush out the status write, and flush in device writes,
* including MSI-X interrupts, if any.
*/
while (vp_ioread8(&vp_dev->common->device_status))
while (vp_modern_get_status(mdev))
msleep(1);
/* Flush pending VQ/configuration callbacks. */
vp_synchronize_vectors(vdev);
@@ -297,11 +178,7 @@ static void vp_reset(struct virtio_device *vdev)
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
/* Setup the vector used for configuration events */
vp_iowrite16(vector, &vp_dev->common->msix_config);
/* Verify we had enough resources to assign the vector */
/* Will also flush the write out to device */
return vp_ioread16(&vp_dev->common->msix_config);
return vp_modern_config_vector(&vp_dev->mdev, vector);
}
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
@ -312,20 +189,18 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
bool ctx,
u16 msix_vec)
{
struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtqueue *vq;
u16 num, off;
int err;
if (index >= vp_ioread16(&cfg->num_queues))
if (index >= vp_modern_get_num_queues(mdev))
return ERR_PTR(-ENOENT);
/* Select the queue we're interested in */
vp_iowrite16(index, &cfg->queue_select);
/* Check if queue is either not available or already active. */
num = vp_ioread16(&cfg->queue_size);
if (!num || vp_ioread16(&cfg->queue_enable))
num = vp_modern_get_queue_size(mdev, index);
if (!num || vp_modern_get_queue_enable(mdev, index))
return ERR_PTR(-ENOENT);
if (num & (num - 1)) {
@@ -334,7 +209,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
}
/* get offset of notification word for this vq */
off = vp_ioread16(&cfg->queue_notify_off);
off = vp_modern_get_queue_notify_off(mdev, index);
info->msix_vector = msix_vec;
@@ -347,33 +222,30 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
return ERR_PTR(-ENOMEM);
/* activate the queue */
vp_iowrite16(virtqueue_get_vring_size(vq), &cfg->queue_size);
vp_iowrite64_twopart(virtqueue_get_desc_addr(vq),
&cfg->queue_desc_lo, &cfg->queue_desc_hi);
vp_iowrite64_twopart(virtqueue_get_avail_addr(vq),
&cfg->queue_avail_lo, &cfg->queue_avail_hi);
vp_iowrite64_twopart(virtqueue_get_used_addr(vq),
&cfg->queue_used_lo, &cfg->queue_used_hi);
vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
if (vp_dev->notify_base) {
if (mdev->notify_base) {
/* offset should not wrap */
if ((u64)off * vp_dev->notify_offset_multiplier + 2
> vp_dev->notify_len) {
dev_warn(&vp_dev->pci_dev->dev,
if ((u64)off * mdev->notify_offset_multiplier + 2
> mdev->notify_len) {
dev_warn(&mdev->pci_dev->dev,
"bad notification offset %u (x %u) "
"for queue %u > %zd",
off, vp_dev->notify_offset_multiplier,
index, vp_dev->notify_len);
off, mdev->notify_offset_multiplier,
index, mdev->notify_len);
err = -EINVAL;
goto err_map_notify;
}
vq->priv = (void __force *)vp_dev->notify_base +
off * vp_dev->notify_offset_multiplier;
vq->priv = (void __force *)mdev->notify_base +
off * mdev->notify_offset_multiplier;
} else {
vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
vp_dev->notify_map_cap, 2, 2,
off * vp_dev->notify_offset_multiplier, 2,
NULL);
vq->priv = (void __force *)vp_modern_map_capability(mdev,
mdev->notify_map_cap, 2, 2,
off * mdev->notify_offset_multiplier, 2,
NULL);
}
if (!vq->priv) {
@@ -382,8 +254,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
}
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
vp_iowrite16(msix_vec, &cfg->queue_msix_vector);
msix_vec = vp_ioread16(&cfg->queue_msix_vector);
msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto err_assign_vector;
@@ -393,8 +264,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
return vq;
err_assign_vector:
if (!vp_dev->notify_base)
pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
if (!mdev->notify_base)
pci_iounmap(mdev->pci_dev, (void __iomem __force *)vq->priv);
err_map_notify:
vring_del_virtqueue(vq);
return ERR_PTR(err);
@@ -416,10 +287,8 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
/* Select and activate all queues. Has to be done last: once we do
* this, there's no way to go back except reset.
*/
list_for_each_entry(vq, &vdev->vqs, list) {
vp_iowrite16(vq->index, &vp_dev->common->queue_select);
vp_iowrite16(1, &vp_dev->common->queue_enable);
}
list_for_each_entry(vq, &vdev->vqs, list)
vp_modern_set_queue_enable(&vp_dev->mdev, vq->index, true);
return 0;
}
@@ -428,18 +297,14 @@ static void del_vq(struct virtio_pci_vq_info *info)
{
struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
vp_iowrite16(vq->index, &vp_dev->common->queue_select);
if (vp_dev->msix_enabled)
vp_modern_queue_vector(mdev, vq->index,
VIRTIO_MSI_NO_VECTOR);
if (vp_dev->msix_enabled) {
vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
&vp_dev->common->queue_msix_vector);
/* Flush the write out to device */
vp_ioread16(&vp_dev->common->queue_msix_vector);
}
if (!vp_dev->notify_base)
pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
if (!mdev->notify_base)
pci_iounmap(mdev->pci_dev, (void __force __iomem *)vq->priv);
vring_del_virtqueue(vq);
}
@@ -571,261 +436,36 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
.get_shm_region = vp_get_shm_region,
};
/**
* virtio_pci_find_capability - walk capabilities to find device info.
* @dev: the pci device
* @cfg_type: the VIRTIO_PCI_CAP_* value we seek
* @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
* @bars: the bitmask of BARs
*
* Returns offset of the capability, or 0.
*/
static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
u32 ioresource_types, int *bars)
{
int pos;
for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
pos > 0;
pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
u8 type, bar;
pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
cfg_type),
&type);
pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
bar),
&bar);
/* Ignore structures with reserved BAR values */
if (bar > 0x5)
continue;
if (type == cfg_type) {
if (pci_resource_len(dev, bar) &&
pci_resource_flags(dev, bar) & ioresource_types) {
*bars |= (1 << bar);
return pos;
}
}
}
return 0;
}
/* This is part of the ABI. Don't screw with it. */
static inline void check_offsets(void)
{
/* Note: disk space was harmed in compilation of this function. */
BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
offsetof(struct virtio_pci_cap, cap_vndr));
BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
offsetof(struct virtio_pci_cap, cap_next));
BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
offsetof(struct virtio_pci_cap, cap_len));
BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
offsetof(struct virtio_pci_cap, cfg_type));
BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
offsetof(struct virtio_pci_cap, bar));
BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
offsetof(struct virtio_pci_cap, offset));
BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
offsetof(struct virtio_pci_cap, length));
BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
offsetof(struct virtio_pci_notify_cap,
notify_off_multiplier));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
offsetof(struct virtio_pci_common_cfg,
device_feature_select));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
offsetof(struct virtio_pci_common_cfg, device_feature));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
offsetof(struct virtio_pci_common_cfg,
guest_feature_select));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
offsetof(struct virtio_pci_common_cfg, guest_feature));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
offsetof(struct virtio_pci_common_cfg, msix_config));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
offsetof(struct virtio_pci_common_cfg, num_queues));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
offsetof(struct virtio_pci_common_cfg, device_status));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
offsetof(struct virtio_pci_common_cfg, config_generation));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
offsetof(struct virtio_pci_common_cfg, queue_select));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
offsetof(struct virtio_pci_common_cfg, queue_size));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
offsetof(struct virtio_pci_common_cfg, queue_enable));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
offsetof(struct virtio_pci_common_cfg, queue_notify_off));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
offsetof(struct virtio_pci_common_cfg, queue_used_lo));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
offsetof(struct virtio_pci_common_cfg, queue_used_hi));
}
/* the PCI probing function */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct pci_dev *pci_dev = vp_dev->pci_dev;
int err, common, isr, notify, device;
u32 notify_length;
u32 notify_offset;
int err;
check_offsets();
mdev->pci_dev = pci_dev;
/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
return -ENODEV;
if (pci_dev->device < 0x1040) {
/* Transitional devices: use the PCI subsystem device id as
* virtio device id, same as legacy driver always did.
*/
vp_dev->vdev.id.device = pci_dev->subsystem_device;
} else {
/* Modern devices: simply use PCI device id, but start from 0x1040. */
vp_dev->vdev.id.device = pci_dev->device - 0x1040;
}
vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
/* check for a common config: if not, use legacy mode (bar 0). */
common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&vp_dev->modern_bars);
if (!common) {
dev_info(&pci_dev->dev,
"virtio_pci: leaving for legacy driver\n");
return -ENODEV;
}
/* If common is there, these should be too... */
isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&vp_dev->modern_bars);
notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&vp_dev->modern_bars);
if (!isr || !notify) {
dev_err(&pci_dev->dev,
"virtio_pci: missing capabilities %i/%i/%i\n",
common, isr, notify);
return -EINVAL;
}
err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
if (err)
err = dma_set_mask_and_coherent(&pci_dev->dev,
DMA_BIT_MASK(32));
if (err)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
/* Device capability is only mandatory for devices that have
* device-specific configuration.
*/
device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&vp_dev->modern_bars);
err = pci_request_selected_regions(pci_dev, vp_dev->modern_bars,
"virtio-pci-modern");
err = vp_modern_probe(mdev);
if (err)
return err;
err = -EINVAL;
vp_dev->common = map_capability(pci_dev, common,
sizeof(struct virtio_pci_common_cfg), 4,
0, sizeof(struct virtio_pci_common_cfg),
NULL);
if (!vp_dev->common)
goto err_map_common;
vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
0, 1,
NULL);
if (!vp_dev->isr)
goto err_map_isr;
/* Read notify_off_multiplier from config space. */
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
notify_off_multiplier),
&vp_dev->notify_offset_multiplier);
/* Read notify length and offset from config space. */
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
cap.length),
&notify_length);
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
cap.offset),
&notify_offset);
/* We don't know how many VQs we'll map ahead of time.
* If notify length is small, map it all now.
* Otherwise, map each VQ individually later.
*/
if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
vp_dev->notify_base = map_capability(pci_dev, notify, 2, 2,
0, notify_length,
&vp_dev->notify_len);
if (!vp_dev->notify_base)
goto err_map_notify;
} else {
vp_dev->notify_map_cap = notify;
}
/* Again, we don't know how much we should map, but PAGE_SIZE
* is more than enough for all existing devices.
*/
if (device) {
vp_dev->device = map_capability(pci_dev, device, 0, 4,
0, PAGE_SIZE,
&vp_dev->device_len);
if (!vp_dev->device)
goto err_map_device;
if (mdev->device)
vp_dev->vdev.config = &virtio_pci_config_ops;
} else {
else
vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
}
vp_dev->config_vector = vp_config_vector;
vp_dev->setup_vq = setup_vq;
vp_dev->del_vq = del_vq;
vp_dev->isr = mdev->isr;
vp_dev->vdev.id = mdev->id;
return 0;
err_map_device:
if (vp_dev->notify_base)
pci_iounmap(pci_dev, vp_dev->notify_base);
err_map_notify:
pci_iounmap(pci_dev, vp_dev->isr);
err_map_isr:
pci_iounmap(pci_dev, vp_dev->common);
err_map_common:
return err;
}
void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
{
struct pci_dev *pci_dev = vp_dev->pci_dev;
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
if (vp_dev->device)
pci_iounmap(pci_dev, vp_dev->device);
if (vp_dev->notify_base)
pci_iounmap(pci_dev, vp_dev->notify_base);
pci_iounmap(pci_dev, vp_dev->isr);
pci_iounmap(pci_dev, vp_dev->common);
pci_release_selected_regions(pci_dev, vp_dev->modern_bars);
vp_modern_remove(mdev);
}
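
With this split, the mapping and register-access logic lives in the new VIRTIO_PCI_LIB module and virtio_pci_modern.c becomes just one consumer of it. A minimal hypothetical consumer (my_probe is illustrative, not from the patch; it assumes the PCI device is already enabled, as vp_modern_probe requires):

#include <linux/pci.h>
#include <linux/virtio_pci_modern.h>

/* probe the modern transport, read the device feature bits, tear down */
static int my_probe(struct pci_dev *pci_dev)
{
	struct virtio_pci_modern_device mdev = { .pci_dev = pci_dev };
	u64 features;
	int err;

	err = vp_modern_probe(&mdev);	/* maps common/isr/notify/device */
	if (err)
		return err;

	features = vp_modern_get_features(&mdev);
	dev_info(&pci_dev->dev, "device features: %#llx\n",
		 (unsigned long long)features);

	vp_modern_remove(&mdev);	/* unmaps and releases the BARs */
	return 0;
}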

drivers/virtio/virtio_pci_modern_dev.c

@@ -0,0 +1,599 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/virtio_pci_modern.h>
#include <linux/module.h>
#include <linux/pci.h>
/*
* vp_modern_map_capability - map a part of virtio pci capability
* @mdev: the modern virtio-pci device
* @off: offset of the capability
* @minlen: minimal length of the capability
* @align: align requirement
* @start: start from the capability
* @size: map size
* @len: the length that is actually mapped
*
* Returns the io address for the part of the capability
*/
void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
size_t minlen,
u32 align,
u32 start, u32 size,
size_t *len)
{
struct pci_dev *dev = mdev->pci_dev;
u8 bar;
u32 offset, length;
void __iomem *p;
pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
bar),
&bar);
pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
&offset);
pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
&length);
if (length <= start) {
dev_err(&dev->dev,
"virtio_pci: bad capability len %u (>%u expected)\n",
length, start);
return NULL;
}
if (length - start < minlen) {
dev_err(&dev->dev,
"virtio_pci: bad capability len %u (>=%zu expected)\n",
length, minlen);
return NULL;
}
length -= start;
if (start + offset < offset) {
dev_err(&dev->dev,
"virtio_pci: map wrap-around %u+%u\n",
start, offset);
return NULL;
}
offset += start;
if (offset & (align - 1)) {
dev_err(&dev->dev,
"virtio_pci: offset %u not aligned to %u\n",
offset, align);
return NULL;
}
if (length > size)
length = size;
if (len)
*len = length;
if (minlen + offset < minlen ||
minlen + offset > pci_resource_len(dev, bar)) {
dev_err(&dev->dev,
"virtio_pci: map virtio %zu@%u "
"out of range on bar %i length %lu\n",
minlen, offset,
bar, (unsigned long)pci_resource_len(dev, bar));
return NULL;
}
p = pci_iomap_range(dev, bar, offset, length);
if (!p)
dev_err(&dev->dev,
"virtio_pci: unable to map virtio %u@%u on bar %i\n",
length, offset, bar);
return p;
}
EXPORT_SYMBOL_GPL(vp_modern_map_capability);
/**
* virtio_pci_find_capability - walk capabilities to find device info.
* @dev: the pci device
* @cfg_type: the VIRTIO_PCI_CAP_* value we seek
* @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
* @bars: the bitmask of BARs
*
* Returns offset of the capability, or 0.
*/
static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
u32 ioresource_types, int *bars)
{
int pos;
for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
pos > 0;
pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
u8 type, bar;
pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
cfg_type),
&type);
pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
bar),
&bar);
/* Ignore structures with reserved BAR values */
if (bar > 0x5)
continue;
if (type == cfg_type) {
if (pci_resource_len(dev, bar) &&
pci_resource_flags(dev, bar) & ioresource_types) {
*bars |= (1 << bar);
return pos;
}
}
}
return 0;
}
/* This is part of the ABI. Don't screw with it. */
static inline void check_offsets(void)
{
/* Note: disk space was harmed in compilation of this function. */
BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
offsetof(struct virtio_pci_cap, cap_vndr));
BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
offsetof(struct virtio_pci_cap, cap_next));
BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
offsetof(struct virtio_pci_cap, cap_len));
BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
offsetof(struct virtio_pci_cap, cfg_type));
BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
offsetof(struct virtio_pci_cap, bar));
BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
offsetof(struct virtio_pci_cap, offset));
BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
offsetof(struct virtio_pci_cap, length));
BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
offsetof(struct virtio_pci_notify_cap,
notify_off_multiplier));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
offsetof(struct virtio_pci_common_cfg,
device_feature_select));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
offsetof(struct virtio_pci_common_cfg, device_feature));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
offsetof(struct virtio_pci_common_cfg,
guest_feature_select));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
offsetof(struct virtio_pci_common_cfg, guest_feature));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
offsetof(struct virtio_pci_common_cfg, msix_config));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
offsetof(struct virtio_pci_common_cfg, num_queues));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
offsetof(struct virtio_pci_common_cfg, device_status));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
offsetof(struct virtio_pci_common_cfg, config_generation));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
offsetof(struct virtio_pci_common_cfg, queue_select));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
offsetof(struct virtio_pci_common_cfg, queue_size));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
offsetof(struct virtio_pci_common_cfg, queue_enable));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
offsetof(struct virtio_pci_common_cfg, queue_notify_off));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
offsetof(struct virtio_pci_common_cfg, queue_used_lo));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
offsetof(struct virtio_pci_common_cfg, queue_used_hi));
}
/*
* vp_modern_probe: probe the modern virtio pci device; note that the
* caller is required to enable the PCI device before calling this function.
* @mdev: the modern virtio-pci device
*
* Returns 0 on success, otherwise a negative errno
*/
int vp_modern_probe(struct virtio_pci_modern_device *mdev)
{
struct pci_dev *pci_dev = mdev->pci_dev;
int err, common, isr, notify, device;
u32 notify_length;
u32 notify_offset;
check_offsets();
mdev->pci_dev = pci_dev;
/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
return -ENODEV;
if (pci_dev->device < 0x1040) {
/* Transitional devices: use the PCI subsystem device id as
* virtio device id, same as legacy driver always did.
*/
mdev->id.device = pci_dev->subsystem_device;
} else {
/* Modern devices: simply use PCI device id, but start from 0x1040. */
mdev->id.device = pci_dev->device - 0x1040;
}
mdev->id.vendor = pci_dev->subsystem_vendor;
/* check for a common config: if not, use legacy mode (bar 0). */
common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
if (!common) {
dev_info(&pci_dev->dev,
"virtio_pci: leaving for legacy driver\n");
return -ENODEV;
}
/* If common is there, these should be too... */
isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
if (!isr || !notify) {
dev_err(&pci_dev->dev,
"virtio_pci: missing capabilities %i/%i/%i\n",
common, isr, notify);
return -EINVAL;
}
err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
if (err)
err = dma_set_mask_and_coherent(&pci_dev->dev,
DMA_BIT_MASK(32));
if (err)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
/* Device capability is only mandatory for devices that have
* device-specific configuration.
*/
device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
err = pci_request_selected_regions(pci_dev, mdev->modern_bars,
"virtio-pci-modern");
if (err)
return err;
err = -EINVAL;
mdev->common = vp_modern_map_capability(mdev, common,
sizeof(struct virtio_pci_common_cfg), 4,
0, sizeof(struct virtio_pci_common_cfg),
NULL);
if (!mdev->common)
goto err_map_common;
mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
0, 1,
NULL);
if (!mdev->isr)
goto err_map_isr;
/* Read notify_off_multiplier from config space. */
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
notify_off_multiplier),
&mdev->notify_offset_multiplier);
/* Read notify length and offset from config space. */
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
cap.length),
&notify_length);
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
cap.offset),
&notify_offset);
/* We don't know how many VQs we'll map, ahead of the time.
* If notify length is small, map it all now.
* Otherwise, map each VQ individually later.
*/
if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
mdev->notify_base = vp_modern_map_capability(mdev, notify,
2, 2,
0, notify_length,
&mdev->notify_len);
if (!mdev->notify_base)
goto err_map_notify;
} else {
mdev->notify_map_cap = notify;
}
/* Again, we don't know how much we should map, but PAGE_SIZE
* is more than enough for all existing devices.
*/
if (device) {
mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
0, PAGE_SIZE,
&mdev->device_len);
if (!mdev->device)
goto err_map_device;
}
return 0;
err_map_device:
if (mdev->notify_base)
pci_iounmap(pci_dev, mdev->notify_base);
err_map_notify:
pci_iounmap(pci_dev, mdev->isr);
err_map_isr:
pci_iounmap(pci_dev, mdev->common);
err_map_common:
return err;
}
EXPORT_SYMBOL_GPL(vp_modern_probe);
/*
* vp_modern_probe: remove and cleanup the modern virtio pci device
* @mdev: the modern virtio-pci device
*/
void vp_modern_remove(struct virtio_pci_modern_device *mdev)
{
struct pci_dev *pci_dev = mdev->pci_dev;
if (mdev->device)
pci_iounmap(pci_dev, mdev->device);
if (mdev->notify_base)
pci_iounmap(pci_dev, mdev->notify_base);
pci_iounmap(pci_dev, mdev->isr);
pci_iounmap(pci_dev, mdev->common);
pci_release_selected_regions(pci_dev, mdev->modern_bars);
}
EXPORT_SYMBOL_GPL(vp_modern_remove);
/*
* vp_modern_get_features - get features from device
* @mdev: the modern virtio-pci device
*
* Returns the features read from the device
*/
u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
u64 features;
vp_iowrite32(0, &cfg->device_feature_select);
features = vp_ioread32(&cfg->device_feature);
vp_iowrite32(1, &cfg->device_feature_select);
features |= ((u64)vp_ioread32(&cfg->device_feature) << 32);
return features;
}
EXPORT_SYMBOL_GPL(vp_modern_get_features);
/*
* vp_modern_set_features - set features to device
* @mdev: the modern virtio-pci device
* @features: the features set to device
*/
void vp_modern_set_features(struct virtio_pci_modern_device *mdev,
u64 features)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
vp_iowrite32(0, &cfg->guest_feature_select);
vp_iowrite32((u32)features, &cfg->guest_feature);
vp_iowrite32(1, &cfg->guest_feature_select);
vp_iowrite32(features >> 32, &cfg->guest_feature);
}
EXPORT_SYMBOL_GPL(vp_modern_set_features);
/*
* vp_modern_generation - get the device genreation
* @mdev: the modern virtio-pci device
*
* Returns the genreation read from device
*/
u32 vp_modern_generation(struct virtio_pci_modern_device *mdev)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
return vp_ioread8(&cfg->config_generation);
}
EXPORT_SYMBOL_GPL(vp_modern_generation);
/*
* vp_modern_get_status - get the device status
* @mdev: the modern virtio-pci device
*
* Returns the status read from device
*/
u8 vp_modern_get_status(struct virtio_pci_modern_device *mdev)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
return vp_ioread8(&cfg->device_status);
}
EXPORT_SYMBOL_GPL(vp_modern_get_status);
/*
* vp_modern_set_status - set status to device
* @mdev: the modern virtio-pci device
* @status: the status set to device
*/
void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
u8 status)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
vp_iowrite8(status, &cfg->device_status);
}
EXPORT_SYMBOL_GPL(vp_modern_set_status);
/*
* vp_modern_queue_vector - set the MSIX vector for a specific virtqueue
* @mdev: the modern virtio-pci device
* @index: queue index
* @vector: the config vector
*
* Returns the config vector read from the device
*/
u16 vp_modern_queue_vector(struct virtio_pci_modern_device *mdev,
u16 index, u16 vector)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
vp_iowrite16(index, &cfg->queue_select);
vp_iowrite16(vector, &cfg->queue_msix_vector);
/* Flush the write out to device */
return vp_ioread16(&cfg->queue_msix_vector);
}
EXPORT_SYMBOL_GPL(vp_modern_queue_vector);
/*
* vp_modern_config_vector - set the vector for config interrupt
* @mdev: the modern virtio-pci device
* @vector: the config vector
*
* Returns the config vector read from the device
*/
u16 vp_modern_config_vector(struct virtio_pci_modern_device *mdev,
u16 vector)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
/* Setup the vector used for configuration events */
vp_iowrite16(vector, &cfg->msix_config);
/* Verify we had enough resources to assign the vector */
/* Will also flush the write out to device */
return vp_ioread16(&cfg->msix_config);
}
EXPORT_SYMBOL_GPL(vp_modern_config_vector);
/*
* vp_modern_queue_address - set the virtqueue address
* @mdev: the modern virtio-pci device
* @index: the queue index
* @desc_addr: address of the descriptor area
* @driver_addr: address of the driver area
* @device_addr: address of the device area
*/
void vp_modern_queue_address(struct virtio_pci_modern_device *mdev,
u16 index, u64 desc_addr, u64 driver_addr,
u64 device_addr)
{
struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
vp_iowrite16(index, &cfg->queue_select);
vp_iowrite64_twopart(desc_addr, &cfg->queue_desc_lo,
&cfg->queue_desc_hi);
vp_iowrite64_twopart(driver_addr, &cfg->queue_avail_lo,
&cfg->queue_avail_hi);
vp_iowrite64_twopart(device_addr, &cfg->queue_used_lo,
&cfg->queue_used_hi);
}
EXPORT_SYMBOL_GPL(vp_modern_queue_address);
/*
* vp_modern_set_queue_enable - enable a virtqueue
* @mdev: the modern virtio-pci device
* @index: the queue index
* @enable: whether the virtqueue is enable or not
*/
void vp_modern_set_queue_enable(struct virtio_pci_modern_device *mdev,
u16 index, bool enable)
{
vp_iowrite16(index, &mdev->common->queue_select);
vp_iowrite16(enable, &mdev->common->queue_enable);
}
EXPORT_SYMBOL_GPL(vp_modern_set_queue_enable);
/*
* vp_modern_get_queue_enable - enable a virtqueue
* @mdev: the modern virtio-pci device
* @index: the queue index
*
* Returns whether a virtqueue is enabled or not
*/
bool vp_modern_get_queue_enable(struct virtio_pci_modern_device *mdev,
u16 index)
{
vp_iowrite16(index, &mdev->common->queue_select);
return vp_ioread16(&mdev->common->queue_enable);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_enable);
/*
* vp_modern_set_queue_size - set size for a virtqueue
* @mdev: the modern virtio-pci device
* @index: the queue index
* @size: the size of the virtqueue
*/
void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev,
u16 index, u16 size)
{
vp_iowrite16(index, &mdev->common->queue_select);
vp_iowrite16(size, &mdev->common->queue_size);
}
EXPORT_SYMBOL_GPL(vp_modern_set_queue_size);
/*
* vp_modern_get_queue_size - get size for a virtqueue
* @mdev: the modern virtio-pci device
* @index: the queue index
*
* Returns the size of the virtqueue
*/
u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
u16 index)
{
vp_iowrite16(index, &mdev->common->queue_select);
return vp_ioread16(&mdev->common->queue_size);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_size);
/*
* vp_modern_get_num_queues - get the number of virtqueues
* @mdev: the modern virtio-pci device
*
* Returns the number of virtqueues
*/
u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev)
{
return vp_ioread16(&mdev->common->num_queues);
}
EXPORT_SYMBOL_GPL(vp_modern_get_num_queues);
/*
* vp_modern_get_queue_notify_off - get notification offset for a virtqueue
* @mdev: the modern virtio-pci device
* @index: the queue index
*
* Returns the notification offset for a virtqueue
*/
u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
u16 index)
{
vp_iowrite16(index, &mdev->common->queue_select);
return vp_ioread16(&mdev->common->queue_notify_off);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_notify_off);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Modern Virtio PCI Device");
MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
MODULE_LICENSE("GPL");

View file

@ -225,9 +225,8 @@ static void virtio_vdpa_del_vq(struct virtqueue *vq)
list_del(&info->node);
spin_unlock_irqrestore(&vd_dev->lock, flags);
/* Select and deactivate the queue */
/* Select and deactivate the queue (best effort) */
ops->set_vq_ready(vdpa, index, 0);
WARN_ON(ops->get_vq_ready(vdpa, index));
vring_del_virtqueue(vq);

View file

@ -35,6 +35,8 @@ struct vdpa_vq_state {
u16 avail_index;
};
struct vdpa_mgmt_dev;
/**
* vDPA device - representation of a vDPA device
* @dev: underlying device
@ -43,6 +45,8 @@ struct vdpa_vq_state {
* @index: device index
* @features_valid: were features initialized? for legacy guests
* @nvqs: maximum number of supported virtqueues
* @mdev: management device pointer; caller must setup when registering device as part
* of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
*/
struct vdpa_device {
struct device dev;
@ -51,6 +55,7 @@ struct vdpa_device {
unsigned int index;
bool features_valid;
int nvqs;
struct vdpa_mgmt_dev *mdev;
};
/**
@ -245,20 +250,22 @@ struct vdpa_config_ops {
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
int nvqs,
size_t size);
int nvqs, size_t size, const char *name);
#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs) \
#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs, name) \
container_of(__vdpa_alloc_device( \
parent, config, nvqs, \
sizeof(dev_struct) + \
BUILD_BUG_ON_ZERO(offsetof( \
dev_struct, member))), \
dev_struct, member)), name), \
dev_struct, member)
int vdpa_register_device(struct vdpa_device *vdev);
void vdpa_unregister_device(struct vdpa_device *vdev);
int _vdpa_register_device(struct vdpa_device *vdev);
void _vdpa_unregister_device(struct vdpa_device *vdev);
/**
* vdpa_driver - operations for a vDPA driver
* @driver: underlying device driver
@ -336,4 +343,33 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
ops->get_config(vdev, offset, buf, len);
}
/**
* vdpa_mgmtdev_ops - vdpa device ops
* @dev_add: Add a vdpa device using alloc and register
* @mdev: parent device to use for device addition
* @name: name of the new vdpa device
* Driver need to add a new device using _vdpa_register_device()
* after fully initializing the vdpa device. Driver must return 0
* on success or appropriate error code.
* @dev_del: Remove a vdpa device using unregister
* @mdev: parent device to use for device removal
* @dev: vdpa device to remove
* Driver need to remove the specified device by calling
* _vdpa_unregister_device().
*/
struct vdpa_mgmtdev_ops {
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name);
void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev);
};
struct vdpa_mgmt_dev {
struct device *device;
const struct vdpa_mgmtdev_ops *ops;
const struct virtio_device_id *id_table; /* supported ids */
struct list_head list;
};
int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
#endif /* _LINUX_VDPA_H */

View file

@ -0,0 +1,111 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VIRTIO_PCI_MODERN_H
#define _LINUX_VIRTIO_PCI_MODERN_H
#include <linux/pci.h>
#include <linux/virtio_pci.h>
struct virtio_pci_modern_device {
struct pci_dev *pci_dev;
struct virtio_pci_common_cfg __iomem *common;
/* Device-specific data (non-legacy mode) */
void __iomem *device;
/* Base of vq notifications (non-legacy mode). */
void __iomem *notify_base;
/* Where to read and clear interrupt */
u8 __iomem *isr;
/* So we can sanity-check accesses. */
size_t notify_len;
size_t device_len;
/* Capability for when we need to map notifications per-vq. */
int notify_map_cap;
/* Multiply queue_notify_off by this value. (non-legacy mode). */
u32 notify_offset_multiplier;
int modern_bars;
struct virtio_device_id id;
};
/*
* Type-safe wrappers for io accesses.
* Use these to enforce at compile time the following spec requirement:
*
* The driver MUST access each field using the natural access
* method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
* for 16-bit fields and 8-bit accesses for 8-bit fields.
*/
static inline u8 vp_ioread8(const u8 __iomem *addr)
{
return ioread8(addr);
}
static inline u16 vp_ioread16 (const __le16 __iomem *addr)
{
return ioread16(addr);
}
static inline u32 vp_ioread32(const __le32 __iomem *addr)
{
return ioread32(addr);
}
static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
{
iowrite8(value, addr);
}
static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
{
iowrite16(value, addr);
}
static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
{
iowrite32(value, addr);
}
static inline void vp_iowrite64_twopart(u64 val,
__le32 __iomem *lo,
__le32 __iomem *hi)
{
vp_iowrite32((u32)val, lo);
vp_iowrite32(val >> 32, hi);
}
u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev);
void vp_modern_set_features(struct virtio_pci_modern_device *mdev,
u64 features);
u32 vp_modern_generation(struct virtio_pci_modern_device *mdev);
u8 vp_modern_get_status(struct virtio_pci_modern_device *mdev);
void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
u8 status);
u16 vp_modern_queue_vector(struct virtio_pci_modern_device *mdev,
u16 idx, u16 vector);
u16 vp_modern_config_vector(struct virtio_pci_modern_device *mdev,
u16 vector);
void vp_modern_queue_address(struct virtio_pci_modern_device *mdev,
u16 index, u64 desc_addr, u64 driver_addr,
u64 device_addr);
void vp_modern_set_queue_enable(struct virtio_pci_modern_device *mdev,
u16 idx, bool enable);
bool vp_modern_get_queue_enable(struct virtio_pci_modern_device *mdev,
u16 idx);
void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev,
u16 idx, u16 size);
u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
u16 idx);
u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev);
u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
u16 idx);
void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
size_t minlen,
u32 align,
u32 start, u32 size,
size_t *len);
int vp_modern_probe(struct virtio_pci_modern_device *mdev);
void vp_modern_remove(struct virtio_pci_modern_device *mdev);
#endif

40
include/uapi/linux/vdpa.h Normal file
View file

@ -0,0 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* vdpa device management interface
* Copyright (c) 2020 Mellanox Technologies Ltd. All rights reserved.
*/
#ifndef _UAPI_LINUX_VDPA_H_
#define _UAPI_LINUX_VDPA_H_
#define VDPA_GENL_NAME "vdpa"
#define VDPA_GENL_VERSION 0x1
enum vdpa_command {
VDPA_CMD_UNSPEC,
VDPA_CMD_MGMTDEV_NEW,
VDPA_CMD_MGMTDEV_GET, /* can dump */
VDPA_CMD_DEV_NEW,
VDPA_CMD_DEV_DEL,
VDPA_CMD_DEV_GET, /* can dump */
};
enum vdpa_attr {
VDPA_ATTR_UNSPEC,
/* bus name (optional) + dev name together make the parent device handle */
VDPA_ATTR_MGMTDEV_BUS_NAME, /* string */
VDPA_ATTR_MGMTDEV_DEV_NAME, /* string */
VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES, /* u64 */
VDPA_ATTR_DEV_NAME, /* string */
VDPA_ATTR_DEV_ID, /* u32 */
VDPA_ATTR_DEV_VENDOR_ID, /* u32 */
VDPA_ATTR_DEV_MAX_VQS, /* u32 */
VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */
/* new attributes must be added above here */
VDPA_ATTR_MAX,
};
#endif