forked from rpms/qemu-kvm
238 lines
8.4 KiB
238 lines
8.4 KiB
9 months ago
|
From 965a44793806fef2094906947bd3b428638bf89a Mon Sep 17 00:00:00 2001
|
||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||
|
Date: Tue, 21 Nov 2023 16:44:10 +0800
|
||
|
Subject: [PATCH 031/101] vfio/pci: Make vfio cdev pre-openable by passing a
|
||
|
file handle
|
||
|
MIME-Version: 1.0
|
||
|
Content-Type: text/plain; charset=UTF-8
|
||
|
Content-Transfer-Encoding: 8bit
|
||
|
|
||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||
|
RH-Commit: [30/67] a14b824b700e8fb36633cd159bcc422d992a316f (eauger1/centos-qemu-kvm)
|
||
|
|
||
|
Conflicts: contextual conflict in hw/vfio/pci.c due to
|
||
|
RHEL-only f73562144e492 vfio: cap number of devices that can be assigned
|
||
|
|
||
|
This gives management tools like libvirt a chance to open the vfio
|
||
|
cdev with privilege and pass FD to qemu. This way qemu never needs
|
||
|
to have privilege to open a VFIO or iommu cdev node.
|
||
|
|
||
|
Together with the earlier support of pre-opening /dev/iommu device,
|
||
|
now we have full support of passing a vfio device to unprivileged
|
||
|
qemu by a management tool. This mode is no longer considered for the
|
||
|
legacy backend. So let's remove the "TODO" comment.
|
||
|
|
||
|
Add helper functions vfio_device_set_fd() and vfio_device_get_name()
|
||
|
to set the fd and get the device name; they will also be used by other vfio
|
||
|
devices.
|
||
|
|
||
|
There is no easy way to check if a device is mdev with FD passing,
|
||
|
so fail the x-balloon-allowed check unconditionally in this case.
|
||
|
|
||
|
There is also no easy way to get BDF as name with FD passing, so
|
||
|
we fake a name of the form VFIO_FD<fd> (e.g. VFIO_FD12).
|
||
|
|
||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||
|
(cherry picked from commit da3e04b26fd8d15b344944504d5ffa9c5f20b54b)
|
||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||
|
---
|
||
|
hw/vfio/helpers.c | 43 +++++++++++++++++++++++++++++++++++
|
||
|
hw/vfio/iommufd.c | 12 ++++++----
|
||
|
hw/vfio/pci.c | 28 +++++++++++++----------
|
||
|
include/hw/vfio/vfio-common.h | 4 ++++
|
||
|
4 files changed, 71 insertions(+), 16 deletions(-)
|
||
|
|
||
|
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
|
||
|
index 168847e7c5..3592c3d54e 100644
|
||
|
--- a/hw/vfio/helpers.c
|
||
|
+++ b/hw/vfio/helpers.c
|
||
|
@@ -27,6 +27,7 @@
|
||
|
#include "trace.h"
|
||
|
#include "qapi/error.h"
|
||
|
#include "qemu/error-report.h"
|
||
|
+#include "monitor/monitor.h"
|
||
|
|
||
|
/*
|
||
|
* Common VFIO interrupt disable
|
||
|
@@ -609,3 +610,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
+
|
||
|
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
|
||
|
+{
|
||
|
+ struct stat st;
|
||
|
+
|
||
|
+ if (vbasedev->fd < 0) {
|
||
|
+ if (stat(vbasedev->sysfsdev, &st) < 0) {
|
||
|
+ error_setg_errno(errp, errno, "no such host device");
|
||
|
+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
|
||
|
+ return -errno;
|
||
|
+ }
|
||
|
+ /* User may specify a name, e.g: VFIO platform device */
|
||
|
+ if (!vbasedev->name) {
|
||
|
+ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||
|
+ }
|
||
|
+ } else {
|
||
|
+ if (!vbasedev->iommufd) {
|
||
|
+ error_setg(errp, "Use FD passing only with iommufd backend");
|
||
|
+ return -EINVAL;
|
||
|
+ }
|
||
|
+ /*
|
||
|
+ * Give a name with fd so any function printing out vbasedev->name
|
||
|
+ * will not break.
|
||
|
+ */
|
||
|
+ if (!vbasedev->name) {
|
||
|
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
|
||
|
+{
|
||
|
+ int fd = monitor_fd_param(monitor_cur(), str, errp);
|
||
|
+
|
||
|
+ if (fd < 0) {
|
||
|
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+ vbasedev->fd = fd;
|
||
|
+}
|
||
|
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
|
||
|
index 6e53e013ef..5accd26484 100644
|
||
|
--- a/hw/vfio/iommufd.c
|
||
|
+++ b/hw/vfio/iommufd.c
|
||
|
@@ -320,11 +320,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
|
||
|
uint32_t ioas_id;
|
||
|
Error *err = NULL;
|
||
|
|
||
|
- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
|
||
|
- if (devfd < 0) {
|
||
|
- return devfd;
|
||
|
+ if (vbasedev->fd < 0) {
|
||
|
+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
|
||
|
+ if (devfd < 0) {
|
||
|
+ return devfd;
|
||
|
+ }
|
||
|
+ vbasedev->fd = devfd;
|
||
|
+ } else {
|
||
|
+ devfd = vbasedev->fd;
|
||
|
}
|
||
|
- vbasedev->fd = devfd;
|
||
|
|
||
|
ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
|
||
|
if (ret) {
|
||
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||
|
index 39e6a6678e..3412a63bb1 100644
|
||
|
--- a/hw/vfio/pci.c
|
||
|
+++ b/hw/vfio/pci.c
|
||
|
@@ -2949,7 +2949,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||
|
VFIOGroup *group;
|
||
|
char *tmp, *subsys;
|
||
|
Error *err = NULL;
|
||
|
- struct stat st;
|
||
|
int ret, i = 0;
|
||
|
bool is_mdev;
|
||
|
char uuid[UUID_STR_LEN];
|
||
|
@@ -2976,11 +2975,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
- if (!vbasedev->sysfsdev) {
|
||
|
+ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
|
||
|
if (!(~vdev->host.domain || ~vdev->host.bus ||
|
||
|
~vdev->host.slot || ~vdev->host.function)) {
|
||
|
error_setg(errp, "No provided host device");
|
||
|
error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
|
||
|
+#ifdef CONFIG_IOMMUFD
|
||
|
+ "or -device vfio-pci,fd=DEVICE_FD "
|
||
|
+#endif
|
||
|
"or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
|
||
|
return;
|
||
|
}
|
||
|
@@ -2990,13 +2992,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||
|
vdev->host.slot, vdev->host.function);
|
||
|
}
|
||
|
|
||
|
- if (stat(vbasedev->sysfsdev, &st) < 0) {
|
||
|
- error_setg_errno(errp, errno, "no such host device");
|
||
|
- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
|
||
|
+ if (vfio_device_get_name(vbasedev, errp) < 0) {
|
||
|
return;
|
||
|
}
|
||
|
-
|
||
|
- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||
|
vbasedev->ops = &vfio_pci_ops;
|
||
|
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
|
||
|
vbasedev->dev = DEVICE(vdev);
|
||
|
@@ -3356,6 +3354,7 @@ static void vfio_instance_init(Object *obj)
|
||
|
vdev->host.bus = ~0U;
|
||
|
vdev->host.slot = ~0U;
|
||
|
vdev->host.function = ~0U;
|
||
|
+ vdev->vbasedev.fd = -1;
|
||
|
|
||
|
vdev->nv_gpudirect_clique = 0xFF;
|
||
|
|
||
|
@@ -3412,11 +3411,6 @@ static Property vfio_pci_dev_properties[] = {
|
||
|
qdev_prop_nv_gpudirect_clique, uint8_t),
|
||
|
DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
|
||
|
OFF_AUTOPCIBAR_OFF),
|
||
|
- /*
|
||
|
- * TODO - support passed fds... is this necessary?
|
||
|
- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
|
||
|
- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
|
||
|
- */
|
||
|
#ifdef CONFIG_IOMMUFD
|
||
|
DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
|
||
|
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||
|
@@ -3424,6 +3418,13 @@ static Property vfio_pci_dev_properties[] = {
|
||
|
DEFINE_PROP_END_OF_LIST(),
|
||
|
};
|
||
|
|
||
|
+#ifdef CONFIG_IOMMUFD
|
||
|
+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
|
||
|
+{
|
||
|
+ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
|
||
|
+}
|
||
|
+#endif
|
||
|
+
|
||
|
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
|
||
|
{
|
||
|
DeviceClass *dc = DEVICE_CLASS(klass);
|
||
|
@@ -3431,6 +3432,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
|
||
|
|
||
|
dc->reset = vfio_pci_reset;
|
||
|
device_class_set_props(dc, vfio_pci_dev_properties);
|
||
|
+#ifdef CONFIG_IOMMUFD
|
||
|
+ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
|
||
|
+#endif
|
||
|
dc->desc = "VFIO-based PCI device assignment";
|
||
|
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
|
||
|
pdc->realize = vfio_realize;
|
||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||
|
index 3dac5c167e..697bf24a35 100644
|
||
|
--- a/include/hw/vfio/vfio-common.h
|
||
|
+++ b/include/hw/vfio/vfio-common.h
|
||
|
@@ -251,4 +251,8 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||
|
hwaddr size);
|
||
|
int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
|
||
|
uint64_t size, ram_addr_t ram_addr);
|
||
|
+
|
||
|
+/* Returns 0 on success, or a negative errno. */
|
||
|
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
|
||
|
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
|
||
|
#endif /* HW_VFIO_VFIO_COMMON_H */
|
||
|
--
|
||
|
2.39.3
|
||
|
|