forked from rpms/qemu-kvm
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
142 lines
5.9 KiB
142 lines
5.9 KiB
From db53345dba5682c3ba0bc3fc596b30a98dadb88f Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
|
Date: Wed, 12 Jul 2023 17:46:56 +0200
|
|
Subject: [PATCH 05/37] vfio/pci: Static Resizable BAR capability
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
RH-Author: Cédric Le Goater <clg@redhat.com>
|
|
RH-MergeRequest: 179: vfio: live migration support
|
|
RH-Bugzilla: 2192818
|
|
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
RH-Commit: [3/28] 42e9f4b517eb919c77c6fdbe771d9d05a91955bd (clegoate/qemu-kvm-c9s)
|
|
|
|
Bugzilla: https://bugzilla.redhat.com/2192818
|
|
|
|
commit b5048a4cbfa0
|
|
Author: Alex Williamson <alex.williamson@redhat.com>
|
|
Date: Thu May 4 14:42:48 2023 -0600
|
|
|
|
vfio/pci: Static Resizable BAR capability
|
|
|
|
The PCI Resizable BAR (ReBAR) capability is currently hidden from the
|
|
VM because the protocol for interacting with the capability does not
|
|
support a mechanism for the device to reject an advertised supported
|
|
BAR size. However, when assigned to a VM, the act of resizing the
|
|
BAR requires adjustment of host resources for the device, which
|
|
absolutely can fail. Linux does not currently allow us to reserve
|
|
resources for the device independent of the current usage.
|
|
|
|
The only writable field within the ReBAR capability is the BAR Size
|
|
register. The PCIe spec indicates that when written, the device
|
|
should immediately begin to operate with the provided BAR size. The
|
|
spec however also notes that software must only write values
|
|
corresponding to supported sizes as indicated in the capability and
|
|
control registers. Writing unsupported sizes produces undefined
|
|
results. Therefore, if the hypervisor were to virtualize the
|
|
capability and control registers such that the current size is the
|
|
only indicated available size, then a write of anything other than
|
|
the current size falls into the category of undefined behavior,
|
|
where we can essentially expose the modified ReBAR capability as
|
|
read-only.
|
|
|
|
This may seem pointless, but users have reported that virtualizing
|
|
the capability in this way not only allows guest software to expose
|
|
related features as available (even if only cosmetic), but in some
|
|
scenarios can resolve guest driver issues. Additionally, no
|
|
regressions in behavior have been reported for this change.
|
|
|
|
A caveat here is that the PCIe spec requires for compatibility that
|
|
devices report support for a size in the range of 1MB to 512GB,
|
|
therefore if the current BAR size falls outside that range we revert
|
|
to hiding the capability.
|
|
|
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
|
Link: https://lore.kernel.org/r/20230505232308.2869912-1-alex.williamson@redhat.com
|
|
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
|
|
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
|
---
|
|
hw/vfio/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++-
|
|
1 file changed, 53 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
|
index 579b92a6ed..6cd3a98c39 100644
|
|
--- a/hw/vfio/pci.c
|
|
+++ b/hw/vfio/pci.c
|
|
@@ -2069,6 +2069,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
|
|
return 0;
|
|
}
|
|
|
|
+static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos)
|
|
+{
|
|
+ uint32_t ctrl;
|
|
+ int i, nbar;
|
|
+
|
|
+ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL);
|
|
+ nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
|
|
+
|
|
+ for (i = 0; i < nbar; i++) {
|
|
+ uint32_t cap;
|
|
+ int size;
|
|
+
|
|
+ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8));
|
|
+ size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT;
|
|
+
|
|
+ /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */
|
|
+ cap = size <= 27 ? 1U << (size + 4) : 0;
|
|
+
|
|
+ /*
|
|
+ * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one
|
|
+ * size in the range 1MB to 512GB. We intend to mask all sizes except
|
|
+ * the one currently enabled in the size field, therefore if it's
|
|
+ * outside the range, hide the whole capability as this virtualization
|
|
+ * trick won't work. If >512GB resizable BARs start to appear, we
|
|
+ * might need an opt-in or reservation scheme in the kernel.
|
|
+ */
|
|
+ if (!(cap & PCI_REBAR_CAP_SIZES)) {
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /* Hide all sizes reported in the ctrl reg per above requirement. */
|
|
+ ctrl &= (PCI_REBAR_CTRL_BAR_SIZE |
|
|
+ PCI_REBAR_CTRL_NBAR_MASK |
|
|
+ PCI_REBAR_CTRL_BAR_IDX);
|
|
+
|
|
+ /*
|
|
+ * The BAR size field is RW, however we've mangled the capability
|
|
+ * register such that we only report a single size, ie. the current
|
|
+ * BAR size. A write of an unsupported value is undefined, therefore
|
|
+ * the register field is essentially RO.
|
|
+ */
|
|
+ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0);
|
|
+ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
|
|
{
|
|
PCIDevice *pdev = &vdev->pdev;
|
|
@@ -2142,9 +2190,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
|
|
case 0: /* kernel masked capability */
|
|
case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
|
|
case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */
|
|
- case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */
|
|
trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
|
|
break;
|
|
+ case PCI_EXT_CAP_ID_REBAR:
|
|
+ if (!vfio_setup_rebar_ecap(vdev, next)) {
|
|
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
|
|
+ }
|
|
+ break;
|
|
default:
|
|
pcie_add_capability(pdev, cap_id, cap_ver, next, size);
|
|
}
|
|
--
|
|
2.39.3
|
|
|